From 3a648c98b68769b9a50f28301db1865f8efdcd0a Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 001/279] Kusto-phase1: Add Support to Kusto Query Language This is the initial implementation of the Kusto Query Language. In this commit, we support the following features as an MVP: Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count, min, max, sum Aggregate by time intervals --- src/Parsers/Kusto/ParserKQLFilter.cpp | 25 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 45 ++- src/Parsers/Kusto/ParserKQLOperators.cpp | 129 ++------ src/Parsers/Kusto/ParserKQLOperators.h | 13 +- src/Parsers/Kusto/ParserKQLProject.cpp | 30 +- src/Parsers/Kusto/ParserKQLProject.h | 6 + src/Parsers/Kusto/ParserKQLQuery.cpp | 357 +++++------------------ src/Parsers/Kusto/ParserKQLQuery.h | 19 +- src/Parsers/Kusto/ParserKQLSort.cpp | 33 ++- src/Parsers/Kusto/ParserKQLStatement.cpp | 57 +--- src/Parsers/Kusto/ParserKQLStatement.h | 7 - src/Parsers/Kusto/ParserKQLSummarize.cpp | 155 +++++++--- src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- src/Parsers/Kusto/ParserKQLTable.cpp | 27 +- src/Parsers/Kusto/ParserKQLTable.h | 3 +- src/Parsers/tests/gtest_Parser.cpp | 32 +- 16 files changed, 384 insertions(+), 558 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 3a399bdccdb..ad7ad807d03 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -10,15 +10,28 @@ namespace DB bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - String expr = getExprFromToken(pos); - ASTPtr where_expression; + if (op_pos.empty()) + return true; + Pos begin = pos; + String expr; - Tokens token_filter(expr.c_str(), 
expr.c_str()+expr.size()); - IParser::Pos pos_filter(token_filter, pos.max_depth); - if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) + KQLOperators convetor; + + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + if (expr.empty()) + expr = "(" + convetor.getExprFromToken(pos) +")"; + else + expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + } + + Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(tokenFilter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index bb8e08fd378..7811ebba9ab 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -2,26 +2,55 @@ #include #include #include -#include #include -#include namespace DB { bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr limit_length; + if (op_pos.empty()) + return true; - auto expr = getExprFromToken(pos); + auto begin = pos; + Int64 minLimit = -1; + auto final_pos = pos; + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it; + auto isNumber = [&] + { + for (auto ch = pos->begin ; ch < pos->end; ++ch) + { + if (!isdigit(*ch)) + return false; + } + return true; + }; - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + if (!isNumber()) + return false; - if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) + auto limitLength = std::strtol(pos->begin,nullptr, 10); + if (-1 == minLimit) + { + minLimit = limitLength; + final_pos = pos; + } + else + { + if (minLimit > limitLength) + { + minLimit = limitLength; + final_pos = pos; + } + } + } + 
+ if (!ParserExpressionWithOptionalAlias(false).parse(final_pos, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1575cffcc39..933ae0ce514 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include namespace DB { @@ -12,11 +10,9 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) { - String new_expr; - Expected expected; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + String new_expr, leftWildcards= "", rightWildcards=""; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) @@ -73,65 +69,40 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_wildcards ="%"; + leftWildcards ="%"; break; case WildcardsPos::right: - right_wildcards = "%"; + rightWildcards = "%"; break; case WildcardsPos::both: - left_wildcards ="%"; - right_wildcards = "%"; + leftWildcards ="%"; + rightWildcards = "%"; break; } - switch (space_pos) - { - case WildcardsPos::none: - break; - - case WildcardsPos::left: - left_space =" "; - break; - - case WildcardsPos::right: - right_space = " "; - break; - - case WildcardsPos::both: - left_space =" "; - right_space = " "; - break; - } - - ++token_pos; - - if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + 
right_wildcards + "')"; - else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) - { - auto tmp_arg = String(token_pos->begin, token_pos->end); - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; - } + if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) + new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + "')"; else throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); return new_expr; } -bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +String KQLOperators::getExprFromToken(IParser::Pos pos) { - auto begin = pos; + String res; + std::vector tokens; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue op_value = KQLOperatorValue::none; + KQLOperatorValue opValue = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); String op = token; - if (token == "!") + if ( token == "!" 
) { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -162,27 +133,16 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) else --pos; } - else - --pos; - if (KQLOperator.find(op) == KQLOperator.end()) - { - pos = begin; - return false; - } - - op_value = KQLOperator[op]; + if (KQLOperator.find(op) != KQLOperator.end()) + opValue = KQLOperator[op]; String new_expr; - - if (op_value == KQLOperatorValue::none) + if (opValue == KQLOperatorValue::none) tokens.push_back(op); else { - auto last_op = tokens.back(); - auto last_pos = pos; - - switch (op_value) + switch (opValue) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -221,7 +181,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -229,6 +189,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -238,11 +199,9 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::has_all: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: @@ -254,67 +213,35 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case 
KQLOperatorValue::not_hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case 
KQLOperatorValue::in_cs: - new_expr = genInOpExpr(pos,op,"in"); + new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: - new_expr = genInOpExpr(pos,op,"not in"); + new_expr = "not in"; break; case KQLOperatorValue::in: @@ -349,11 +276,13 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) tokens.push_back(new_expr); } - return true; + ++pos; } - pos = begin; - return false; + + for (auto it=tokens.begin(); it!=tokens.end(); ++it) + res = res + *it + " "; + + return res; } } - diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 72e25cc3cf9..1bab8ae84ef 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,14 +2,13 @@ #include #include -#include + namespace DB { -class KQLOperators -{ +class KQLOperators { public: - bool convert(std::vector &tokens,IParser::Pos &pos); + String getExprFromToken(IParser::Pos pos); protected: enum class WildcardsPos:uint8_t @@ -83,7 +82,7 @@ protected: {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,9 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); - static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); - static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); + String genHaystackOpExpr(std::vector &tokens,IParser::Pos 
&tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); }; } diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index e978323d821..fee8cdb612b 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -6,20 +6,42 @@ namespace DB bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr select_expression_list; + auto begin = pos; String expr; + if (op_pos.empty()) + expr = "*"; + else + { + for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + { + pos = *it ; + while (!pos->isEnd() && pos->type != TokenType::PipeMark) + { + if (pos->type == TokenType::BareWord) + { + String tmp(pos->begin,pos->end); - expr = getExprFromToken(pos); + if (it != op_pos.begin() && columns.find(tmp) == columns.end()) + return false; + columns.insert(tmp); + } + ++pos; + } + } + expr = getExprFromToken(op_pos.back()); + } Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + pos = begin; return true; } + + } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index b64675beed0..3ab3c82f1be 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,9 +8,15 @@ namespace DB class ParserKQLProject : public ParserKQLBase { +public: + void addColumn(String column) {columns.insert(column);} + protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + std::unordered_set columns; }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp 
b/src/Parsers/Kusto/ParserKQLQuery.cpp index 04ee36705a9..0a9fa1fc4df 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,335 +7,116 @@ #include #include #include +#include #include -#include -#include -#include -#include -#include -#include namespace DB { -String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +bool ParserKQLBase :: parsePrepare(Pos & pos) { - Tokens tokens(text.c_str(), text.c_str() + text.size()); - IParser::Pos pos(tokens, max_depth); - - return getExprFromToken(pos); + op_pos.push_back(pos); + return true; } -String ParserKQLBase :: getExprFromPipe(Pos & pos) -{ - uint16_t bracket_count = 0; - auto begin = pos; - auto end = pos; - while (!end->isEnd() && end->type != TokenType::Semicolon) - { - if (end->type == TokenType::OpeningRoundBracket) - ++bracket_count; - - if (end->type == TokenType::OpeningRoundBracket) - --bracket_count; - - if (end->type == TokenType::PipeMark && bracket_count == 0) - break; - - ++end; - } - --end; - return String(begin->begin, end->end); -} - -String ParserKQLBase :: getExprFromToken(Pos & pos) +String ParserKQLBase :: getExprFromToken(Pos pos) { String res; - std::vector tokens; - String alias; - - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + while (!pos->isEnd() && pos->type != TokenType::PipeMark) { - String token = String(pos->begin,pos->end); - - if (token == "=") - { - ++pos; - if (String(pos->begin,pos->end) != "~") - { - alias = tokens.back(); - tokens.pop_back(); - } - --pos; - } - else if (!KQLOperators().convert(tokens,pos)) - { - tokens.push_back(token); - } - - if (pos->type == TokenType::Comma && !alias.empty()) - { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); - } + res = res + String(pos->begin,pos->end) +" "; ++pos; } - - if (!alias.empty()) - { - tokens.push_back("AS"); - tokens.push_back(alias); 
- } - - for (auto const &token : tokens) - res = res.empty()? token : res +" " + token; return res; } -std::unique_ptr ParserKQLQuery::getOperator(String & op_name) -{ - if (op_name == "filter" || op_name == "where") - return std::make_unique(); - else if (op_name == "limit" || op_name == "take") - return std::make_unique(); - else if (op_name == "project") - return std::make_unique(); - else if (op_name == "sort by" || op_name == "order by") - return std::make_unique(); - else if (op_name == "summarize") - return std::make_unique(); - else if (op_name == "table") - return std::make_unique(); - else - return nullptr; -} - bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - struct KQLOperatorDataFlowState - { - String operator_name; - bool need_input; - bool gen_output; - int8_t backspace_steps; // how many steps to last token of previous pipe - }; - auto select_query = std::make_shared(); node = select_query; - ASTPtr tables; - std::unordered_map kql_parser = - { - { "filter", {"filter", false, false, 3}}, - { "where", {"filter", false, false, 3}}, - { "limit", {"limit", false, true, 3}}, - { "take", {"limit", false, true, 3}}, - { "project", {"project", false, false, 3}}, - { "sort by", {"order by", false, false, 4}}, - { "order by", {"order by", false, false, 4}}, - { "table", {"table", false, false, 3}}, - { "summarize", {"summarize", true, true, 3}} + ParserKQLFilter KQLfilter_p; + ParserKQLLimit KQLlimit_p; + ParserKQLProject KQLproject_p; + ParserKQLSort KQLsort_p; + ParserKQLSummarize KQLsummarize_p; + ParserKQLTable KQLtable_p; + + ASTPtr select_expression_list; + ASTPtr tables; + ASTPtr where_expression; + ASTPtr group_expression_list; + ASTPtr order_expression_list; + ASTPtr limit_length; + + std::unordered_map KQLParser = { + { "filter",&KQLfilter_p}, + { "where",&KQLfilter_p}, + { "limit",&KQLlimit_p}, + { "take",&KQLlimit_p}, + { "project",&KQLproject_p}, + { "sort",&KQLsort_p}, + { "order",&KQLsort_p}, + { 
"summarize",&KQLsummarize_p}, + { "table",&KQLtable_p} }; std::vector> operation_pos; - String table_name(pos->begin, pos->end); + operation_pos.push_back(std::make_pair("table",pos)); - operation_pos.push_back(std::make_pair("table", pos)); - ++pos; - uint16_t bracket_count = 0; - - while (!pos->isEnd() && pos->type != TokenType::Semicolon) + while (!pos->isEnd()) { - if (pos->type == TokenType::OpeningRoundBracket) - ++bracket_count; - if (pos->type == TokenType::OpeningRoundBracket) - --bracket_count; - - if (pos->type == TokenType::PipeMark && bracket_count == 0) + ++pos; + if (pos->type == TokenType::PipeMark) { ++pos; - String kql_operator(pos->begin, pos->end); - if (kql_operator == "order" || kql_operator == "sort") - { - ++pos; - ParserKeyword s_by("by"); - if (s_by.ignore(pos,expected)) - { - kql_operator = "order by"; - --pos; - } - } - if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + String KQLoperator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || KQLParser.find(KQLoperator) == KQLParser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator, pos)); + operation_pos.push_back(std::make_pair(KQLoperator,pos)); } - else - ++pos; } - auto kql_operator_str = operation_pos.back().first; - auto npos = operation_pos.back().second; - if (!npos.isValid()) + for (auto &op_pos : operation_pos) + { + auto KQLoperator = op_pos.first; + auto npos = op_pos.second; + if (!npos.isValid()) + return false; + + if (!KQLParser[KQLoperator]->parsePrepare(npos)) + return false; + } + + if (!KQLtable_p.parse(pos, tables, expected)) return false; - auto kql_operator_p = getOperator(kql_operator_str); - - if (!kql_operator_p) + if (!KQLproject_p.parse(pos, select_expression_list, expected)) return false; - if (operation_pos.size() == 1) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else if (operation_pos.size() == 2 && operation_pos.front().first == "table") - 
{ - if (!kql_operator_p->parse(npos, node, expected)) - return false; - npos = operation_pos.front().second; - if (!ParserKQLTable().parse(npos, node, expected)) - return false; - } + if (!KQLlimit_p.parse(pos, limit_length, expected)) + return false; + + if (!KQLfilter_p.parse(pos, where_expression, expected)) + return false; + + if (!KQLsort_p.parse(pos, order_expression_list, expected)) + return false; + + if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + return false; else - { - String project_clause, order_clause, where_clause, limit_clause; - auto last_pos = operation_pos.back().second; - auto last_op = operation_pos.back().first; + group_expression_list = KQLsummarize_p.group_expression_list; - auto set_main_query_clause =[&](String & op, Pos & op_pos) - { - auto op_str = ParserKQLBase::getExprFromPipe(op_pos); - if (op == "project") - project_clause = op_str; - else if (op == "where" || op == "filter") - where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); - else if (op == "limit" || op == "take") - limit_clause = op_str; - else if (op == "order by" || op == "sort by") - order_clause = order_clause.empty() ? 
op_str : order_clause + "," + op_str; - }; + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); - set_main_query_clause(last_op, last_pos); - - operation_pos.pop_back(); - - if (kql_parser[last_op].need_input) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else - { - while (!operation_pos.empty()) - { - auto prev_op = operation_pos.back().first; - auto prev_pos = operation_pos.back().second; - - if (kql_parser[prev_op].gen_output) - break; - if (!project_clause.empty() && prev_op == "project") - break; - set_main_query_clause(prev_op, prev_pos); - operation_pos.pop_back(); - last_op = prev_op; - last_pos = prev_pos; - } - } - - if (!operation_pos.empty()) - { - for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) - --last_pos; - - String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos pos_subquery(token_subquery, pos.max_depth); - - if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) - return false; - select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); - } - else - { - if (!ParserKQLTable().parse(last_pos, node, expected)) - return false; - } - - auto set_query_clasue =[&](String op_str, String op_calsue) - { - auto oprator = getOperator(op_str); - if (oprator) - { - Tokens token_clause(op_calsue.c_str(), 
op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); - if (!oprator->parse(pos_clause, node, expected)) - return false; - } - return true; - }; - - if (!select_query->select()) - { - if (project_clause.empty()) - project_clause = "*"; - if (!set_query_clasue("project", project_clause)) - return false; - } - - if (!order_clause.empty()) - if (!set_query_clasue("order by", order_clause)) - return false; - - if (!where_clause.empty()) - if (!set_query_clasue("where", where_clause)) - return false; - - if (!limit_clause.empty()) - if (!set_query_clasue("limit", limit_clause)) - return false; - return true; - } - - if (!select_query->select()) - { - auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); - if (!std::make_unique()->parse(new_pos, node, expected)) - return false; - } - - return true; -} - -bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr select_node; - - if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) - return false; - - ASTPtr node_subquery = std::make_shared(); - node_subquery->children.push_back(select_node); - - ASTPtr node_table_expr = std::make_shared(); - node_table_expr->as()->subquery = node_subquery; - - node_table_expr->children.emplace_back(node_subquery); - - ASTPtr node_table_in_select_query_emlement = std::make_shared(); - node_table_in_select_query_emlement->as()->table_expression = node_table_expr; - - ASTPtr res = std::make_shared(); - - res->children.emplace_back(node_table_in_select_query_emlement); - - node = res; return true; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42f5f84f031..25aa4e6b83c 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,32 +1,25 @@ #pragma once #include -#include namespace DB { class ParserKQLBase : public IParserBase { public: - static String 
getExprFromToken(Pos & pos); - static String getExprFromPipe(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t & max_depth); + virtual bool parsePrepare(Pos & pos) ; + +protected: + std::vector op_pos; + std::vector expresions; + virtual String getExprFromToken(Pos pos); }; class ParserKQLQuery : public IParserBase { - protected: - static std::unique_ptr getOperator(String &op_name); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserKQLSubquery : public IParserBase -{ -protected: - const char * getName() const override { return "KQL subquery"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - } diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index ef4b84b17c7..496a79b610a 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -10,50 +10,61 @@ namespace DB bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + if (op_pos.empty()) + return true; + + auto begin = pos; bool has_dir = false; std::vector has_directions; ParserOrderByExpressionList order_list; ASTPtr order_expression_list; - auto expr = getExprFromToken(pos); + ParserKeyword by("by"); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + pos = op_pos.back(); // sort only affected by last one - auto pos_backup = new_pos; - if (!order_list.parse(pos_backup, order_expression_list, expected)) + if (!by.ignore(pos, expected)) return false; - while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) + if (!order_list.parse(pos,order_expression_list,expected)) + return false; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + return false; + + pos = op_pos.back(); + while (!pos->isEnd() && pos->type != 
TokenType::PipeMark) { String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" || tmp == "asc") has_dir = true; - if (new_pos->type == TokenType::Comma) + if (pos->type == TokenType::Comma) { has_directions.push_back(has_dir); has_dir = false; } - ++new_pos; + + ++pos; } has_directions.push_back(has_dir); - for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) + for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto *order_expr = order_expression_list->children[i]->as(); + auto order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; else order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1; + } } - node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + node = order_expression_list; + pos =begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 21e480234d3..7dea87eef25 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB { @@ -22,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; + ParserKQLWithUnionQuery KQL_p; ASTPtr query; - bool parsed = kql_p.parse(pos, query, expected); + bool parsed = KQL_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +35,20 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr kql_query; + // will support union next phase + ASTPtr KQLQuery; - if (!ParserKQLQuery().parse(pos, kql_query, 
expected)) + if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) return false; - if (kql_query->as()) + if (KQLQuery->as()) { - node = std::move(kql_query); + node = std::move(KQLQuery); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(kql_query); + list_node->children.push_back(KQLQuery); auto select_with_union_query = std::make_shared(); node = select_with_union_query; @@ -58,45 +58,4 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKQLWithUnionQuery kql_p; - ASTPtr select; - ParserToken s_lparen(TokenType::OpeningRoundBracket); - - auto begin = pos; - auto paren_count = 0 ; - String kql_statement; - - if (s_lparen.ignore(pos, expected)) - { - ++paren_count; - while (!pos->isEnd()) - { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - - kql_statement = kql_statement + " " + String(pos->begin,pos->end); - ++pos; - } - - Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth); - - if (kql_p.parse(pos_kql, select, expected)) - { - node = select; - ++pos; - return true; - } - } - pos = begin; - return false; -}; - } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index ef44b2d6c8a..1eed2d00845 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -41,12 +41,5 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserKQLTaleFunction : public IParserBase -{ -protected: - const char * getName() const override { return "KQL() function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - } diff --git 
a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 75eacb1adbd..f7422c02bca 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include #include #include @@ -10,71 +14,148 @@ #include #include #include +#include #include #include #include #include - namespace DB { +std::pair removeLastWord(String input) +{ + std::istringstream ss(input); + std::string token; + std::vector temp; + + while (std::getline(ss, token, ' ')) + { + temp.push_back(token); + } + + String firstPart; + for (std::size_t i = 0; i < temp.size() - 1; i++) + { + firstPart += temp[i]; + } + + return std::make_pair(firstPart, temp[temp.size() - 1]); +} + bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ASTPtr select_expression_list; - ASTPtr group_expression_list; + if (op_pos.empty()) + return true; + if (op_pos.size() != 1) // now only support one summarize + return false; - String expr_aggregation; - String expr_groupby; - String expr_columns; - bool groupby = false; + //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName + + //summarize has syntax : + + //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] + + //right now , we only support: + + //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] + //Aggregation -> the Aggregation function on column + //GroupExpression - > columns auto begin = pos; - auto pos_groupby = pos; + + pos = op_pos.back(); + String exprAggregation; + String exprGroupby; + String exprColumns; + + bool groupby = false; + bool bin_function = false; + String bin_column; + String last_string; + String column_name; + int character_passed = 0; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { if (String(pos->begin, pos->end) == "by") - { groupby = true; - auto end = pos; - --end; - expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; - pos_groupby = pos; - ++pos_groupby; + else + { + if (groupby) + { + if (String(pos->begin, pos->end) == "bin") + { + exprGroupby = exprGroupby + "round" + " "; + bin_function = true; + } + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + + if (bin_function && last_string == "(") + { + bin_column = String(pos->begin, pos->end); + bin_function = false; + } + + last_string = String(pos->begin, pos->end); + } + + else + { + if (String(pos->begin, pos->end) == "=") + { + std::pair temp = removeLastWord(exprAggregation); + exprAggregation = temp.first; + column_name = temp.second; + } + else + { + if (!column_name.empty()) + { + exprAggregation = exprAggregation + String(pos->begin, pos->end); + character_passed++; + if (String(pos->begin, pos->end) == ")") // was 4 + { + exprAggregation = exprAggregation + " AS " + column_name; + column_name = ""; + } + } + else + { + exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + } + } + } } ++pos; } - --pos; - if (groupby) - expr_groupby = String(pos_groupby->begin, pos->end); + + if(!bin_column.empty()) + exprGroupby = exprGroupby + " AS " + bin_column; + + if (exprGroupby.empty()) + exprColumns = exprAggregation; else - expr_aggregation = begin <= pos ? 
String(begin->begin, pos->end) : ""; - - auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; - expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; - - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); - - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - - if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) + { + if (exprAggregation.empty()) + exprColumns = exprGroupby; + else + exprColumns = exprGroupby + "," + exprAggregation; + } + Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + exprColumns.size()); + IParser::Pos posColumns(tokenColumns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); - if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); - - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); - IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); - - if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); + IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) return false; - node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); } + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1aad02705df..426ac29fe6a 100644 --- 
a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -8,10 +8,12 @@ namespace DB class ParserKQLSummarize : public ParserKQLBase { - +public: + ASTPtr group_expression_list; protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + }; } diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 6356ad688b6..8d450799785 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,14 +3,23 @@ #include #include #include -#include + namespace DB { +bool ParserKQLTable :: parsePrepare(Pos & pos) +{ + if (!op_pos.empty()) + return false; + + op_pos.push_back(pos); + return true; +} + bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ({ + ( { "SELECT", "INSERT", "CREATE", @@ -33,9 +42,14 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - }); + } ); + + if (op_pos.empty()) + return false; + + auto begin = pos; + pos = op_pos.back(); - ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); @@ -44,10 +58,9 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) return false; - if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) + if (!ParserTablesInSelectQuery().parse(pos, node, expected)) return false; - - node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + pos = begin; return true; } diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index c67dcb15156..1266b6e732d 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,10 +8,11 @@ namespace DB class ParserKQLTable : public ParserKQLBase { - protected: const 
char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parsePrepare(Pos &pos) override; + }; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 18e91c533e0..73f5ec1bde3 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -324,19 +324,19 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 1" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" }, { "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + "throws Syntax error" }, { "Customers | sort by FirstName desc", @@ -344,7 +344,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" + "SELECT *\nFROM Customers\nORDER BY FirstName DESC\nLIMIT 3" }, { "Customers | sort by FirstName asc", @@ -368,7 +368,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | sort by FirstName | order by Age 
", - "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" + "SELECT *\nFROM Customers\nORDER BY Age DESC" }, { "Customers | sort by FirstName nulls first", @@ -416,27 +416,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" + "throws Syntax error" }, { "Customers |summarize count() by Occupation", - "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n count()\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize sum(Age) by Occupation", - "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n sum(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize avg(Age) by Occupation", - "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n avg(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers|summarize min(Age) by Occupation", - "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n min(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers |summarize max(Age) by Occupation", - "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, { "Customers | where FirstName contains 'pet'", @@ -475,12 +475,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" }, { - "Customers | where FirstName startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" - }, - { - "Customers | where FirstName !startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + "Customers|summarize count() by bin(Age, 10) ", + "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From 0581ac45d11b093a36e6f5cdfd08af9c36531b31 
Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 002/279] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 104 ++++++++++++++++++----- src/Parsers/Kusto/ParserKQLSummarize.h | 5 +- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index f7422c02bca..24473118dc0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,7 +1,9 @@ #include #include -#include +//#include #include +#include +#include #include #include #include @@ -19,16 +21,21 @@ #include #include #include + namespace DB { -std::pair removeLastWord(String input) +std::pair ParserKQLSummarize::removeLastWord(String input) { - std::istringstream ss(input); - std::string token; + ReadBufferFromString in(input); + String token; std::vector temp; - while (std::getline(ss, token, ' ')) + while (!in.eof()) { + readStringUntilWhitespace(token, in); + if (in.eof()) + break; + skipWhitespaceIfAny(in); temp.push_back(token); } @@ -37,10 +44,65 @@ std::pair removeLastWord(String input) { firstPart += temp[i]; } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair("", ""); } +String ParserKQLSummarize::getBinGroupbyString(String exprBin) +{ + String column_name; + bool bracket_start = false; + bool comma_start = false; + String bin_duration; + + for (std::size_t i = 0; i < exprBin.size(); i++) + { + if (comma_start && exprBin[i] != ')') + bin_duration += exprBin[i]; + if (exprBin[i] == ',') + { + comma_start = true; + bracket_start = false; + } + if (bracket_start == true) + column_name += exprBin[i]; + if (exprBin[i] == '(') + bracket_start = true; + } + + + std::size_t len = bin_duration.size(); + char bin_type = bin_duration[len - 1]; // y, d, h, m, s + if ((bin_type != 'y') && (bin_type != 
'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) + { + return "toInt32(" + column_name + "/" + bin_duration + ") * " + bin_duration + " AS bin_int"; + } + bin_duration = bin_duration.substr(0, len - 1); + + switch (bin_type) + { + case 'y': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / (12*30*86400)) / " + bin_duration + ") * (" + + bin_duration + " * (12*30*86400))) AS bin_year"; + case 'd': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 86400) / " + bin_duration + ") * (" + bin_duration + + " * 86400)) AS bin_day"; + case 'h': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 3600) / " + bin_duration + ") * (" + bin_duration + + " * 3600)) AS bin_hour"; + case 'm': + return "toDateTime(toInt32((toFloat32(toDateTime(" + column_name + ")) / 60) / " + bin_duration + ") * (" + bin_duration + + " * 60)) AS bin_minute"; + case 's': + return "toDateTime(" + column_name + ") AS bin_sec"; + default: + return ""; + } +} bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -67,7 +129,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String exprAggregation; String exprGroupby; String exprColumns; - + String exprBin; bool groupby = false; bool bin_function = false; String bin_column; @@ -83,21 +145,20 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin") + if (String(pos->begin, pos->end) == "bin" || bin_function == true) { - exprGroupby = exprGroupby + "round" + " "; bin_function = true; - } - else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; - - if (bin_function && last_string == "(") - { - bin_column = String(pos->begin, pos->end); - bin_function = false; + exprBin += String(pos->begin, pos->end); + if (String(pos->begin, pos->end) == ")") + { + exprBin = getBinGroupbyString(exprBin); + 
exprGroupby += exprBin; + bin_function = false; + } } - last_string = String(pos->begin, pos->end); + else + exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; } else @@ -114,13 +175,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { exprAggregation = exprAggregation + String(pos->begin, pos->end); character_passed++; - if (String(pos->begin, pos->end) == ")") // was 4 + if (String(pos->begin, pos->end) == ")") { exprAggregation = exprAggregation + " AS " + column_name; column_name = ""; } } - else + else if (!bin_function) { exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; } @@ -130,9 +191,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if(!bin_column.empty()) - exprGroupby = exprGroupby + " AS " + bin_column; - if (exprGroupby.empty()) exprColumns = exprAggregation; else diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 426ac29fe6a..1420d5ce519 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -5,15 +5,16 @@ namespace DB { - class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; + protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - + std::pair removeLastWord(String input); + String getBinGroupbyString(String exprBin); }; } From 785fb8ee917fe1b8d086dc2a7f15e92dcd1738b5 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:18:49 -0700 Subject: [PATCH 003/279] added single unit test case for summarize bin() --- src/Parsers/tests/gtest_Parser.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 73f5ec1bde3..7c2ca186481 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -438,6 +438,10 @@ 
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers |summarize max(Age) by Occupation", "SELECT\n Occupation,\n max(Age)\nFROM Customers\nGROUP BY Occupation" }, + { + "Customers |summarize count() by bin(Age, 10)", + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" + } { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" From e4494255b4558f3efa7bf2d14aab743724e90cbd Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:29:51 -0700 Subject: [PATCH 004/279] removed unwanted comments --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 24473118dc0..0260902c937 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,6 +1,5 @@ #include #include -//#include #include #include #include @@ -111,17 +110,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - //summarize avg(age) by FirstName ==> select FirstName,avg(Age) from Customers3 group by FirstName - - //summarize has syntax : - - //T | summarize [SummarizeParameters] [[Column =] Aggregation [, ...]] [by [Column =] GroupExpression [, ...]] - - //right now , we only support: - - //T | summarize Aggregation [, ...] 
[by GroupExpression [, ...]] - //Aggregation -> the Aggregation function on column - //GroupExpression - > columns auto begin = pos; From e67031e3f8beaad8863f782ea4690d2385fb52cf Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 12:06:15 -0700 Subject: [PATCH 005/279] corrected unit test --- src/Parsers/tests/gtest_Parser.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 7c2ca186481..3f95a373dfa 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -440,8 +440,8 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers |summarize count() by bin(Age, 10)", - "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count(Age)\nFROM Customers\nGROUP BY bin_int" - } + "SELECT\n toInt32(Age / 10) * 10 AS bin_int,\n count()\nFROM Customers\nGROUP BY bin_int" + }, { "Customers | where FirstName contains 'pet'", "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" @@ -477,10 +477,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers|summarize count() by bin(Age, 10) ", - "SELECT\n round(Age, 10) AS Age,\n count()\nFROM Customers\nGROUP BY Age" } }))); From bc1846f07bc81c98a7b93a8772e6098b86ddf8bf Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 18:49:22 -0700 Subject: [PATCH 006/279] Kusto-phase1 : Add new test cases --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3f95a373dfa..60bcbae0303 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -477,6 +477,14 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName matches regex 'P.*r'", "SELECT *\nFROM Customers\nWHERE 
match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); From d0f6778528b417b20f06aeb4edb0ba39a97bb56a Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 9 Jun 2022 22:17:58 -0700 Subject: [PATCH 007/279] Kusto-phase1: Fixed the bug for KQL filer with multiple operations --- src/Parsers/Kusto/ParserKQLOperators.cpp | 2 ++ src/Parsers/tests/gtest_Parser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 933ae0ce514..72759258eb2 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -133,6 +133,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) else --pos; } + else + --pos; if (KQLOperator.find(op) != KQLOperator.end()) opValue = KQLOperator[op]; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 60bcbae0303..096063c2aa9 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -416,7 +416,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "Customers | where Age > 30 | where Education == 'Bachelors'", - "throws Syntax error" + "SELECT *\nFROM Customers\nWHERE (Age > 30) AND (Education = 'Bachelors')" }, { "Customers |summarize count() by Occupation", From b9d850cad665872c1eeb086a9833835adfa656c6 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 008/279] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLFilter.cpp | 11 ++-- src/Parsers/Kusto/ParserKQLLimit.cpp | 25 ++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 30 +++++----- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 56 ++++++++--------- 
src/Parsers/Kusto/ParserKQLSort.cpp | 4 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 14 ++--- src/Parsers/Kusto/ParserKQLSummarize.cpp | 76 ++++++++++++------------ src/Parsers/Kusto/ParserKQLSummarize.h | 4 +- 9 files changed, 109 insertions(+), 113 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index ad7ad807d03..466370f5d80 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -17,17 +17,16 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLOperators convetor; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po : op_pos) { - pos = *it; if (expr.empty()) - expr = "(" + convetor.getExprFromToken(pos) +")"; + expr = "(" + convetor.getExprFromToken(op_po) +")"; else - expr = expr + " and (" + convetor.getExprFromToken(pos) +")"; + expr = expr + " and (" + convetor.getExprFromToken(op_po) +")"; } - Tokens tokenFilter(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos pos_filter(tokenFilter, pos.max_depth); + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 7811ebba9ab..4f7eddd9662 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,14 +13,13 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; auto begin = pos; - Int64 minLimit = -1; + Int64 min_limit = -1; auto final_pos = pos; - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) + for (auto op_po: op_pos) { - pos = *it; - auto isNumber = [&] + auto is_number = [&] { - for (auto ch = pos->begin ; ch < pos->end; ++ch) + for (const auto *ch = op_po->begin ; ch < op_po->end; ++ch) { if (!isdigit(*ch)) 
return false; @@ -28,21 +27,21 @@ bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; }; - if (!isNumber()) + if (!is_number()) return false; - auto limitLength = std::strtol(pos->begin,nullptr, 10); - if (-1 == minLimit) + auto limit_length = std::strtol(op_po->begin,nullptr, 10); + if (-1 == min_limit) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } else { - if (minLimit > limitLength) + if (min_limit > limit_length) { - minLimit = limitLength; - final_pos = pos; + min_limit = limit_length; + final_pos = op_po; } } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 72759258eb2..64767ff3054 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -10,9 +10,9 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) { - String new_expr, leftWildcards= "", rightWildcards=""; + String new_expr, left_wildcards, right_wildcards; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) @@ -69,21 +69,21 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - leftWildcards ="%"; + left_wildcards ="%"; break; case WildcardsPos::right: - rightWildcards = "%"; + right_wildcards = "%"; break; case WildcardsPos::both: - leftWildcards ="%"; - rightWildcards = "%"; + left_wildcards ="%"; + right_wildcards = "%"; break; } - if (!tokens.empty() && ((++tokenPos)->type == TokenType::StringLiteral || tokenPos->type == TokenType::QuotedIdentifier)) - new_expr = CHOp +"(" + tokens.back() +", '"+leftWildcards + String(tokenPos->begin + 1,tokenPos->end - 1 ) + rightWildcards + 
"')"; + if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); @@ -97,7 +97,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - KQLOperatorValue opValue = KQLOperatorValue::none; + KQLOperatorValue op_value = KQLOperatorValue::none; auto token = String(pos->begin,pos->end); @@ -137,14 +137,14 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) --pos; if (KQLOperator.find(op) != KQLOperator.end()) - opValue = KQLOperator[op]; + op_value = KQLOperator[op]; String new_expr; - if (opValue == KQLOperatorValue::none) + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { - switch (opValue) + switch (op_value) { case KQLOperatorValue::contains: new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); @@ -241,7 +241,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - + case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; @@ -281,8 +281,8 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) ++pos; } - for (auto it=tokens.begin(); it!=tokens.end(); ++it) - res = res + *it + " "; + for (auto & token : tokens) + res = res + token + " "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 1bab8ae84ef..f2888ecfd8f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -97,7 +97,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - String genHaystackOpExpr(std::vector 
&tokens,IParser::Pos &tokenPos,String KQLOp, String CHOp, WildcardsPos wildcardsPos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0a9fa1fc4df..55aade6b2b9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -35,12 +35,12 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto select_query = std::make_shared(); node = select_query; - ParserKQLFilter KQLfilter_p; - ParserKQLLimit KQLlimit_p; - ParserKQLProject KQLproject_p; - ParserKQLSort KQLsort_p; - ParserKQLSummarize KQLsummarize_p; - ParserKQLTable KQLtable_p; + ParserKQLFilter kql_filter_p; + ParserKQLLimit kql_limit_p; + ParserKQLProject kql_project_p; + ParserKQLSort kql_sort_p; + ParserKQLSummarize kql_summarize_p; + ParserKQLTable kql_table_p; ASTPtr select_expression_list; ASTPtr tables; @@ -49,16 +49,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr order_expression_list; ASTPtr limit_length; - std::unordered_map KQLParser = { - { "filter",&KQLfilter_p}, - { "where",&KQLfilter_p}, - { "limit",&KQLlimit_p}, - { "take",&KQLlimit_p}, - { "project",&KQLproject_p}, - { "sort",&KQLsort_p}, - { "order",&KQLsort_p}, - { "summarize",&KQLsummarize_p}, - { "table",&KQLtable_p} + std::unordered_map kql_parser = { + { "filter",&kql_filter_p}, + { "where",&kql_filter_p}, + { "limit",&kql_limit_p}, + { "take",&kql_limit_p}, + { "project",&kql_project_p}, + { "sort",&kql_sort_p}, + { "order",&kql_sort_p}, + { "summarize",&kql_summarize_p}, + { "table",&kql_table_p} }; std::vector> operation_pos; @@ -71,44 +71,44 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (pos->type == TokenType::PipeMark) { ++pos; - String KQLoperator(pos->begin,pos->end); - if (pos->type != TokenType::BareWord 
|| KQLParser.find(KQLoperator) == KQLParser.end()) + String kql_operator(pos->begin,pos->end); + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; - operation_pos.push_back(std::make_pair(KQLoperator,pos)); + operation_pos.push_back(std::make_pair(kql_operator,pos)); } } for (auto &op_pos : operation_pos) { - auto KQLoperator = op_pos.first; + auto kql_operator = op_pos.first; auto npos = op_pos.second; if (!npos.isValid()) return false; - if (!KQLParser[KQLoperator]->parsePrepare(npos)) + if (!kql_parser[kql_operator]->parsePrepare(npos)) return false; } - if (!KQLtable_p.parse(pos, tables, expected)) + if (!kql_table_p.parse(pos, tables, expected)) return false; - if (!KQLproject_p.parse(pos, select_expression_list, expected)) + if (!kql_project_p.parse(pos, select_expression_list, expected)) return false; - if (!KQLlimit_p.parse(pos, limit_length, expected)) + if (!kql_limit_p.parse(pos, limit_length, expected)) return false; - if (!KQLfilter_p.parse(pos, where_expression, expected)) + if (!kql_filter_p.parse(pos, where_expression, expected)) return false; - if (!KQLsort_p.parse(pos, order_expression_list, expected)) + if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; - if (!KQLsummarize_p.parse(pos, select_expression_list, expected)) + if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else - group_expression_list = KQLsummarize_p.group_expression_list; + group_expression_list = kql_summarize_p.group_expression_list; select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 496a79b610a..ad2e8a05183 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -48,11 +48,11 @@ bool ParserKQLSort :: 
parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } has_directions.push_back(has_dir); - for (unsigned long i = 0; i < order_expression_list->children.size(); ++i) + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) { if (!has_directions[i]) { - auto order_expr = order_expression_list->children[i]->as(); + auto *order_expr = order_expression_list->children[i]->as(); order_expr->direction = -1; // default desc if (!order_expr->nulls_direction_was_explicitly_specified) order_expr->nulls_direction = -1; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 7dea87eef25..2afbad22131 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -21,10 +21,10 @@ bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery KQL_p; + ParserKQLWithUnionQuery kql_p; ASTPtr query; - bool parsed = KQL_p.parse(pos, query, expected); + bool parsed = kql_p.parse(pos, query, expected); if (!parsed) return false; @@ -36,19 +36,19 @@ bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // will support union next phase - ASTPtr KQLQuery; + ASTPtr kql_query; - if (!ParserKQLQuery().parse(pos, KQLQuery, expected)) + if (!ParserKQLQuery().parse(pos, kql_query, expected)) return false; - if (KQLQuery->as()) + if (kql_query->as()) { - node = std::move(KQLQuery); + node = std::move(kql_query); return true; } auto list_node = std::make_shared(); - list_node->children.push_back(KQLQuery); + list_node->children.push_back(kql_query); auto select_with_union_query = std::make_shared(); node = select_with_union_query; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 
0260902c937..48544a31104 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -38,42 +38,41 @@ std::pair ParserKQLSummarize::removeLastWord(String input) temp.push_back(token); } - String firstPart; + String first_part; for (std::size_t i = 0; i < temp.size() - 1; i++) { - firstPart += temp[i]; + first_part += temp[i]; } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); } -String ParserKQLSummarize::getBinGroupbyString(String exprBin) +String ParserKQLSummarize::getBinGroupbyString(String expr_bin) { String column_name; bool bracket_start = false; bool comma_start = false; String bin_duration; - for (std::size_t i = 0; i < exprBin.size(); i++) + for (char ch : expr_bin) { - if (comma_start && exprBin[i] != ')') - bin_duration += exprBin[i]; - if (exprBin[i] == ',') + if (comma_start && ch != ')') + bin_duration += ch; + if (ch == ',') { comma_start = true; bracket_start = false; } - if (bracket_start == true) - column_name += exprBin[i]; - if (exprBin[i] == '(') + if (bracket_start) + column_name += ch; + if (ch == '(') bracket_start = true; } - std::size_t len = bin_duration.size(); char bin_type = bin_duration[len - 1]; // y, d, h, m, s if ((bin_type != 'y') && (bin_type != 'd') && (bin_type != 'h') && (bin_type != 'm') && (bin_type != 's')) @@ -110,14 +109,13 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (op_pos.size() != 1) // now only support one summarize return false; - auto begin = pos; pos = op_pos.back(); - String exprAggregation; - String exprGroupby; - String exprColumns; - String exprBin; + String expr_aggregation; + String expr_groupby; + String expr_columns; + String expr_bin; bool groupby = false; bool bin_function = false; String bin_column; @@ -133,45 +131,45 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, 
ASTPtr & node, Expected & expecte { if (groupby) { - if (String(pos->begin, pos->end) == "bin" || bin_function == true) + if (String(pos->begin, pos->end) == "bin" || bin_function) { bin_function = true; - exprBin += String(pos->begin, pos->end); + expr_bin += String(pos->begin, pos->end); if (String(pos->begin, pos->end) == ")") { - exprBin = getBinGroupbyString(exprBin); - exprGroupby += exprBin; + expr_bin = getBinGroupbyString(expr_bin); + expr_groupby += expr_bin; bin_function = false; } } else - exprGroupby = exprGroupby + String(pos->begin, pos->end) + " "; + expr_groupby = expr_groupby + String(pos->begin, pos->end) + " "; } else { if (String(pos->begin, pos->end) == "=") { - std::pair temp = removeLastWord(exprAggregation); - exprAggregation = temp.first; + std::pair temp = removeLastWord(expr_aggregation); + expr_aggregation = temp.first; column_name = temp.second; } else { if (!column_name.empty()) { - exprAggregation = exprAggregation + String(pos->begin, pos->end); + expr_aggregation = expr_aggregation + String(pos->begin, pos->end); character_passed++; if (String(pos->begin, pos->end) == ")") { - exprAggregation = exprAggregation + " AS " + column_name; + expr_aggregation = expr_aggregation + " AS " + column_name; column_name = ""; } } else if (!bin_function) { - exprAggregation = exprAggregation + String(pos->begin, pos->end) + " "; + expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } } } @@ -179,25 +177,25 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; } - if (exprGroupby.empty()) - exprColumns = exprAggregation; + if (expr_groupby.empty()) + expr_columns = expr_aggregation; else { - if (exprAggregation.empty()) - exprColumns = exprGroupby; + if (expr_aggregation.empty()) + expr_columns = expr_groupby; else - exprColumns = exprGroupby + "," + exprAggregation; + expr_columns = expr_groupby + "," + expr_aggregation; } - Tokens tokenColumns(exprColumns.c_str(), exprColumns.c_str() + 
exprColumns.size()); - IParser::Pos posColumns(tokenColumns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(posColumns, node, expected)) + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) return false; if (groupby) { - Tokens tokenGroupby(exprGroupby.c_str(), exprGroupby.c_str() + exprGroupby.size()); - IParser::Pos postokenGroupby(tokenGroupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postokenGroupby, group_expression_list, expected)) + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1420d5ce519..b243f74d08f 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -13,8 +13,8 @@ public: protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - std::pair removeLastWord(String input); - String getBinGroupbyString(String exprBin); + static std::pair removeLastWord(String input); + static String getBinGroupbyString(String expr_bin); }; } From a90fb535a78f8f2f2cbed839739f5d70d7a40869 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 12 Jun 2022 20:05:51 -0700 Subject: [PATCH 009/279] Kusto-phase1: Fixed moy style issues.
--- src/Parsers/Kusto/ParserKQLOperators.cpp | 8 ++++---- src/Parsers/Kusto/ParserKQLOperators.h | 3 ++- src/Parsers/Kusto/ParserKQLProject.cpp | 2 -- src/Parsers/Kusto/ParserKQLQuery.cpp | 5 ++--- src/Parsers/Kusto/ParserKQLQuery.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 10 +++++----- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 64767ff3054..0d47eec899f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -83,7 +83,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; else throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); @@ -102,7 +102,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!" 
) + if ( token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -183,7 +183,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal: break; - + case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -191,7 +191,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - + case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index f2888ecfd8f..f3a995e908f 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -6,7 +6,8 @@ namespace DB { -class KQLOperators { +class KQLOperators +{ public: String getExprFromToken(IParser::Pos pos); protected: diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fee8cdb612b..0e25c9c4a6c 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -42,6 +42,4 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected return true; } - - } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 55aade6b2b9..1a850e77f48 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include namespace DB @@ -15,8 +14,8 @@ namespace DB bool ParserKQLBase :: parsePrepare(Pos & pos) { - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } String ParserKQLBase :: getExprFromToken(Pos pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 25aa4e6b83c..0545cd00cd9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -11,7 +11,7 @@ 
public: protected: std::vector op_pos; - std::vector expresions; + std::vector expressions; virtual String getExprFromToken(Pos pos); }; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 8d450799785..a7ae7fef579 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -9,17 +9,17 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { - if (!op_pos.empty()) + if (!op_pos.empty()) return false; - op_pos.push_back(pos); - return true; + op_pos.push_back(pos); + return true; } bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::unordered_set sql_keywords - ( { + ({ "SELECT", "INSERT", "CREATE", @@ -42,7 +42,7 @@ bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "TRUNCATE", "USE", "EXPLAIN" - } ); + }); if (op_pos.empty()) return false; From 89b7d83e9cc7ceeedd09d5f681798baebd715ab5 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 13 Jun 2022 06:26:02 -0700 Subject: [PATCH 010/279] Kusto-phase1: Fixed misleading indentation --- src/Parsers/Kusto/ParserKQLOperators.cpp | 4 ++-- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 0d47eec899f..bba52dcdea0 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -83,7 +83,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos } if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1 ) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; else throw 
Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); @@ -102,7 +102,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) auto token = String(pos->begin,pos->end); String op = token; - if ( token == "!") + if (token == "!") { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index a7ae7fef579..f1fc13d2c48 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserKQLTable :: parsePrepare(Pos & pos) { if (!op_pos.empty()) - return false; + return false; op_pos.push_back(pos); return true; From 591f98a1456747193b00d33ac8c04d0d02d374af Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 14 Jun 2022 07:40:06 -0700 Subject: [PATCH 011/279] Kusto-phase2: Add support for multiple summarize --- src/Parsers/Kusto/ParserKQLQuery.cpp | 6 +++ src/Parsers/Kusto/ParserKQLSummarize.cpp | 49 +++++++++++++++++++++++- src/Parsers/Kusto/ParserKQLSummarize.h | 5 ++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 1a850e77f48..d925f66b321 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -63,6 +63,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::vector> operation_pos; operation_pos.push_back(std::make_pair("table",pos)); + String table_name(pos->begin,pos->end); while (!pos->isEnd()) { @@ -104,10 +105,15 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!kql_sort_p.parse(pos, order_expression_list, expected)) return false; + kql_summarize_p.setTableName(table_name); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else + { group_expression_list = kql_summarize_p.group_expression_list; + if
(kql_summarize_p.tables) + tables = kql_summarize_p.tables; + } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 48544a31104..7a88fec1988 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -106,10 +106,57 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { if (op_pos.empty()) return true; - if (op_pos.size() != 1) // now only support one summarize + if (op_pos.size() > 2) // now only support max 2 summarize return false; auto begin = pos; + ASTPtr sub_qurery_table; + +// rewrite this part, make it resusable (may contains bin etc, and please inmplement summarize age= avg(Age) for sub query too): + if (op_pos.size() == 2) + { + bool groupby = false; + auto sub_pos = op_pos.front(); + String sub_aggregation; + String sub_groupby; + String sub_columns; + while (!sub_pos->isEnd() && sub_pos->type != TokenType::PipeMark && sub_pos->type != TokenType::Semicolon) + { + if (String(sub_pos->begin,sub_pos->end) == "by") + groupby = true; + else + { + if (groupby) + sub_groupby = sub_groupby + String(sub_pos->begin,sub_pos->end) +" "; + else + sub_aggregation = sub_aggregation + String(sub_pos->begin,sub_pos->end) +" "; + } + ++sub_pos; + } + + String sub_query; + if (sub_groupby.empty()) + { + sub_columns =sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name+")"; + } + else + { + if (sub_aggregation.empty()) + sub_columns = sub_groupby; + else + sub_columns = sub_groupby + "," + sub_aggregation; + sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+")"; + } + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + 
+ if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + return false; + tables = sub_qurery_table; + } + pos = op_pos.back(); String expr_aggregation; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index b243f74d08f..b71af138e7e 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -9,12 +9,15 @@ class ParserKQLSummarize : public ParserKQLBase { public: ASTPtr group_expression_list; - + ASTPtr tables; + void setTableName(String table_name_) {table_name = table_name_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; static std::pair removeLastWord(String input); static String getBinGroupbyString(String expr_bin); +private: + String table_name; }; } From e8c63be34adb274514cd0043d0d8c9cbb0d35460 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 012/279] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/CMakeLists.txt | 1 + .../KustoFunctions/IParserKQLFunction.cpp | 33 + .../Kusto/KustoFunctions/IParserKQLFunction.h | 39 + .../KQLAggregationFunctions.cpp | 24 + .../KustoFunctions/KQLAggregationFunctions.h | 9 + .../KustoFunctions/KQLBinaryFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 9 + .../KustoFunctions/KQLCastingFunctions.cpp | 51 ++ .../KustoFunctions/KQLCastingFunctions.h | 50 ++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 24 + .../KustoFunctions/KQLDateTimeFunctions.h | 9 + .../KustoFunctions/KQLDynamicFunctions.cpp | 24 + .../KustoFunctions/KQLDynamicFunctions.h | 9 + .../KustoFunctions/KQLFunctionFactory.cpp | 742 ++++++++++++++++++ .../Kusto/KustoFunctions/KQLFunctionFactory.h | 386 +++++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 24 + .../KustoFunctions/KQLGeneralFunctions.h | 9 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 24 + .../Kusto/KustoFunctions/KQLIPFunctions.h | 9 + .../KustoFunctions/KQLStringFunctions.cpp | 365 +++++++++ .../Kusto/KustoFunctions/KQLStringFunctions.h | 267 +++++++ .../KustoFunctions/KQLTimeSeriesFunctions.cpp | 24 + .../KustoFunctions/KQLTimeSeriesFunctions.h | 9 + src/Parsers/Kusto/ParserKQLOperators.cpp | 5 +- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 24 +- src/Parsers/Kusto/ParserKQLQuery.h | 7 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 13 +- src/Parsers/Kusto/ParserKQLSummarize.h | 4 + 29 files changed, 2210 insertions(+), 10 deletions(-) create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp create mode 100644 
src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index d74137f8a91..3bc1b3a981f 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -5,6 +5,7 @@ add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) add_headers_and_sources(clickhouse_parsers ./PRQL) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) if (TARGET ch_rust::prql) diff --git 
a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 00000000000..5455f41a0c2 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,33 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 00000000000..81bf97f390b --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +namespace DB +{ +class IParserKQLFunction //: public IParser +{ +public: + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + struct IncreaseDepthTag {}; + template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + bool convert(String &out,IParser::Pos &pos); + virtual const char * getName() const = 0; + virtual ~IParserKQLFunction() = default; +protected: + virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ 
-0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp new file mode 100644 index 00000000000..5f43aa16d8e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -0,0 +1,51 @@ + +#include +#include +#include +#include + +namespace DB +{ +bool Tobool::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDatetime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToDouble::convertImpl(String &out,IParser::Pos 
&pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToInt::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToTimespan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h new file mode 100644 index 00000000000..ab73fb3fc21 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +namespace DB +{ +class Tobool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tobool()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDatetime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todatetime()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToDouble : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "todouble()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toint()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tostring()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "totimespan()";} + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp new file mode 100644 index 00000000000..528f906e51e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -0,0 +1,742 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +namespace DB +{ /* Lookup table from a KQL function name to its KQLFunctionValue id; KQLFunctionFactory::get() below consults it. */ + std::unordered_map KQLFunctionFactory::kql_functions = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, + {"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray",
KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring", KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left",
KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + {"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, /* alias: shares the tobool id */ + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, /* alias: shares the todouble id */ + {"tostring", KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, /* NOTE(review): "totimespan" is already listed in the date/time group of this initializer with the same value */ + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list},
+ {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + {"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, +
{"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + }; + /* get(): returns the object created for the KQL function named kql_function, or nullptr when the name is not in kql_functions or when its switch case below returns nullptr. */ + +std::unique_ptr KQLFunctionFactory::get(String &kql_function) +{ +/* if (kql_function=="strrep") + return std::make_unique(); + else if (kql_function=="strcat") + return std::make_unique(); + else + return nullptr;*/ + if (kql_functions.find(kql_function) == kql_functions.end()) + return nullptr; + + auto kql_function_id = kql_functions[kql_function]; /* the key was found just above, so operator[] reads the existing entry (second lookup of the same key) */ + switch (kql_function_id) + { /* every case that returns nullptr has no object wired up in this switch */ + case KQLFunctionValue::none: + return nullptr; + + case KQLFunctionValue::timespan: + return nullptr; + + case KQLFunctionValue::datetime: + return nullptr; + + case KQLFunctionValue::ago: + return nullptr; + + case KQLFunctionValue::datetime_add: + return nullptr; + + case KQLFunctionValue::datetime_part: + return nullptr; + + case KQLFunctionValue::datetime_diff: + return nullptr; + + case KQLFunctionValue::dayofmonth: + return nullptr; + + case KQLFunctionValue::dayofweek: + return nullptr; + + case KQLFunctionValue::dayofyear: + return nullptr; + + case
KQLFunctionValue::endofday: + return nullptr; + + case KQLFunctionValue::endofweek: + return nullptr; + + case KQLFunctionValue::endofyear: + return nullptr; + + case KQLFunctionValue::format_datetime: + return nullptr; + + case KQLFunctionValue::format_timespan: + return nullptr; + + case KQLFunctionValue::getmonth: + return nullptr; + + case KQLFunctionValue::getyear: + return nullptr; + + case KQLFunctionValue::hoursofday: + return nullptr; + + case KQLFunctionValue::make_timespan: + return nullptr; + + case KQLFunctionValue::make_datetime: + return nullptr; + + case KQLFunctionValue::now: + return nullptr; + + case KQLFunctionValue::startofday: + return nullptr; + + case KQLFunctionValue::startofmonth: + return nullptr; + + case KQLFunctionValue::startofweek: + return nullptr; + + case KQLFunctionValue::startofyear: + return nullptr; + + case KQLFunctionValue::unixtime_microseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_milliseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_nanoseconds_todatetime: + return nullptr; + + case KQLFunctionValue::unixtime_seconds_todatetime: + return nullptr; + + case KQLFunctionValue::weekofyear: + return nullptr; + + + case KQLFunctionValue::base64_encode_tostring: + return nullptr; + + case KQLFunctionValue::base64_encode_fromguid: + return nullptr; + + case KQLFunctionValue::base64_decode_tostring: + return nullptr; + + case KQLFunctionValue::base64_decode_toarray: + return nullptr; + + case KQLFunctionValue::base64_decode_toguid: + return nullptr; + + case KQLFunctionValue::countof: + return nullptr; + + case KQLFunctionValue::extract: + return nullptr; + + case KQLFunctionValue::extract_all: + return nullptr; + + case KQLFunctionValue::extractjson: + return nullptr; + + case KQLFunctionValue::has_any_index: + return nullptr; + + case KQLFunctionValue::indexof: + return nullptr; + + case KQLFunctionValue::isempty: + return nullptr; + + case KQLFunctionValue::isnotempty: +
return nullptr; + + case KQLFunctionValue::isnotnull: + return nullptr; + + case KQLFunctionValue::isnull: + return nullptr; + + case KQLFunctionValue::parse_command_line: + return nullptr; + + case KQLFunctionValue::parse_csv: + return nullptr; + + case KQLFunctionValue::parse_json: + return nullptr; + + case KQLFunctionValue::parse_url: + return nullptr; + + case KQLFunctionValue::parse_urlquery: + return nullptr; + + case KQLFunctionValue::parse_version: + return nullptr; + + case KQLFunctionValue::replace_regex: + return nullptr; + + case KQLFunctionValue::reverse: + return nullptr; + + case KQLFunctionValue::split: + return nullptr; + + case KQLFunctionValue::strcat: + return std::make_unique(); + + case KQLFunctionValue::strcat_delim: + return nullptr; + + case KQLFunctionValue::strcmp: + return nullptr; + + case KQLFunctionValue::strlen: + return nullptr; + + case KQLFunctionValue::strrep: + return std::make_unique(); + + case KQLFunctionValue::substring: + return nullptr; + + case KQLFunctionValue::toupper: + return nullptr; + + case KQLFunctionValue::translate: + return nullptr; + + case KQLFunctionValue::trim: + return nullptr; + + case KQLFunctionValue::trim_end: + return nullptr; + + case KQLFunctionValue::trim_start: + return nullptr; + + case KQLFunctionValue::url_decode: + return nullptr; + + case KQLFunctionValue::url_encode: + return nullptr; + + case KQLFunctionValue::array_concat: + return nullptr; + + case KQLFunctionValue::array_iif: + return nullptr; + + case KQLFunctionValue::array_index_of: + return nullptr; + + case KQLFunctionValue::array_length: + return nullptr; + + case KQLFunctionValue::array_reverse: + return nullptr; + + case KQLFunctionValue::array_rotate_left: + return nullptr; + + case KQLFunctionValue::array_rotate_right: + return nullptr; + + case KQLFunctionValue::array_shift_left: + return nullptr; + + case KQLFunctionValue::array_shift_right: + return nullptr; + + case KQLFunctionValue::array_slice: + return nullptr; + + case
KQLFunctionValue::array_sort_asc: + return nullptr; + + case KQLFunctionValue::array_sort_desc: + return nullptr; + + case KQLFunctionValue::array_split: + return nullptr; + + case KQLFunctionValue::array_sum: + return nullptr; + + case KQLFunctionValue::bag_keys: + return nullptr; + + case KQLFunctionValue::bag_merge: + return nullptr; + + case KQLFunctionValue::bag_remove_keys: + return nullptr; + + case KQLFunctionValue::jaccard_index: + return nullptr; + + case KQLFunctionValue::pack: + return nullptr; + + case KQLFunctionValue::pack_all: + return nullptr; + + case KQLFunctionValue::pack_array: + return nullptr; + + case KQLFunctionValue::repeat: + return nullptr; + + case KQLFunctionValue::set_difference: + return nullptr; + + case KQLFunctionValue::set_has_element: + return nullptr; + + case KQLFunctionValue::set_intersect: + return nullptr; + + case KQLFunctionValue::set_union: + return nullptr; + + case KQLFunctionValue::treepath: + return nullptr; + + case KQLFunctionValue::zip: + return nullptr; + + case KQLFunctionValue::tobool: + return std::make_unique(); + + case KQLFunctionValue::todatetime: + return std::make_unique(); + + case KQLFunctionValue::todouble: + return std::make_unique(); + + case KQLFunctionValue::toint: + return std::make_unique(); + + case KQLFunctionValue::tostring: + return std::make_unique(); + + case KQLFunctionValue::totimespan: + return std::make_unique(); + + case KQLFunctionValue::arg_max: + return nullptr; + + case KQLFunctionValue::arg_min: + return nullptr; + + case KQLFunctionValue::avg: + return nullptr; + + case KQLFunctionValue::avgif: + return nullptr; + + case KQLFunctionValue::binary_all_and: + return nullptr; + + case KQLFunctionValue::binary_all_or: + return nullptr; + + case KQLFunctionValue::binary_all_xor: + return nullptr; + case KQLFunctionValue::buildschema: + return nullptr; + + case KQLFunctionValue::count: + return nullptr; + + case KQLFunctionValue::countif: + return nullptr; + + case
KQLFunctionValue::dcount: + return nullptr; + + case KQLFunctionValue::dcountif: + return nullptr; + + case KQLFunctionValue::make_bag: + return nullptr; + + case KQLFunctionValue::make_bag_if: + return nullptr; + + case KQLFunctionValue::make_list: + return nullptr; + + case KQLFunctionValue::make_list_if: + return nullptr; + + case KQLFunctionValue::make_list_with_nulls: + return nullptr; + + case KQLFunctionValue::make_set: + return nullptr; + + case KQLFunctionValue::make_set_if: + return nullptr; + + case KQLFunctionValue::max: + return nullptr; + + case KQLFunctionValue::maxif: + return nullptr; + + case KQLFunctionValue::min: + return nullptr; + + case KQLFunctionValue::minif: + return nullptr; + + case KQLFunctionValue::percentiles: + return nullptr; + + case KQLFunctionValue::percentiles_array: + return nullptr; + + case KQLFunctionValue::percentilesw: + return nullptr; + + case KQLFunctionValue::percentilesw_array: + return nullptr; + + case KQLFunctionValue::stdev: + return nullptr; + + case KQLFunctionValue::stdevif: + return nullptr; + + case KQLFunctionValue::sum: + return nullptr; + + case KQLFunctionValue::sumif: + return nullptr; + + case KQLFunctionValue::take_any: + return nullptr; + + case KQLFunctionValue::take_anyif: + return nullptr; + + case KQLFunctionValue::variance: + return nullptr; + + case KQLFunctionValue::varianceif: + return nullptr; + + + case KQLFunctionValue::series_fir: + return nullptr; + + case KQLFunctionValue::series_iir: + return nullptr; + + case KQLFunctionValue::series_fit_line: + return nullptr; + + case KQLFunctionValue::series_fit_line_dynamic: + return nullptr; + + case KQLFunctionValue::series_fit_2lines: + return nullptr; + + case KQLFunctionValue::series_fit_2lines_dynamic: + return nullptr; + + case KQLFunctionValue::series_outliers: + return nullptr; + + case KQLFunctionValue::series_periods_detect: + return nullptr; + + case KQLFunctionValue::series_periods_validate: + return nullptr; + + case
KQLFunctionValue::series_stats_dynamic: + return nullptr; + + case KQLFunctionValue::series_stats: + return nullptr; + + case KQLFunctionValue::series_fill_backward: + return nullptr; + + case KQLFunctionValue::series_fill_const: + return nullptr; + + case KQLFunctionValue::series_fill_forward: + return nullptr; + + case KQLFunctionValue::series_fill_linear: + return nullptr; + + + case KQLFunctionValue::ipv4_compare: + return nullptr; + + case KQLFunctionValue::ipv4_is_in_range: + return nullptr; + + case KQLFunctionValue::ipv4_is_match: + return nullptr; + + case KQLFunctionValue::ipv4_is_private: + return nullptr; + + case KQLFunctionValue::ipv4_netmask_suffix: + return nullptr; + + case KQLFunctionValue::parse_ipv4: + return nullptr; + + case KQLFunctionValue::parse_ipv4_mask: + return nullptr; + + case KQLFunctionValue::ipv6_compare: + return nullptr; + + case KQLFunctionValue::ipv6_is_match: + return nullptr; + + case KQLFunctionValue::parse_ipv6: + return nullptr; + + case KQLFunctionValue::parse_ipv6_mask: + return nullptr; + + case KQLFunctionValue::format_ipv4: + return nullptr; + + case KQLFunctionValue::format_ipv4_mask: + return nullptr; + + + case KQLFunctionValue::binary_and: + return nullptr; + + case KQLFunctionValue::binary_not: + return nullptr; + + case KQLFunctionValue::binary_or: + return nullptr; + + case KQLFunctionValue::binary_shift_left: + return nullptr; + + case KQLFunctionValue::binary_shift_right: + return nullptr; + + case KQLFunctionValue::binary_xor: + return nullptr; + + case KQLFunctionValue::bitset_count_ones: + return nullptr; + + case KQLFunctionValue::bin: + return nullptr; + } +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 00000000000..86e879b4668 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,386 @@ +#pragma once + +#include +#include +#include +namespace DB +{ /* KQLFunctionValue: one enumerator per KQL function id used by KQLFunctionFactory, stored as uint16_t; none is the first enumerator. */ + enum class
KQLFunctionValue : uint16_t + { none, + timespan, + datetime, + ago, + datetime_add, + datetime_part, + datetime_diff, + dayofmonth, + dayofweek, + dayofyear, + endofday, + endofweek, + endofyear, + format_datetime, + format_timespan, + getmonth, + getyear, + hoursofday, + make_timespan, + make_datetime, + now, + startofday, + startofmonth, + startofweek, + startofyear, + todatetime, + totimespan, + unixtime_microseconds_todatetime, + unixtime_milliseconds_todatetime, + unixtime_nanoseconds_todatetime, + unixtime_seconds_todatetime, + weekofyear, + + base64_encode_tostring, + base64_encode_fromguid, + base64_decode_tostring, + base64_decode_toarray, + base64_decode_toguid, + countof, + extract, + extract_all, + extractjson, + has_any_index, + indexof, + isempty, + isnotempty, + isnotnull, + isnull, + parse_command_line, + parse_csv, + parse_json, + parse_url, + parse_urlquery, + parse_version, + replace_regex, + reverse, + split, + strcat, + strcat_delim, + strcmp, + strlen, + strrep, + substring, + toupper, + translate, + trim, + trim_end, + trim_start, + url_decode, + url_encode, + + array_concat, + array_iif, + array_index_of, + array_length, + array_reverse, + array_rotate_left, + array_rotate_right, + array_shift_left, + array_shift_right, + array_slice, + array_sort_asc, + array_sort_desc, + array_split, + array_sum, + bag_keys, + bag_merge, + bag_remove_keys, + jaccard_index, + pack, + pack_all, + pack_array, + repeat, + set_difference, + set_has_element, + set_intersect, + set_union, + treepath, + zip, + + tobool, + todouble, + toint, + tostring, + + arg_max, + arg_min, + avg, + avgif, + binary_all_and, + binary_all_or, + binary_all_xor, + buildschema, + count, + countif, + dcount, + dcountif, + make_bag, + make_bag_if, + make_list, + make_list_if, + make_list_with_nulls, + make_set, + make_set_if, + max, + maxif, + min, + minif, + percentiles, + percentiles_array, + percentilesw, + percentilesw_array, + stdev, + stdevif, + sum, + sumif, + take_any, +
take_anyif, + variance, + varianceif, + + series_fir, + series_iir, + series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin + }; + +class KQLFunctionFactory /* Static factory that maps a KQL function name to an object handling that function. */ +{ +public: + static std::unique_ptr get(String &kql_function); /* yields nullptr when nothing is available for the given name */ + +protected: + /* Name-to-id table; its live initializer is in KQLFunctionFactory.cpp. The block comment following the declaration is a disabled copy of that initializer. */ + + static std::unordered_map kql_functions;/* = + { + {"datetime", KQLFunctionValue::datetime}, + {"ago", KQLFunctionValue::ago}, + {"datetime_add", KQLFunctionValue::datetime_add}, + {"datetime_part", KQLFunctionValue::datetime_part}, + {"datetime_diff", KQLFunctionValue::datetime_diff}, + {"dayofmonth", KQLFunctionValue::dayofmonth}, + {"dayofweek", KQLFunctionValue::dayofweek}, + {"dayofyear", KQLFunctionValue::dayofyear}, + {"endofday", KQLFunctionValue::endofday}, + {"endofweek", KQLFunctionValue::endofweek}, + {"endofyear", KQLFunctionValue::endofyear}, + {"format_datetime", KQLFunctionValue::format_datetime}, + {"format_timespan", KQLFunctionValue::format_timespan}, + {"getmonth", KQLFunctionValue::getmonth}, + {"getyear", KQLFunctionValue::getyear}, + {"hoursofday", KQLFunctionValue::hoursofday}, + {"make_timespan", KQLFunctionValue::make_timespan}, + {"make_datetime", KQLFunctionValue::make_datetime}, + {"now", KQLFunctionValue::now}, + {"startofday", KQLFunctionValue::startofday}, + {"startofmonth", KQLFunctionValue::startofmonth}, + {"startofweek", KQLFunctionValue::startofweek}, + {"startofyear", KQLFunctionValue::startofyear}, +
{"todatetime", KQLFunctionValue::todatetime}, + {"totimespan", KQLFunctionValue::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, + {"weekofyear", KQLFunctionValue::weekofyear}, + + {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, + {"countof", KQLFunctionValue::countof}, + {"extract", KQLFunctionValue::extract}, + {"extract_all", KQLFunctionValue::extract_all}, + {"extractjson", KQLFunctionValue::extractjson}, + {"has_any_index", KQLFunctionValue::has_any_index}, + {"indexof", KQLFunctionValue::indexof}, + {"isempty", KQLFunctionValue::isempty}, + {"isnotempty", KQLFunctionValue::isnotempty}, + {"isnotnull", KQLFunctionValue::isnotnull}, + {"isnull", KQLFunctionValue::isnull}, + {"parse_command_line", KQLFunctionValue::parse_command_line}, + {"parse_csv", KQLFunctionValue::parse_csv}, + {"parse_json", KQLFunctionValue::parse_json}, + {"parse_url", KQLFunctionValue::parse_url}, + {"parse_urlquery", KQLFunctionValue::parse_urlquery}, + {"parse_version", KQLFunctionValue::parse_version}, + {"replace_regex", KQLFunctionValue::replace_regex}, + {"reverse", KQLFunctionValue::reverse}, + {"split", KQLFunctionValue::split}, + {"strcat", KQLFunctionValue::strcat}, + {"strcat_delim", KQLFunctionValue::strcat_delim}, + {"strcmp", KQLFunctionValue::strcmp}, + {"strlen", KQLFunctionValue::strlen}, + {"strrep", KQLFunctionValue::strrep}, + {"substring",
KQLFunctionValue::substring}, + {"toupper", KQLFunctionValue::toupper}, + {"translate", KQLFunctionValue::translate}, + {"trim", KQLFunctionValue::trim}, + {"trim_end", KQLFunctionValue::trim_end}, + {"trim_start", KQLFunctionValue::trim_start}, + {"url_decode", KQLFunctionValue::url_decode}, + {"url_encode", KQLFunctionValue::url_encode}, + + {"array_concat", KQLFunctionValue::array_concat}, + {"array_iif", KQLFunctionValue::array_iif}, + {"array_index_of", KQLFunctionValue::array_index_of}, + {"array_length", KQLFunctionValue::array_length}, + {"array_reverse", KQLFunctionValue::array_reverse}, + {"array_rotate_left", KQLFunctionValue::array_rotate_left}, + {"array_rotate_right", KQLFunctionValue::array_rotate_right}, + {"array_shift_left", KQLFunctionValue::array_shift_left}, + {"array_shift_right", KQLFunctionValue::array_shift_right}, + {"array_slice", KQLFunctionValue::array_slice}, + {"array_sort_asc", KQLFunctionValue::array_sort_asc}, + {"array_sort_desc", KQLFunctionValue::array_sort_desc}, + {"array_split", KQLFunctionValue::array_split}, + {"array_sum", KQLFunctionValue::array_sum}, + {"bag_keys", KQLFunctionValue::bag_keys}, + {"bag_merge", KQLFunctionValue::bag_merge}, + {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, + {"jaccard_index", KQLFunctionValue::jaccard_index}, + {"pack", KQLFunctionValue::pack}, + {"pack_all", KQLFunctionValue::pack_all}, + {"pack_array", KQLFunctionValue::pack_array}, + {"repeat", KQLFunctionValue::repeat}, + {"set_difference", KQLFunctionValue::set_difference}, + {"set_has_element", KQLFunctionValue::set_has_element}, + {"set_intersect", KQLFunctionValue::set_intersect}, + {"set_union", KQLFunctionValue::set_union}, + {"treepath", KQLFunctionValue::treepath}, + {"zip", KQLFunctionValue::zip}, + + {"tobool", KQLFunctionValue::tobool}, + {"toboolean", KQLFunctionValue::tobool}, + {"todouble", KQLFunctionValue::todouble}, + {"toint", KQLFunctionValue::toint}, + {"toreal", KQLFunctionValue::todouble}, + {"tostring",
KQLFunctionValue::tostring}, + {"totimespan", KQLFunctionValue::totimespan}, + + {"arg_max", KQLFunctionValue::arg_max}, + {"arg_min", KQLFunctionValue::arg_min}, + {"avg", KQLFunctionValue::avg}, + {"avgif", KQLFunctionValue::avgif}, + {"binary_all_and", KQLFunctionValue::binary_all_and}, + {"binary_all_or", KQLFunctionValue::binary_all_or}, + {"binary_all_xor", KQLFunctionValue::binary_all_xor}, + {"buildschema", KQLFunctionValue::buildschema}, + {"count", KQLFunctionValue::count}, + {"countif", KQLFunctionValue::countif}, + {"dcount", KQLFunctionValue::dcount}, + {"dcountif", KQLFunctionValue::dcountif}, + {"make_bag", KQLFunctionValue::make_bag}, + {"make_bag_if", KQLFunctionValue::make_bag_if}, + {"make_list", KQLFunctionValue::make_list}, + {"make_list_if", KQLFunctionValue::make_list_if}, + {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, + {"make_set", KQLFunctionValue::make_set}, + {"make_set_if", KQLFunctionValue::make_set_if}, + {"max", KQLFunctionValue::max}, + {"maxif", KQLFunctionValue::maxif}, + {"min", KQLFunctionValue::min}, + {"minif", KQLFunctionValue::minif}, + {"percentiles", KQLFunctionValue::percentiles}, + {"percentiles_array", KQLFunctionValue::percentiles_array}, + {"percentilesw", KQLFunctionValue::percentilesw}, + {"percentilesw_array", KQLFunctionValue::percentilesw_array}, + {"stdev", KQLFunctionValue::stdev}, + {"stdevif", KQLFunctionValue::stdevif}, + {"sum", KQLFunctionValue::sum}, + {"sumif", KQLFunctionValue::sumif}, + {"take_any", KQLFunctionValue::take_any}, + {"take_anyif", KQLFunctionValue::take_anyif}, + {"variance", KQLFunctionValue::variance}, + {"varianceif", KQLFunctionValue::varianceif}, + + {"series_fir", KQLFunctionValue::series_fir}, + {"series_iir", KQLFunctionValue::series_iir}, + {"series_fit_line", KQLFunctionValue::series_fit_line}, + {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, +
{"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunctionValue::series_outliers}, + {"series_periods_detect", KQLFunctionValue::series_periods_detect}, + {"series_periods_validate", KQLFunctionValue::series_periods_validate}, + {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, + {"series_stats", KQLFunctionValue::series_stats}, + {"series_fill_backward", KQLFunctionValue::series_fill_backward}, + {"series_fill_const", KQLFunctionValue::series_fill_const}, + {"series_fill_forward", KQLFunctionValue::series_fill_forward}, + {"series_fill_linear", KQLFunctionValue::series_fill_linear}, + + {"ipv4_compare", KQLFunctionValue::ipv4_compare}, + {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, + {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunctionValue::parse_ipv4}, + {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, + {"ipv6_compare", KQLFunctionValue::ipv6_compare}, + {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, + {"parse_ipv6", KQLFunctionValue::parse_ipv6}, + {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, + {"format_ipv4", KQLFunctionValue::format_ipv4}, + {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, + + {"binary_and", KQLFunctionValue::binary_and}, + {"binary_not", KQLFunctionValue::binary_not}, + {"binary_or", KQLFunctionValue::binary_or}, + {"binary_shift_left", KQLFunctionValue::binary_shift_left}, + {"binary_shift_right", KQLFunctionValue::binary_shift_right}, + {"binary_xor", KQLFunctionValue::binary_xor}, + {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, + {"bin", KQLFunctionValue::bin} + };*/ + +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 ---
/dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp new file mode 100644 index 00000000000..851c631d1ce --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -0,0 +1,365 @@ +#include +#include +#include +#include + +namespace DB +{ + /* NOTE(review): the convertImpl functions in this file whose whole body copies the current token text into out and returns false look like not-yet-implemented stubs - confirm. */ +bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return
false; +} + +bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Base64DecodeToGuid::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool CountOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Extract::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ExtractAll::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ExtractJson::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool HasAnyIndex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IndexOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool IsNull::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; +
return false; +} + +bool ParseCsv::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseJson::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseUrl::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseVersion::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ReplaceRegex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Reverse::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Split::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + /* StrCat: rewrites a KQL strcat(...) call as concat(...). Requires '(' as the next token, otherwise steps pos back and returns false. Tokens up to the closing ')' are appended in order as written (commas included); a BareWord token is first offered to KQLFunctionFactory and replaced by the converted text when convert() succeeds. Returns true with the result in out once ')' is reached, false if the loop ends first. */ +bool StrCat::convertImpl(String &out,IParser::Pos &pos) +{ + std::unique_ptr fun; + std::vector args; + String res = "concat("; + + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return false; + } + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + String tmp_arg = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,pos)) + tmp_arg = new_arg; + } + else if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : args) + res+=arg; + + res += ")"; + out = res; + return true; + } + args.push_back(tmp_arg); + } + return false; +} + +bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) +{ + String res =
String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrCmp::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrLen::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StrRep::convertImpl(String &out,IParser::Pos &pos) +{ + std::unique_ptr fun; + String res = String(pos->begin,pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return false; + } + ++pos; + String value = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { String func_value; + fun = KQLFunctionFactory::get(value); + if (fun && fun->convert(func_value,pos)) + value = func_value; + } + ++pos; + if (pos->type != TokenType::Comma) + return false; + + ++pos; + String multiplier = String(pos->begin,pos->end); + String new_multiplier; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::BareWord ) + { + String fun_multiplier; + fun = KQLFunctionFactory::get(multiplier); + if ( fun && fun->convert(fun_multiplier,pos)) + new_multiplier += fun_multiplier; + } + else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter + { + break; + } + else + new_multiplier += String(pos->begin,pos->end); + ++pos; + } + + if (!new_multiplier.empty()) + multiplier = new_multiplier; + + String delimiter ; + if (pos->type == TokenType::Comma) + { + ++pos; + delimiter = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { String func_delimiter; + fun = KQLFunctionFactory::get(delimiter); + if (fun && fun->convert(func_delimiter,pos)) + delimiter = func_delimiter; + } + ++pos; + } + if (pos->type == TokenType::ClosingRoundBracket) + { + if (!delimiter.empty()) + { + String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; + res =
"substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + } + else + res = "repeat("+ value + ", " + multiplier + ")"; + out = res; + return true; + } + return false; +} + +bool SubString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ToUpper::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Translate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Trim::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TrimEnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TrimStart::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UrlDecode::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UrlEncode::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h new file mode 100644 index 00000000000..db7ab507750 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -0,0 +1,267 @@ +#pragma once + +#include +#include +namespace DB +{ +class Base64EncodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_encode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64EncodeFromGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"base64_encode_fromguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_tostring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toarray()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Base64DecodeToGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "base64_decode_toguid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "countof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Extract : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extract_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ExtractJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "extractjson()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HasAnyIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "indexof()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"isempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotEmpty : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotempty()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNotNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnotnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class IsNull : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnull()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCommandLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_command_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseCsv : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_csv()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseJson : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_json()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseUrl : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_url()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseUrlQuery : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_urlquery()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseVersion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_version()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ReplaceRegex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "replace_regex()"; } + bool convertImpl(String 
&out,IParser::Pos &pos) override; +}; + +class Reverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Split : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCatDelim : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcat_delim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrCmp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strcmp()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrLen : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strlen()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StrRep : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "strrep()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SubString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "substring()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ToUpper : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toupper()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Translate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "translate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Trim : public IParserKQLFunction +{ +protected: + const char * 
getName() const override { return "trim()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimEnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_end()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TrimStart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "trim_start()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UrlDecode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_decode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UrlEncode : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "url_encode()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp new file mode 100644 index 00000000000..20b4b880a83 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -0,0 +1,24 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + + + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h new file mode 100644 index 00000000000..45759032826 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include +namespace DB +{ + +} + diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index bba52dcdea0..2a3d8238c46 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -90,11 +90,13 @@ String KQLOperators::genHaystackOpExpr(std::vector 
&tokens,IParser::Pos return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos pos) +String KQLOperators::getExprFromToken(IParser::Pos &pos) { String res; std::vector tokens; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -284,6 +286,7 @@ String KQLOperators::getExprFromToken(IParser::Pos pos) for (auto & token : tokens) res = res + token + " "; + pos = begin; return res; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index f3a995e908f..564aa9d8aa5 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos pos); + String getExprFromToken(IParser::Pos &pos) ; protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d925f66b321..f1348c4b3c6 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -8,7 +8,9 @@ #include #include #include - +#include +#include +#include namespace DB { @@ -18,12 +20,22 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) return true; } -String ParserKQLBase :: getExprFromToken(Pos pos) +String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + std::unique_ptr kql_function; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - res = res + String(pos->begin,pos->end) +" "; + String token = String(pos->begin,pos->end); + String new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + res = res + token +" "; ++pos; } return res; @@ -106,6 +118,7 @@ bool 
ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; kql_summarize_p.setTableName(table_name); + kql_summarize_p.setFilterPos(kql_filter_p.op_pos); if (!kql_summarize_p.parse(pos, select_expression_list, expected)) return false; else @@ -113,6 +126,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) group_expression_list = kql_summarize_p.group_expression_list; if (kql_summarize_p.tables) tables = kql_summarize_p.tables; + + if (kql_summarize_p.where_expression) + where_expression = kql_summarize_p.where_expression; } select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 0545cd00cd9..42122fb6e00 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -7,12 +7,13 @@ namespace DB class ParserKQLBase : public IParserBase { public: - virtual bool parsePrepare(Pos & pos) ; + virtual bool parsePrepare(Pos & pos); + std::vector op_pos; protected: - std::vector op_pos; + std::vector expressions; - virtual String getExprFromToken(Pos pos); + virtual String getExprFromToken(Pos &pos); }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 2afbad22131..cc4bece7ebf 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -4,7 +4,7 @@ #include #include #include - +#include namespace DB { @@ -57,5 +57,16 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 
b71af138e7e..8a92412d87c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -10,7 +10,10 @@ class ParserKQLSummarize : public ParserKQLBase public: ASTPtr group_expression_list; ASTPtr tables; + ASTPtr where_expression; + void setTableName(String table_name_) {table_name = table_name_;} + void setFilterPos(std::vector &filter_pos_) {filter_pos = filter_pos_;} protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -18,6 +21,7 @@ protected: static String getBinGroupbyString(String expr_bin); private: String table_name; + std::vector filter_pos; }; } From 7553e551f11a3ad36c4b8d7dcc45524331da3f4d Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 013/279] Kusto-phase2: Add KQL functions parser --- .../KustoFunctions/IParserKQLFunction.cpp | 45 ++- .../Kusto/KustoFunctions/IParserKQLFunction.h | 3 +- .../KQLAggregationFunctions.cpp | 244 +++++++++++- .../KustoFunctions/KQLAggregationFunctions.h | 245 ++++++++++++ .../KustoFunctions/KQLBinaryFunctions.cpp | 48 ++- .../Kusto/KustoFunctions/KQLBinaryFunctions.h | 48 +++ .../KustoFunctions/KQLCastingFunctions.cpp | 7 +- .../KustoFunctions/KQLCastingFunctions.h | 6 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 202 +++++++++- .../KustoFunctions/KQLDateTimeFunctions.h | 203 ++++++++++ .../KustoFunctions/KQLDynamicFunctions.cpp | 195 +++++++++- .../KustoFunctions/KQLDynamicFunctions.h | 195 ++++++++++ .../KustoFunctions/KQLFunctionFactory.cpp | 350 +++++++++--------- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 190 +--------- .../KustoFunctions/KQLGeneralFunctions.cpp | 8 +- .../KustoFunctions/KQLGeneralFunctions.h | 6 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 90 ++++- .../Kusto/KustoFunctions/KQLIPFunctions.h | 90 +++++ .../KustoFunctions/KQLStringFunctions.cpp | 92 ++--- .../Kusto/KustoFunctions/KQLStringFunctions.h | 17 +- 
.../KustoFunctions/KQLTimeSeriesFunctions.cpp | 104 +++++- .../KustoFunctions/KQLTimeSeriesFunctions.h | 104 ++++++ src/Parsers/Kusto/ParserKQLFilter.cpp | 6 +- src/Parsers/Kusto/ParserKQLOperators.cpp | 43 ++- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 16 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 - src/Parsers/Kusto/ParserKQLStatement.h | 1 - src/Parsers/Kusto/ParserKQLTable.h | 1 - 29 files changed, 2080 insertions(+), 492 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 5455f41a0c2..e7134678e95 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -15,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -30,4 +30,47 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) }); } +bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) +{ + std::unique_ptr fun; + std::vector args; + + String res =ch_fn + "("; + out = res; + auto begin = pos; + + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + pos = begin; + return false; + } + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + String tmp_arg = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,pos)) + tmp_arg = new_arg; + } + else if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : args) + res+=arg; + + res += ")"; + out = res; + return true; + } + args.push_back(tmp_arg); + } + + pos = begin; + return false; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 81bf97f390b..c633f78fa33 100644 --- 
a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -4,7 +4,7 @@ #include namespace DB { -class IParserKQLFunction //: public IParser +class IParserKQLFunction { public: template @@ -33,6 +33,7 @@ public: virtual ~IParserKQLFunction() = default; protected: virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; + static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 20b4b880a83..91c3639ace4 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,249 @@ namespace DB { +bool ArgMax::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool ArgMin::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Avg::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool AvgIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool 
Count::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool CountIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DCount::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DCountIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeBag::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeList::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeSet::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Max::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MaxIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Min::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MinIf::convertImpl(String &out,IParser::Pos 
&pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Percentiles::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Stdev::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StdevIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Sum::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SumIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TakeAny::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TakeAnyIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Variance::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool VarianceIf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h index 45759032826..6e7130420f4 100644 --- 
a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -4,6 +4,251 @@ #include namespace DB { +class ArgMax : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArgMin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "arg_min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Avg : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avg()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class AvgIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "avgif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryAllXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_all_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BuildSchema : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "buildschema()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Count : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "count()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class CountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"countif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCount : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcount()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DCountIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dcountif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBag : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeBagIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_bag_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeList : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeListWithNulls : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_list_with_nulls()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSet : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeSetIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_set_if()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Max : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; 
+ +class MaxIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "maxif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Min : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MinIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "minif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentiles : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentilesArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentiles_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilesw : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PercentileswArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilesw_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Stdev : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdev()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StdevIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "stdevif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Sum : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SumIf : public IParserKQLFunction +{ +protected: + const char * getName() const 
override { return "sumif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAny : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_any()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TakeAnyIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "take_anyif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Variance : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "variance()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class VarianceIf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "varianceif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp index 20b4b880a83..2a06c4e715b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,53 @@ namespace DB { +bool BinaryAnd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool BinaryNot::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryOr::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryShiftLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryShiftRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BinaryXor::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BitsetCountOnes::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h index 45759032826..94ca3a5abbf 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h @@ -4,6 +4,54 @@ #include namespace DB { +class BinaryAnd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_and()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryNot : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_not()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryOr : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_or()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BinaryXor : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "binary_xor()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BitsetCountOnes : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bitset_count_ones()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index 5f43aa16d8e..9129d82aa78 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -6,14 +5,14 @@ namespace DB { -bool Tobool::convertImpl(String &out,IParser::Pos &pos) +bool ToBool::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ToDatetime::convertImpl(String &out,IParser::Pos &pos) +bool ToDateTime::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -41,7 +40,7 @@ bool ToString::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ToTimespan::convertImpl(String &out,IParser::Pos &pos) +bool ToTimeSpan::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h index ab73fb3fc21..fa6a20e6068 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h @@ -4,14 +4,14 @@ #include namespace DB { -class Tobool : public IParserKQLFunction +class ToBool : public IParserKQLFunction { protected: const char * getName() const override { return "tobool()";} bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToDatetime : public IParserKQLFunction +class ToDateTime : public IParserKQLFunction { protected: const char * getName() const override { return "todatetime()";} @@ -39,7 +39,7 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ToTimespan : public IParserKQLFunction +class ToTimeSpan : public IParserKQLFunction { protected: const char * getName() const override { return "totimespan()";} diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 20b4b880a83..0f098cbebda 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,207 @@ namespace DB { +bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool DateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ago::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeAdd::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +}; + +bool DatetimePart::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool EndOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = 
String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool GetYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Now::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String 
&out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 45759032826..7627465ab5b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -5,5 +5,208 @@ namespace DB { +class TimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool 
convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + 
+class HoursOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "hoursofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String 
&out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "weekofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 20b4b880a83..a6ff0a374eb 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,200 @@ namespace DB { +bool ArrayConcat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayRotateRight::convertImpl(String &out,IParser::Pos &pos) +{ + 
String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftLeft::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArrayShiftRight::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySlice::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortAsc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySortDesc::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ArraySum::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagKeys::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagMerge::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool BagRemoveKeys::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool JaccardIndex::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Pack::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackAll::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool PackArray::convertImpl(String &out,IParser::Pos &pos) +{ + String 
res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Repeat::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetDifference::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetHasElement::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetIntersect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SetUnion::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool TreePath::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Zip::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h index 45759032826..e36fd60eaea 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h @@ -4,6 +4,201 @@ #include namespace DB { +class ArrayConcat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_concat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_iif()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayIndexOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_index_of()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayLength : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_length()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayReverse : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_reverse()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayRotateRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_rotate_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftLeft : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_left()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArrayShiftRight : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_shift_right()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySlice : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_slice()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortAsc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_asc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySortDesc : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sort_desc()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySplit : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_split()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ArraySum 
: public IParserKQLFunction +{ +protected: + const char * getName() const override { return "array_sum()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagMerge : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_merge()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class BagRemoveKeys : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bag_remove_keys()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class JaccardIndex : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "jaccard_index()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Pack : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackAll : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_all()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class PackArray : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pack_array()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Repeat : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "repeat()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetDifference : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_difference()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetHasElement : public IParserKQLFunction +{ +protected: + const char * getName() 
const override { return "set_has_element()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetIntersect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_intersect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SetUnion : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "set_union()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class TreePath : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "treepath()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Zip : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "zip()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 528f906e51e..25e0c2af2f9 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -65,7 +64,9 @@ namespace DB {"indexof", KQLFunctionValue::indexof}, {"isempty", KQLFunctionValue::isempty}, {"isnotempty", KQLFunctionValue::isnotempty}, + {"notempty", KQLFunctionValue::isnotempty}, {"isnotnull", KQLFunctionValue::isnotnull}, + {"notnull", KQLFunctionValue::isnotnull}, {"isnull", KQLFunctionValue::isnull}, {"parse_command_line", KQLFunctionValue::parse_command_line}, {"parse_csv", KQLFunctionValue::parse_csv}, @@ -82,6 +83,7 @@ namespace DB {"strlen", KQLFunctionValue::strlen}, {"strrep", KQLFunctionValue::strrep}, {"substring", KQLFunctionValue::substring}, + {"tolower", KQLFunctionValue::tolower}, {"toupper", KQLFunctionValue::toupper}, {"translate", KQLFunctionValue::translate}, {"trim", KQLFunctionValue::trim}, @@ -206,12 +208,6 @@ namespace DB 
std::unique_ptr KQLFunctionFactory::get(String &kql_function) { -/* if (kql_function=="strrep") - return std::make_unique(); - else if (kql_function=="strcat") - return std::make_unique(); - else - return nullptr;*/ if (kql_functions.find(kql_function) == kql_functions.end()) return nullptr; @@ -222,293 +218,295 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return nullptr; case KQLFunctionValue::timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ago: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_add: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_part: - return nullptr; + return std::make_unique(); case KQLFunctionValue::datetime_diff: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dayofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::endofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::getyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::hoursofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_timespan: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_datetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::now: - return nullptr; 
+ return std::make_unique(); case KQLFunctionValue::startofday: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofmonth: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofweek: - return nullptr; + return std::make_unique(); case KQLFunctionValue::startofyear: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_microseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_milliseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_nanoseconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::unixtime_seconds_todatetime: - return nullptr; + return std::make_unique(); case KQLFunctionValue::weekofyear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::base64_encode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_encode_fromguid: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_tostring: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toarray: - return nullptr; + return std::make_unique(); case KQLFunctionValue::base64_decode_toguid: - return nullptr; + return std::make_unique(); case KQLFunctionValue::countof: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extract: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extract_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::extractjson: - return nullptr; + return std::make_unique(); case KQLFunctionValue::has_any_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::indexof: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isempty: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnotempty: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::isnotnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::isnull: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_command_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_csv: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_json: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_url: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_urlquery: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_version: - return nullptr; + return std::make_unique(); case KQLFunctionValue::replace_regex: - return nullptr; + return std::make_unique(); case KQLFunctionValue::reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcat: return std::make_unique(); case KQLFunctionValue::strcat_delim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strcmp: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strlen: - return nullptr; + return std::make_unique(); case KQLFunctionValue::strrep: return std::make_unique(); case KQLFunctionValue::substring: - return nullptr; + return std::make_unique(); + + case KQLFunctionValue::tolower: + return std::make_unique(); case KQLFunctionValue::toupper: - return nullptr; + return std::make_unique(); case KQLFunctionValue::translate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_end: - return nullptr; + return std::make_unique(); case KQLFunctionValue::trim_start: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_decode: - return nullptr; + return std::make_unique(); case KQLFunctionValue::url_encode: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::array_concat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_iif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_index_of: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_length: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_reverse: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_rotate_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_slice: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_asc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sort_desc: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_split: - return nullptr; + return std::make_unique(); case KQLFunctionValue::array_sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_keys: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_merge: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bag_remove_keys: - return nullptr; + return std::make_unique(); case KQLFunctionValue::jaccard_index: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_all: - return nullptr; + return std::make_unique(); case KQLFunctionValue::pack_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::repeat: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_difference: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_has_element: - return nullptr; + return 
std::make_unique(); case KQLFunctionValue::set_intersect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::set_union: - return nullptr; + return std::make_unique(); case KQLFunctionValue::treepath: - return nullptr; + return std::make_unique(); case KQLFunctionValue::zip: - return nullptr; + return std::make_unique(); case KQLFunctionValue::tobool: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todatetime: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::todouble: return std::make_unique(); @@ -520,222 +518,220 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function return std::make_unique(); case KQLFunctionValue::totimespan: - return std::make_unique(); + return std::make_unique(); case KQLFunctionValue::arg_max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::arg_min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avg: - return nullptr; + return std::make_unique(); case KQLFunctionValue::avgif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_all_xor: - return nullptr; + return std::make_unique(); + case KQLFunctionValue::buildschema: - return nullptr; + return std::make_unique(); case KQLFunctionValue::count: - return nullptr; + return std::make_unique(); case KQLFunctionValue::countif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcount: - return nullptr; + return std::make_unique(); case KQLFunctionValue::dcountif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_bag_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list: - return nullptr; + return std::make_unique(); 
case KQLFunctionValue::make_list_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_list_with_nulls: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set: - return nullptr; + return std::make_unique(); case KQLFunctionValue::make_set_if: - return nullptr; + return std::make_unique(); case KQLFunctionValue::max: - return nullptr; + return std::make_unique(); case KQLFunctionValue::maxif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::min: - return nullptr; + return std::make_unique(); case KQLFunctionValue::minif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentiles_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw: - return nullptr; + return std::make_unique(); case KQLFunctionValue::percentilesw_array: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdev: - return nullptr; + return std::make_unique(); case KQLFunctionValue::stdevif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sum: - return nullptr; + return std::make_unique(); case KQLFunctionValue::sumif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::take_any: - return nullptr; + return std::make_unique(); case KQLFunctionValue::take_anyif: - return nullptr; + return std::make_unique(); case KQLFunctionValue::variance: - return nullptr; + return std::make_unique(); case KQLFunctionValue::varianceif: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::series_fir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_iir: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_line_dynamic: - return nullptr; + return std::make_unique(); case 
KQLFunctionValue::series_fit_2lines: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fit_2lines_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_outliers: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_detect: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_periods_validate: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats_dynamic: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_stats: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_backward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_const: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_forward: - return nullptr; + return std::make_unique(); case KQLFunctionValue::series_fill_linear: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::ipv4_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_in_range: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_is_private: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv4_netmask_suffix: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv4: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv4_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_compare: - return nullptr; + return std::make_unique(); case KQLFunctionValue::ipv6_is_match: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6: - return nullptr; + return std::make_unique(); case KQLFunctionValue::parse_ipv6_mask: - return nullptr; + return std::make_unique(); case KQLFunctionValue::format_ipv4: - return nullptr; + 
return std::make_unique(); case KQLFunctionValue::format_ipv4_mask: - return nullptr; - + return std::make_unique(); case KQLFunctionValue::binary_and: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_not: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_or: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_left: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_shift_right: - return nullptr; + return std::make_unique(); case KQLFunctionValue::binary_xor: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bitset_count_ones: - return nullptr; + return std::make_unique(); case KQLFunctionValue::bin: - return nullptr; + return std::make_unique(); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 86e879b4668..8f57133c071 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -69,6 +69,7 @@ namespace DB strlen, strrep, substring, + tolower, toupper, translate, trim, @@ -187,199 +188,14 @@ namespace DB bin }; - + class KQLFunctionFactory { public: static std::unique_ptr get(String &kql_function); protected: - - - static std::unordered_map kql_functions;/* = - { - {"datetime", KQLFunctionValue::datetime}, - {"ago", KQLFunctionValue::ago}, - {"datetime_add", KQLFunctionValue::datetime_add}, - {"datetime_part", KQLFunctionValue::datetime_part}, - {"datetime_diff", KQLFunctionValue::datetime_diff}, - {"dayofmonth", KQLFunctionValue::dayofmonth}, - {"dayofweek", KQLFunctionValue::dayofweek}, - {"dayofyear", KQLFunctionValue::dayofyear}, - {"endofday", KQLFunctionValue::endofday}, - {"endofweek", KQLFunctionValue::endofweek}, - {"endofyear", KQLFunctionValue::endofyear}, - {"format_datetime", KQLFunctionValue::format_datetime}, - {"format_timespan", KQLFunctionValue::format_timespan}, - 
{"getmonth", KQLFunctionValue::getmonth}, - {"getyear", KQLFunctionValue::getyear}, - {"hoursofday", KQLFunctionValue::hoursofday}, - {"make_timespan", KQLFunctionValue::make_timespan}, - {"make_datetime", KQLFunctionValue::make_datetime}, - {"now", KQLFunctionValue::now}, - {"startofday", KQLFunctionValue::startofday}, - {"startofmonth", KQLFunctionValue::startofmonth}, - {"startofweek", KQLFunctionValue::startofweek}, - {"startofyear", KQLFunctionValue::startofyear}, - {"todatetime", KQLFunctionValue::todatetime}, - {"totimespan", KQLFunctionValue::totimespan}, - {"unixtime_microseconds_todatetime", KQLFunctionValue::unixtime_microseconds_todatetime}, - {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, - {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, - {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, - {"weekofyear", KQLFunctionValue::weekofyear}, - - {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, - {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, - {"base64_decode_tostring", KQLFunctionValue::base64_decode_tostring}, - {"base64_decode_toarray", KQLFunctionValue::base64_decode_toarray}, - {"base64_decode_toguid", KQLFunctionValue::base64_decode_toguid}, - {"countof", KQLFunctionValue::countof}, - {"extract", KQLFunctionValue::extract}, - {"extract_all", KQLFunctionValue::extract_all}, - {"extractjson", KQLFunctionValue::extractjson}, - {"has_any_index", KQLFunctionValue::has_any_index}, - {"indexof", KQLFunctionValue::indexof}, - {"isempty", KQLFunctionValue::isempty}, - {"isnotempty", KQLFunctionValue::isnotempty}, - {"isnotnull", KQLFunctionValue::isnotnull}, - {"isnull", KQLFunctionValue::isnull}, - {"parse_command_line", KQLFunctionValue::parse_command_line}, - {"parse_csv", KQLFunctionValue::parse_csv}, - {"parse_json", KQLFunctionValue::parse_json}, - {"parse_url", KQLFunctionValue::parse_url}, - 
{"parse_urlquery", KQLFunctionValue::parse_urlquery}, - {"parse_version", KQLFunctionValue::parse_version}, - {"replace_regex", KQLFunctionValue::replace_regex}, - {"reverse", KQLFunctionValue::reverse}, - {"split", KQLFunctionValue::split}, - {"strcat", KQLFunctionValue::strcat}, - {"strcat_delim", KQLFunctionValue::strcat_delim}, - {"strcmp", KQLFunctionValue::strcmp}, - {"strlen", KQLFunctionValue::strlen}, - {"strrep", KQLFunctionValue::strrep}, - {"substring", KQLFunctionValue::substring}, - {"toupper", KQLFunctionValue::toupper}, - {"translate", KQLFunctionValue::translate}, - {"trim", KQLFunctionValue::trim}, - {"trim_end", KQLFunctionValue::trim_end}, - {"trim_start", KQLFunctionValue::trim_start}, - {"url_decode", KQLFunctionValue::url_decode}, - {"url_encode", KQLFunctionValue::url_encode}, - - {"array_concat", KQLFunctionValue::array_concat}, - {"array_iif", KQLFunctionValue::array_iif}, - {"array_index_of", KQLFunctionValue::array_index_of}, - {"array_length", KQLFunctionValue::array_length}, - {"array_reverse", KQLFunctionValue::array_reverse}, - {"array_rotate_left", KQLFunctionValue::array_rotate_left}, - {"array_rotate_right", KQLFunctionValue::array_rotate_right}, - {"array_shift_left", KQLFunctionValue::array_shift_left}, - {"array_shift_right", KQLFunctionValue::array_shift_right}, - {"array_slice", KQLFunctionValue::array_slice}, - {"array_sort_asc", KQLFunctionValue::array_sort_asc}, - {"array_sort_desc", KQLFunctionValue::array_sort_desc}, - {"array_split", KQLFunctionValue::array_split}, - {"array_sum", KQLFunctionValue::array_sum}, - {"bag_keys", KQLFunctionValue::bag_keys}, - {"bag_merge", KQLFunctionValue::bag_merge}, - {"bag_remove_keys", KQLFunctionValue::bag_remove_keys}, - {"jaccard_index", KQLFunctionValue::jaccard_index}, - {"pack", KQLFunctionValue::pack}, - {"pack_all", KQLFunctionValue::pack_all}, - {"pack_array", KQLFunctionValue::pack_array}, - {"repeat", KQLFunctionValue::repeat}, - {"set_difference", 
KQLFunctionValue::set_difference}, - {"set_has_element", KQLFunctionValue::set_has_element}, - {"set_intersect", KQLFunctionValue::set_intersect}, - {"set_union", KQLFunctionValue::set_union}, - {"treepath", KQLFunctionValue::treepath}, - {"zip", KQLFunctionValue::zip}, - - {"tobool", KQLFunctionValue::tobool}, - {"toboolean", KQLFunctionValue::tobool}, - {"todouble", KQLFunctionValue::todouble}, - {"toint", KQLFunctionValue::toint}, - {"toreal", KQLFunctionValue::todouble}, - {"tostring", KQLFunctionValue::tostring}, - {"totimespan", KQLFunctionValue::totimespan}, - - {"arg_max", KQLFunctionValue::arg_max}, - {"arg_min", KQLFunctionValue::arg_min}, - {"avg", KQLFunctionValue::avg}, - {"avgif", KQLFunctionValue::avgif}, - {"binary_all_and", KQLFunctionValue::binary_all_and}, - {"binary_all_or", KQLFunctionValue::binary_all_or}, - {"binary_all_xor", KQLFunctionValue::binary_all_xor}, - {"buildschema", KQLFunctionValue::buildschema}, - {"count", KQLFunctionValue::count}, - {"countif", KQLFunctionValue::countif}, - {"dcount", KQLFunctionValue::dcount}, - {"dcountif", KQLFunctionValue::dcountif}, - {"make_bag", KQLFunctionValue::make_bag}, - {"make_bag_if", KQLFunctionValue::make_bag_if}, - {"make_list", KQLFunctionValue::make_list}, - {"make_list_if", KQLFunctionValue::make_list_if}, - {"make_list_with_nulls", KQLFunctionValue::make_list_with_nulls}, - {"make_set", KQLFunctionValue::make_set}, - {"make_set_if", KQLFunctionValue::make_set_if}, - {"max", KQLFunctionValue::max}, - {"maxif", KQLFunctionValue::maxif}, - {"min", KQLFunctionValue::min}, - {"minif", KQLFunctionValue::minif}, - {"percentiles", KQLFunctionValue::percentiles}, - {"percentiles_array", KQLFunctionValue::percentiles_array}, - {"percentilesw", KQLFunctionValue::percentilesw}, - {"percentilesw_array", KQLFunctionValue::percentilesw_array}, - {"stdev", KQLFunctionValue::stdev}, - {"stdevif", KQLFunctionValue::stdevif}, - {"sum", KQLFunctionValue::sum}, - {"sumif", KQLFunctionValue::sumif}, - 
{"take_any", KQLFunctionValue::take_any}, - {"take_anyif", KQLFunctionValue::take_anyif}, - {"variance", KQLFunctionValue::variance}, - {"varianceif", KQLFunctionValue::varianceif}, - - {"series_fir", KQLFunctionValue::series_fir}, - {"series_iir", KQLFunctionValue::series_iir}, - {"series_fit_line", KQLFunctionValue::series_fit_line}, - {"series_fit_line_dynamic", KQLFunctionValue::series_fit_line_dynamic}, - {"series_fit_2lines", KQLFunctionValue::series_fit_2lines}, - {"series_fit_2lines_dynamic", KQLFunctionValue::series_fit_2lines_dynamic}, - {"series_outliers", KQLFunctionValue::series_outliers}, - {"series_periods_detect", KQLFunctionValue::series_periods_detect}, - {"series_periods_validate", KQLFunctionValue::series_periods_validate}, - {"series_stats_dynamic", KQLFunctionValue::series_stats_dynamic}, - {"series_stats", KQLFunctionValue::series_stats}, - {"series_fill_backward", KQLFunctionValue::series_fill_backward}, - {"series_fill_const", KQLFunctionValue::series_fill_const}, - {"series_fill_forward", KQLFunctionValue::series_fill_forward}, - {"series_fill_linear", KQLFunctionValue::series_fill_linear}, - - {"ipv4_compare", KQLFunctionValue::ipv4_compare}, - {"ipv4_is_in_range", KQLFunctionValue::ipv4_is_in_range}, - {"ipv4_is_match", KQLFunctionValue::ipv4_is_match}, - {"ipv4_is_private", KQLFunctionValue::ipv4_is_private}, - {"ipv4_netmask_suffix", KQLFunctionValue::ipv4_netmask_suffix}, - {"parse_ipv4", KQLFunctionValue::parse_ipv4}, - {"parse_ipv4_mask", KQLFunctionValue::parse_ipv4_mask}, - {"ipv6_compare", KQLFunctionValue::ipv6_compare}, - {"ipv6_is_match", KQLFunctionValue::ipv6_is_match}, - {"parse_ipv6", KQLFunctionValue::parse_ipv6}, - {"parse_ipv6_mask", KQLFunctionValue::parse_ipv6_mask}, - {"format_ipv4", KQLFunctionValue::format_ipv4}, - {"format_ipv4_mask", KQLFunctionValue::format_ipv4_mask}, - - {"binary_and", KQLFunctionValue::binary_and}, - {"binary_not", KQLFunctionValue::binary_not}, - {"binary_or", KQLFunctionValue::binary_or}, - 
{"binary_shift_left", KQLFunctionValue::binary_shift_left}, - {"binary_shift_right", KQLFunctionValue::binary_shift_right}, - {"binary_xor", KQLFunctionValue::binary_xor}, - {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, - {"bin", KQLFunctionValue::bin} - };*/ - + static std::unordered_map kql_functions; }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 20b4b880a83..253292a7d9d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,11 @@ namespace DB { - +bool Bin::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h index 45759032826..802fd152333 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -4,6 +4,12 @@ #include namespace DB { +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 20b4b880a83..f271d924aff 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,95 @@ namespace DB { +bool Ipv4Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool Ipv4IsInRange::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool 
Ipv4IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv4NetmaskSuffix::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6Compare::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool Ipv6IsMatch::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv6::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool ParseIpv6Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatIpv4::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool FormatIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h index 45759032826..3ee5dda4c83 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -4,6 +4,96 @@ #include namespace DB { +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"ipv4_compare()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"parse_ipv6_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 851c631d1ce..a7f7c373566 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -8,16 +8,12 @@ namespace DB bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Encode"); } bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) @@ -85,23 +81,17 @@ bool IndexOf::convertImpl(String &out,IParser::Pos &pos) bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"empty"); } bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"notEmpty"); } bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNotNull"); } bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) @@ -113,12 +103,10 @@ bool 
ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) bool IsNull::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNull"); } -bool ParseCsv::convertImpl(String &out,IParser::Pos &pos) +bool ParseCSV::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -132,14 +120,14 @@ bool ParseJson::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ParseUrl::convertImpl(String &out,IParser::Pos &pos) +bool ParseURL::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseUrlQuery::convertImpl(String &out,IParser::Pos &pos) +bool ParseURLQuery::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; @@ -176,39 +164,7 @@ bool Split::convertImpl(String &out,IParser::Pos &pos) bool StrCat::convertImpl(String &out,IParser::Pos &pos) { - std::unique_ptr fun; - std::vector args; - String res = "concat("; - - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; - return false; - } - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; - - res += ")"; - out = res; - return true; - } - args.push_back(tmp_arg); - } - return false; + return directMapping(out,pos,"concat"); } bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) @@ -227,9 +183,7 @@ bool StrCmp::convertImpl(String &out,IParser::Pos &pos) bool StrLen::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return 
false; + return directMapping(out,pos,"lengthUTF8"); } bool StrRep::convertImpl(String &out,IParser::Pos &pos) @@ -265,6 +219,8 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) fun = KQLFunctionFactory::get(multiplier); if ( fun && fun->convert(fun_multiplier,pos)) new_multiplier += fun_multiplier; + else + new_multiplier = multiplier; } else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter { @@ -313,11 +269,15 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) return false; } +bool ToLower::convertImpl(String &out,IParser::Pos &pos) +{ + return directMapping(out,pos,"lower"); +} + + bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"upper"); } bool Translate::convertImpl(String &out,IParser::Pos &pos) @@ -348,18 +308,14 @@ bool TrimStart::convertImpl(String &out,IParser::Pos &pos) return false; } -bool UrlDecode::convertImpl(String &out,IParser::Pos &pos) +bool URLDecode::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"decodeURLComponent"); } -bool UrlEncode::convertImpl(String &out,IParser::Pos &pos) +bool URLEncode::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"encodeURLComponent"); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h index db7ab507750..43840c1253f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.h @@ -116,7 +116,7 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseCsv : public IParserKQLFunction +class ParseCSV : public IParserKQLFunction { protected: const char * getName() const 
override { return "parse_csv()"; } @@ -130,14 +130,14 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrl : public IParserKQLFunction +class ParseURL : public IParserKQLFunction { protected: const char * getName() const override { return "parse_url()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; -class ParseUrlQuery : public IParserKQLFunction +class ParseURLQuery : public IParserKQLFunction { protected: const char * getName() const override { return "parse_urlquery()"; } @@ -214,6 +214,13 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; +class ToLower : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "tolower()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class ToUpper : public IParserKQLFunction { protected: @@ -249,14 +256,14 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; -class UrlDecode : public IParserKQLFunction +class URLDecode : public IParserKQLFunction { protected: const char * getName() const override { return "url_decode()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; -class UrlEncode : public IParserKQLFunction +class URLEncode : public IParserKQLFunction { protected: const char * getName() const override { return "url_encode()"; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp index 20b4b880a83..74b7811f29e 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -19,6 +18,109 @@ namespace DB { +bool SeriesFir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} +bool SeriesIir::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out 
= res; + return false; +} + +bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesStats::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h index 45759032826..fa97dec151c 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h @@ -4,6 +4,110 @@ #include namespace DB { +class SeriesFir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesIir : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_iir()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLine : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFitLineDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_line_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2lines : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFit2linesDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fit_2lines_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesOutliers : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_outliers()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesPeriodsDetect : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_detect()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
SeriesPeriodsValidate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_periods_validate()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStatsDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats_dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesStats : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_stats()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillBackward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_backward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillConst : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_const()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillForward : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_forward()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class SeriesFillLinear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "series_fill_linear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; } diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 466370f5d80..ceb59f1d86e 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -15,14 +15,12 @@ bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Pos begin = pos; String expr; - KQLOperators convetor; - for (auto op_po : op_pos) { if (expr.empty()) - expr = "(" + convetor.getExprFromToken(op_po) +")"; + expr = "(" + getExprFromToken(op_po) +")"; else - expr = expr + " 
and (" + convetor.getExprFromToken(op_po) +")"; + expr = expr + " and (" + getExprFromToken(op_po) +")"; } Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 2a3d8238c46..ddfb2ad001a 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -82,22 +84,33 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } - if (!tokens.empty() && ((++token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + String tmp_arg = String(token_pos->begin,token_pos->end); + if (token_pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,token_pos)) + tmp_arg = new_arg; + } + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + "', " + tmp_arg +", '"+ right_wildcards + "'))"; + } else throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); return new_expr; } -String KQLOperators::getExprFromToken(IParser::Pos &pos) +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { - String res; - std::vector tokens; - auto begin = pos; - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { KQLOperatorValue op_value = KQLOperatorValue::none; @@ -138,8 +151,13 @@ 
String KQLOperators::getExprFromToken(IParser::Pos &pos) else --pos; - if (KQLOperator.find(op) != KQLOperator.end()) - op_value = KQLOperator[op]; + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; String new_expr; if (op_value == KQLOperatorValue::none) @@ -280,14 +298,9 @@ String KQLOperators::getExprFromToken(IParser::Pos &pos) tokens.push_back(new_expr); } - ++pos; + return true; } - - for (auto & token : tokens) - res = res + token + " "; - - pos = begin; - return res; + return false; } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 564aa9d8aa5..20b60d31caf 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -9,7 +9,7 @@ namespace DB class KQLOperators { public: - String getExprFromToken(IParser::Pos &pos) ; + bool convert(std::vector &tokens,IParser::Pos &pos); protected: enum class WildcardsPos:uint8_t diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index f1348c4b3c6..0334722041f 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -23,21 +23,27 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; + std::vector tokens; std::unique_ptr kql_function; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (pos->type == TokenType::BareWord ) + if (!KQLOperators().convert(tokens,pos)) { - kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) - token = new_token; + if (pos->type == TokenType::BareWord ) + { + kql_function = KQLFunctionFactory::get(token); + if (kql_function && kql_function->convert(new_token,pos)) + token = new_token; + } + tokens.push_back(token); } - res = res + token +" "; 
++pos; } + for (auto token:tokens) + res = res + token +" "; return res; } diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index cc4bece7ebf..6ce29b8024f 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,16 +57,5 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index 1eed2d00845..aa974504d92 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -19,7 +19,6 @@ public: {} }; - class ParserKQLWithOutput : public IParserBase { protected: diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index 1266b6e732d..b5302897ada 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -12,7 +12,6 @@ protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool parsePrepare(Pos &pos) override; - }; } From fceaf456c1d848e817d9525c7d89d6e880b01ac6 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 8 Jun 2022 10:14:03 -0700 Subject: [PATCH 014/279] Kusto-phase1: Add Support to Kusto Query Language This is the initial implement of Kusto Query Language. 
in this commit, we support the following features as MVP : Tabular expression statements Limit returned results Select Column (basic project) sort, order Perform string equality operations Filter using a list of elements Filter using common string operations Some string operators Aggregate by columns Base aggregate functions only support avg, count ,min, max, sum Aggregate by time intervals --- src/Parsers/Kusto/ParserKQLOperators.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index ddfb2ad001a..50550376667 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -203,7 +203,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal: break; - case KQLOperatorValue::equal_cs: new_expr = "=="; break; @@ -211,7 +210,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::not_equal_cs: new_expr = "!="; break; - case KQLOperatorValue::has: new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); break; @@ -261,7 +259,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) case KQLOperatorValue::in_cs: new_expr = "in"; break; - case KQLOperatorValue::not_in_cs: new_expr = "not in"; break; From b3ebac353f79abf0f15ef4444a1f82ea33b3b46f Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 015/279] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 7a88fec1988..a4d8fb3081c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,6 +47,10 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } + if 
(temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } return std::make_pair("", ""); } From f4db64449806acdb4269b54bad2d9c800ee143dd Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 016/279] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index a4d8fb3081c..cdac747edf0 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,9 +47,9 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); From ec8902087b1184d6e91f0f119b4c37eca414ce39 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 017/279] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h | 1 - src/Parsers/Kusto/ParserKQLStatement.cpp | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 8f57133c071..7c5f0d54734 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -188,7 +188,6 @@ namespace DB bin }; - class KQLFunctionFactory { public: diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 6ce29b8024f..cc4bece7ebf 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,5 +57,16 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ } From 5273241af96aa7db0e2e6dcc937f59611d93fb43 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 018/279] Kusto-phase2: Add KQL functions parser --- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index cc4bece7ebf..6ce29b8024f 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -57,16 +57,5 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ } From 
145b2bd45eb823c405ebf12bd5b9e77b3ecba2ff Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 22 Jun 2022 12:00:47 -0700 Subject: [PATCH 019/279] Kusto-phase2: Add common function to get argument for function convertion --- .../KustoFunctions/IParserKQLFunction.cpp | 52 +++++++++ .../Kusto/KustoFunctions/IParserKQLFunction.h | 2 + .../KustoFunctions/KQLStringFunctions.cpp | 103 +++++++++--------- 3 files changed, 103 insertions(+), 54 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index e7134678e95..ed90c865f51 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -19,6 +19,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + + bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] @@ -73,4 +79,50 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin return false; } +String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) +{ + String converted_arg; + std::unique_ptr fun; + + if (pos->type == TokenType::ClosingRoundBracket) + return converted_arg; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Syntax error near " + fn_name, ErrorCodes::SYNTAX_ERROR); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String token = String(pos->begin,pos->end); + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + converted_arg += converted; + else + converted_arg += token; + } + else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) + { + break; + } + else + converted_arg += token; + ++pos; + } + return 
converted_arg; +} + +String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) +{ + String fn_name = String(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index c633f78fa33..8af2623a984 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -34,6 +34,8 @@ public: protected: virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); + static String getConvertedArgument(const String &fn_name, IParser::Pos &pos); + static String getKQLFunctionName(IParser::Pos &pos); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index a7f7c373566..0c8a0891a01 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -188,84 +188,80 @@ bool StrLen::convertImpl(String &out,IParser::Pos &pos) bool StrRep::convertImpl(String &out,IParser::Pos &pos) { - std::unique_ptr fun; - String res = String(pos->begin,pos->end); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - --pos; + String fn_name = getKQLFunctionName(pos); //String(pos->begin,pos->end); + + if (fn_name.empty()) return false; - } - ++pos; - String value = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_value; - fun = KQLFunctionFactory::get(value); - if (fun && fun->convert(func_value,pos)) - value = func_value; - } + + auto begin = pos; + ++pos; + String value = getConvertedArgument(fn_name,pos); if (pos->type != TokenType::Comma) return false; ++pos; - String multiplier = String(pos->begin,pos->end); - String new_multiplier; - while (!pos->isEnd() 
&& pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (pos->type == TokenType::BareWord ) - { - String fun_multiplier; - fun = KQLFunctionFactory::get(multiplier); - if ( fun && fun->convert(fun_multiplier,pos)) - new_multiplier += fun_multiplier; - else - new_multiplier = multiplier; - } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) // has delimiter - { - break; - } - else - new_multiplier += String(pos->begin,pos->end); - ++pos; - } + String multiplier = getConvertedArgument(fn_name,pos); - if (!new_multiplier.empty()) - multiplier = new_multiplier; - - String delimiter ; + String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { String func_delimiter; - fun = KQLFunctionFactory::get(delimiter); - if (fun && fun->convert(func_delimiter,pos)) - delimiter = func_delimiter; - } - ++pos; + delimiter = getConvertedArgument(fn_name,pos); } + if (pos->type == TokenType::ClosingRoundBracket) { if (!delimiter.empty()) { String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; - res = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; } else - res = "repeat("+ value + ", " + multiplier + ")"; - out = res; + out = "repeat("+ value + ", " + multiplier + ")"; + return true; } + + pos = begin; return false; } + bool SubString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + String source = getConvertedArgument(fn_name,pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + String startingIndex = getConvertedArgument(fn_name,pos); + + String length; + if 
(pos->type == TokenType::Comma) + { + ++pos; + length = getConvertedArgument(fn_name,pos); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (length.empty()) + out = "substr("+ source + "," + startingIndex +" + 1)"; + else + out = "substr("+ source + ", " + startingIndex +" + 1, " + length + ")"; + return true; + } + pos = begin; return false; } @@ -274,7 +270,6 @@ bool ToLower::convertImpl(String &out,IParser::Pos &pos) return directMapping(out,pos,"lower"); } - bool ToUpper::convertImpl(String &out,IParser::Pos &pos) { return directMapping(out,pos,"upper"); From 760fd6759e9f9772486e9b2258132516cbcedec1 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 23 Jun 2022 14:26:37 -0700 Subject: [PATCH 020/279] Kusto-phase2: add kusto_auto dialect --- src/Client/ClientBase.cpp | 18 ++++- src/Interpreters/executeQuery.cpp | 17 +++- .../KustoFunctions/IParserKQLFunction.cpp | 78 ++++++++++--------- src/Parsers/Kusto/ParserKQLOperators.cpp | 1 + 4 files changed, 74 insertions(+), 40 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 495e3bdfd4e..2040c3f1440 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -328,12 +328,15 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::unique_ptr parser; + std::shared_ptr parser; + ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; + auto begin = pos; + if (!allow_multi_statements) max_length = settings.max_query_size; @@ -353,13 +356,22 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; + if (sql_dialect != "kusto") + res = tryParseQuery(kql_parser, begin, end, message, true, "", 
allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; + } } } else { res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res && sql_dialect != "kusto") + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index bac6807b682..0cba3714855 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -701,12 +701,27 @@ static std::tuple executeQueryImpl( /// Parse the query from string. try { - if (settings.dialect == Dialect::kusto && !internal) + const String & sql_dialect = settings.sql_dialect; + assert(sql_dialect == "clickhouse" || sql_dialect == "kusto" || sql_dialect == "kusto_auto"); + + if (sql_dialect == "kusto" && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else if (sql_dialect == "kusto_auto" && !internal) + { + try { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + catch(...) 
+ { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } } else if (settings.dialect == Dialect::prql && !internal) { diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index ed90c865f51..73472a42010 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -38,41 +39,36 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) { - std::unique_ptr fun; - std::vector args; + std::vector arguments; - String res =ch_fn + "("; - out = res; - auto begin = pos; + String fn_name = getKQLFunctionName(pos); - ++pos; - if (pos->type != TokenType::OpeningRoundBracket) - { - pos = begin; + if (fn_name.empty()) return false; - } + String res; + auto begin = pos; + ++pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - ++pos; - String tmp_arg = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) - { - String new_arg; - fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,pos)) - tmp_arg = new_arg; - } - else if (pos->type == TokenType::ClosingRoundBracket) - { - for (auto arg : args) - res+=arg; + String argument = getConvertedArgument(fn_name,pos); + arguments.push_back(argument); + if (pos->type == TokenType::ClosingRoundBracket) + { + for (auto arg : arguments) + { + if (res.empty()) + res = ch_fn + "(" + arg; + else + res = res + ", "+ arg; + } res += ")"; + out = res; return true; } - args.push_back(tmp_arg); + ++pos; } pos = begin; @@ -82,6 +78,7 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin String 
IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) { String converted_arg; + std::vector tokens; std::unique_ptr fun; if (pos->type == TokenType::ClosingRoundBracket) @@ -93,23 +90,32 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); - if (pos->type == TokenType::BareWord ) + String new_token; + if (!KQLOperators().convert(tokens,pos)) { - String converted; - fun = KQLFunctionFactory::get(token); - if ( fun && fun->convert(converted,pos)) - converted_arg += converted; + if (pos->type == TokenType::BareWord ) + { + String converted; + fun = KQLFunctionFactory::get(token); + if ( fun && fun->convert(converted,pos)) + tokens.push_back(converted); + else + tokens.push_back(token); + } + else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + { + break; + } else - converted_arg += token; + tokens.push_back(token); } - else if (pos->type == TokenType::Comma ||pos->type == TokenType::ClosingRoundBracket) - { - break; - } - else - converted_arg += token; ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + break; } + for (auto token : tokens) + converted_arg = converted_arg + token +" "; + return converted_arg; } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 50550376667..33e8813286a 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -297,6 +297,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) } return true; } + pos = begin; return false; } From 837654b1b7704449f073ae1f202f1e6c6aaff742 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 24 Jun 2022 13:05:52 -0700 Subject: [PATCH 021/279] Kusto-phase2: Add alias support --- src/Parsers/Kusto/ParserKQLProject.cpp | 18 
---------------- src/Parsers/Kusto/ParserKQLQuery.cpp | 29 +++++++++++++++++++++++++- src/Parsers/tests/gtest_Parser.cpp | 4 ---- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index 0e25c9c4a6c..47ecbbfce3e 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -11,25 +11,7 @@ bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (op_pos.empty()) expr = "*"; else - { - for (auto it = op_pos.begin(); it != op_pos.end(); ++it) - { - pos = *it ; - while (!pos->isEnd() && pos->type != TokenType::PipeMark) - { - if (pos->type == TokenType::BareWord) - { - String tmp(pos->begin,pos->end); - - if (it != op_pos.begin() && columns.find(tmp) == columns.end()) - return false; - columns.insert(tmp); - } - ++pos; - } - } expr = getExprFromToken(op_pos.back()); - } Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); IParser::Pos new_pos(tokens, pos.max_depth); diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 0334722041f..d54344e9ea9 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -25,12 +25,23 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) String res; std::vector tokens; std::unique_ptr kql_function; + String alias; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); String new_token; - if (!KQLOperators().convert(tokens,pos)) + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~" ) + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) { if (pos->type == TokenType::BareWord ) { @@ -40,8 +51,24 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) } tokens.push_back(token); } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + 
tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } ++pos; } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + for (auto token:tokens) res = res + token +" "; return res; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 096063c2aa9..0fe289adcb9 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -334,10 +334,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", "SELECT\n FirstName,\n LastName\nFROM Customers\nLIMIT 3" }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "throws Syntax error" - }, { "Customers | sort by FirstName desc", "SELECT *\nFROM Customers\nORDER BY FirstName DESC" From 8cb5bb7327f2e076c6217de4861cde331fe5c888 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Jun 2022 11:23:13 -0700 Subject: [PATCH 022/279] Aggregate functions initial code - Priority:HIGHT(Easy and Medium) --- .../KQLAggregationFunctions.cpp | 61 ++++++++++--------- src/Parsers/Kusto/ParserKQLSummarize.cpp | 58 +++++++++++++++--- 2 files changed, 82 insertions(+), 37 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 91c3639ace4..1bfb094518f 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -21,29 +21,25 @@ namespace DB bool ArgMax::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"argMax"); } bool ArgMin::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return 
directMapping(out,pos,"argMin"); } bool Avg::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"avg"); } bool AvgIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"avgIf"); } bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) @@ -77,29 +73,40 @@ bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) bool Count::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"count"); } bool CountIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"countIf"); } bool DCount::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + + out = "count ( DISTINCT " + value + " ) "; + return true; } bool DCountIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + ++pos; + String condition = getConvertedArgument(fn_name,pos); + out = "countIf ( DISTINCT " + value + " , " + condition + " ) "; + return true; } bool MakeBag::convertImpl(String &out,IParser::Pos &pos) @@ -154,29 +161,25 @@ bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) bool Max::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"max"); } bool MaxIf::convertImpl(String &out,IParser::Pos &pos) { String 
res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"maxIf"); } bool Min::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"min"); } bool MinIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"minIf"); } bool Percentiles::convertImpl(String &out,IParser::Pos &pos) @@ -224,15 +227,13 @@ bool StdevIf::convertImpl(String &out,IParser::Pos &pos) bool Sum::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"sum"); } bool SumIf::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"sumIf"); } bool TakeAny::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index cdac747edf0..eea72798f82 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -150,13 +150,23 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte sub_columns = sub_groupby; else sub_columns = sub_groupby + "," + sub_aggregation; - sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+")"; + sub_query = "SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+""; } Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + String converted_columns = getExprFromToken(pos_subquery); + converted_columns = "(" + converted_columns + ")"; - if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + //std::cout << "MALLIK converted_columns: " << converted_columns << std::endl; + 
+ Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + //if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + //return false; + if (!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) return false; tables = sub_qurery_table; } @@ -200,14 +210,14 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else { - if (String(pos->begin, pos->end) == "=") + /*if (String(pos->begin, pos->end) == "=") { std::pair temp = removeLastWord(expr_aggregation); expr_aggregation = temp.first; column_name = temp.second; - } - else - { + }*/ + //else + //{ if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); @@ -222,7 +232,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } - } + //} } } ++pos; @@ -237,6 +247,11 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else expr_columns = expr_groupby + "," + expr_aggregation; } + + + /* + Original + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); IParser::Pos pos_columns(token_columns, pos.max_depth); if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) @@ -249,6 +264,35 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) return false; } + */ + + // For function + Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); + IParser::Pos pos_columns(token_columns, pos.max_depth); + + String converted_columns = getExprFromToken(pos_columns); + + Tokens token_converted_columns(converted_columns.c_str(), 
converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + return false; + + if (groupby) + { + Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); + IParser::Pos postoken_groupby(token_groupby, pos.max_depth); + + String converted_groupby = getExprFromToken(postoken_groupby); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + } + + pos = begin; return true; From 1319001fee63fce897fc6f05209b26372917ffe0 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Jun 2022 21:17:17 -0700 Subject: [PATCH 023/279] Aggregate function working with two pipes --- src/Parsers/Kusto/ParserKQLQuery.cpp | 27 ++++++++++++------ src/Parsers/Kusto/ParserKQLSummarize.cpp | 35 ++---------------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d54344e9ea9..94d31d5d523 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,6 +11,8 @@ #include #include #include + +#include namespace DB { @@ -26,7 +28,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) std::vector tokens; std::unique_ptr kql_function; String alias; - + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); @@ -46,19 +48,27 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) if (pos->type == TokenType::BareWord ) { kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) 
+ if (kql_function && kql_function->convert(new_token,pos)){ token = new_token; + } + } tokens.push_back(token); } - if (pos->type == TokenType::Comma && !alias.empty()) + if (!alias.empty()) { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); + if(pos->type == TokenType::Comma || token == "FROM") + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + if(pos->type == TokenType::Comma) + tokens.push_back(","); + else + tokens.push_back("FROM"); + alias.clear(); + } } ++pos; } @@ -68,7 +78,6 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) tokens.push_back("AS"); tokens.push_back(alias); } - for (auto token:tokens) res = res + token +" "; return res; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index eea72798f82..74a32bdba63 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -142,7 +142,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (sub_groupby.empty()) { sub_columns =sub_aggregation; - sub_query = "(SELECT " + sub_columns+ " FROM "+ table_name+")"; + sub_query = "SELECT " + sub_columns+ " FROM "+ table_name+""; } else { @@ -155,17 +155,12 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); - String converted_columns = getExprFromToken(pos_subquery); converted_columns = "(" + converted_columns + ")"; - //std::cout << "MALLIK converted_columns: " << converted_columns << std::endl; - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - //if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) - 
//return false; if (!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) return false; tables = sub_qurery_table; @@ -210,14 +205,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else { - /*if (String(pos->begin, pos->end) == "=") - { - std::pair temp = removeLastWord(expr_aggregation); - expr_aggregation = temp.first; - column_name = temp.second; - }*/ - //else - //{ if (!column_name.empty()) { expr_aggregation = expr_aggregation + String(pos->begin, pos->end); @@ -232,7 +219,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { expr_aggregation = expr_aggregation + String(pos->begin, pos->end) + " "; } - //} + } } ++pos; @@ -247,24 +234,6 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte else expr_columns = expr_groupby + "," + expr_aggregation; } - - - /* - Original - - Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); - IParser::Pos pos_columns(token_columns, pos.max_depth); - if (!ParserNotEmptyExpressionList(true).parse(pos_columns, node, expected)) - return false; - - if (groupby) - { - Tokens token_groupby(expr_groupby.c_str(), expr_groupby.c_str() + expr_groupby.size()); - IParser::Pos postoken_groupby(token_groupby, pos.max_depth); - if (!ParserNotEmptyExpressionList(false).parse(postoken_groupby, group_expression_list, expected)) - return false; - } - */ // For function Tokens token_columns(expr_columns.c_str(), expr_columns.c_str() + expr_columns.size()); From 46bf8046b2fe3099575f4d045c2bb6003a534335 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 28 Jun 2022 22:03:36 -0700 Subject: [PATCH 024/279] Kusto-phase2: Add table function kql() --- src/Parsers/ExpressionElementParsers.cpp | 72 +++++++++++++----------- src/Parsers/Kusto/ParserKQLStatement.cpp | 43 ++++++++++++++ src/Parsers/Kusto/ParserKQLStatement.h | 7 +++ src/Parsers/ParserCreateQuery.cpp | 24 +++++--- 
src/Parsers/ParserInsertQuery.cpp | 8 ++- 5 files changed, 113 insertions(+), 41 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 587908eb49c..9eca5db8f6b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -42,7 +42,7 @@ #include #include - +#include namespace DB { @@ -105,28 +105,35 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserSelectWithUnionQuery select; ParserExplainQuery explain; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - ASTPtr result_node = nullptr; + ParserKeyword s_kql("KQL"); - if (ASTPtr select_node; select.parse(pos, select_node, expected)) + if (s_kql.ignore(pos, expected)) { - result_node = std::move(select_node); + if (!ParserKQLTaleFunction().parse(pos, result_node, expected)) + return false; } - else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + else { + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (ASTPtr select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { const auto & explain_query = explain_node->as(); if (explain_query.getTableFunction() || explain_query.getTableOverride()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); - /// Replace subquery `(EXPLAIN SELECT ...)` - /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` + /// Replace subquery `(EXPLAIN SELECT ...)` + /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` - String kind_str = ASTExplainQuery::toString(explain_query.getKind()); + String kind_str = ASTExplainQuery::toString(explain_query.getKind()); String settings_str; if (ASTPtr settings_ast = explain_query.getSettings()) @@ -136,31 +143,32 @@ bool 
ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) settings_str = queryToString(settings_ast); } - const ASTPtr & explained_ast = explain_query.getExplainedQuery(); - if (explained_ast) - { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str), - explained_ast); - result_node = buildSelectFromTableFunction(view_explain); + const ASTPtr & explained_ast = explain_query.getExplainedQuery(); + if (explained_ast) + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str), + explained_ast); + result_node = buildSelectFromTableFunction(view_explain); + } + else + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str)); + result_node = buildSelectFromTableFunction(view_explain); + } } else { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str)); - result_node = buildSelectFromTableFunction(view_explain); + return false; } - } - else - { - return false; - } - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + } node = std::make_shared(); node->children.push_back(result_node); diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 6ce29b8024f..140684597bd 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -5,6 +5,8 @@ #include #include #include +#include + namespace DB { @@ -58,4 +60,45 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto 
paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index aa974504d92..864cda5531a 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -40,5 +40,12 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 9e40e031c51..169176b7fb0 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -20,7 +20,7 @@ #include #include #include - +#include namespace DB { @@ -656,17 +656,25 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// ENGINE can not be specified for table functions. 
if (storage || !table_function_p.parse(pos, as_table_function, expected)) { - /// AS [db.]table - if (!name_p.parse(pos, as_table, expected)) - return false; - - if (s_dot.ignore(pos, expected)) + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) { - as_database = as_table; - if (!name_p.parse(pos, as_table, expected)) + if (!ParserKQLTaleFunction().parse(pos, select, expected)) return false; } + else + { + /// AS [db.]table + if (!name_p.parse(pos, as_table, expected)) + return false; + if (s_dot.ignore(pos, expected)) + { + as_database = as_table; + if (!name_p.parse(pos, as_table, expected)) + return false; + } + } /// Optional - ENGINE can be specified. if (!storage) storage_p.parse(pos, storage, expected); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 8601e12ebcb..8715dade90c 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -12,7 +12,7 @@ #include #include #include "Parsers/IAST_fwd.h" - +#include namespace DB { @@ -47,6 +47,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserFunction table_function_p{false}; ParserStringLiteral infile_name_p; ParserExpressionWithOptionalAlias exp_elem_p(false); + ParserKeyword s_kql("KQL"); /// create ASTPtr variables (result of parsing will be put in them). /// They will be used to initialize ASTInsertQuery's fields. 
@@ -183,6 +184,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserWatchQuery watch_p; watch_p.parse(pos, watch, expected); } + else if (!infile && s_kql.ignore(pos, expected)) + { + if (!ParserKQLTaleFunction().parse(pos, select, expected)) + return false; + } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect From 91e6d407cb681d02a684da348bf49509980189a5 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 29 Jun 2022 13:02:14 -0700 Subject: [PATCH 025/279] Kusto-phase 2: Add more string operators --- src/Client/ClientBase.cpp | 40 +++++++++------ src/Parsers/Kusto/ParserKQLOperators.cpp | 64 ++++++++++++++++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 4 +- 3 files changed, 87 insertions(+), 21 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 2040c3f1440..b2984fe00bc 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -328,15 +328,13 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - std::shared_ptr parser; + ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ParserKQLStatement kql_parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; - auto begin = pos; - if (!allow_multi_statements) max_length = settings.max_query_size; @@ -352,26 +350,38 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (is_interactive || ignore_error) { String message; - res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (sql_dialect == "kusto") + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); 
+ else if (sql_dialect == "kusto_auto") + { + + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + else + res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { - if (sql_dialect != "kusto") - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) - { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; - } + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; } } else { - res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (sql_dialect == "kusto") + res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + else if (sql_dialect == "kusto_auto") + { + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - if (!res && sql_dialect != "kusto") - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + if (!res) + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + else + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 33e8813286a..254eea4a422 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -12,9 +12,9 @@ namespace ErrorCodes extern const int 
SYNTAX_ERROR; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos) +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) { - String new_expr, left_wildcards, right_wildcards; + String new_expr, left_wildcards, right_wildcards, left_space, right_space; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) @@ -84,10 +84,29 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; } + switch (space_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + ++token_pos; if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + String(token_pos->begin + 1,token_pos->end - 1) + right_wildcards + "')"; + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) { String tmp_arg = String(token_pos->begin,token_pos->end); @@ -98,7 +117,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos if (fun && fun->convert(new_arg,token_pos)) tmp_arg = new_arg; } - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + "', " + tmp_arg +", '"+ right_wildcards + "'))"; + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); @@ -160,10 +179,15 @@ bool 
KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) op_value = KQLOperator[op]; String new_expr; + + if (op_value == KQLOperatorValue::none) tokens.push_back(op); else { + auto last_op = tokens.back(); + auto last_pos = pos; + switch (op_value) { case KQLOperatorValue::contains: @@ -233,27 +257,59 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); break; case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += 
genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); break; case KQLOperatorValue::in_cs: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 20b60d31caf..861687c27d8 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -83,7 +83,7 @@ protected: {"hasprefix" , KQLOperatorValue::hasprefix}, {"!hasprefix" , KQLOperatorValue::not_hasprefix}, {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, {"hassuffix" , KQLOperatorValue::hassuffix}, {"!hassuffix" , KQLOperatorValue::not_hassuffix}, {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, @@ -98,7 +98,7 @@ protected: {"startswith_cs" , KQLOperatorValue::startswith_cs}, {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos); + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); }; } From 8121f1087c5190f4a1c14c3e8d28451beeec2f4a Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 29 Jun 2022 23:01:17 -0700 Subject: [PATCH 026/279] Kusto-phase2 : Fix the 
function base64_decode_tostring() --- src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 0c8a0891a01..ba36e4e2e31 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -13,14 +13,14 @@ bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) { - return directMapping(out,pos,"base64Decode"); + String res = String(pos->begin,pos->end); + out = res; + return false; } bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) From ae6e6b1c3bbdfcc84b2c8edf3ac18718e8ddc129 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 12 Jul 2022 08:49:42 -0700 Subject: [PATCH 027/279] Kusto-phase2: Changed dialect to use enumerate, Added subquery for in operator, fixed the multi query issue --- src/Client/ClientBase.cpp | 18 ++++++---- src/Core/SettingsEnums.cpp | 1 - src/Interpreters/executeQuery.cpp | 7 ++-- src/Parsers/Kusto/ParserKQLOperators.cpp | 45 ++++++++++++++++++++++-- src/Parsers/Kusto/ParserKQLOperators.h | 1 + src/Parsers/Kusto/ParserKQLQuery.cpp | 45 +++++++++++++----------- src/Parsers/Kusto/ParserKQLQuery.h | 7 ++-- src/Parsers/Kusto/ParserKQLSummarize.cpp | 42 ++++------------------ src/Parsers/tests/gtest_Parser.cpp | 5 +++ 9 files changed, 96 insertions(+), 75 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b2984fe00bc..98723f1c998 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -350,15 +350,18 @@ ASTPtr ClientBase::parseQuery(const char *& 
pos, const char * end, bool allow_mu if (is_interactive || ignore_error) { String message; - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - else if (sql_dialect == "kusto_auto") + else if (dialect == Dialect::kusto_auto) { res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + { + pos = begin; + res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); @@ -371,14 +374,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - if (sql_dialect == "kusto") + if (dialect == Dialect::kusto) res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - else if (sql_dialect == "kusto_auto") + else if (dialect == Dialect::kusto_auto) { res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + { + pos = begin; + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index a30d8040f47..214ec1d22ea 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -140,7 
+140,6 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}, {"prql", Dialect::prql}}) - // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely? IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0cba3714855..0de7f29c455 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -701,15 +701,14 @@ static std::tuple executeQueryImpl( /// Parse the query from string. try { - const String & sql_dialect = settings.sql_dialect; - assert(sql_dialect == "clickhouse" || sql_dialect == "kusto" || sql_dialect == "kusto_auto"); + const Dialect & dialect = settings.dialect; - if (sql_dialect == "kusto" && !internal) + if (dialect == Dialect::kusto && !internal) { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } - else if (sql_dialect == "kusto_auto" && !internal) + else if (dialect == Dialect::kusto_auto && !internal) { try { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 254eea4a422..f67e8916e17 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { @@ -12,6 +14,44 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +String KQLOperators::genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op) +{ + ParserKQLTaleFunction kqlfun_p; + String new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if 
(!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto pos = token_pos; + if (kqlfun_p.parse(pos,select,expected)) + { + new_expr = ch_op + " kql"; + auto tmp_pos = token_pos; + while (tmp_pos != pos) + { + new_expr = new_expr + " " + String(tmp_pos->begin,tmp_pos->end); + ++tmp_pos; + } + + if (pos->type != TokenType::ClosingRoundBracket) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + token_pos = pos; + return new_expr; + } + + --token_pos; + --token_pos; + return ch_op; + +} + String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) { String new_expr, left_wildcards, right_wildcards, left_space, right_space; @@ -180,7 +220,6 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) String new_expr; - if (op_value == KQLOperatorValue::none) tokens.push_back(op); else @@ -313,10 +352,10 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::in_cs: - new_expr = "in"; + new_expr = genInOpExpr(pos,op,"in"); break; case KQLOperatorValue::not_in_cs: - new_expr = "not in"; + new_expr = genInOpExpr(pos,op,"not in"); break; case KQLOperatorValue::in: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 861687c27d8..96be5b4179c 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -99,6 +99,7 @@ protected: {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, }; static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); }; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 
94d31d5d523..cd8c071e0fc 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,11 +11,14 @@ #include #include #include - -#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_FUNCTION; +} + bool ParserKQLBase :: parsePrepare(Pos & pos) { op_pos.push_back(pos); @@ -28,7 +31,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) std::vector tokens; std::unique_ptr kql_function; String alias; - + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); @@ -48,27 +51,25 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) if (pos->type == TokenType::BareWord ) { kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)){ + if (kql_function && kql_function->convert(new_token,pos)) token = new_token; - } - + /* else if (!kql_function) + { + if ((++pos)->type == TokenType::OpeningRoundBracket) + throw Exception("Unknown function " + token, ErrorCodes::UNKNOWN_FUNCTION); + --pos; + }*/ } tokens.push_back(token); } - if (!alias.empty()) + if (pos->type == TokenType::Comma && !alias.empty()) { - if(pos->type == TokenType::Comma || token == "FROM") - { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - if(pos->type == TokenType::Comma) - tokens.push_back(","); - else - tokens.push_back("FROM"); - alias.clear(); - } + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); } ++pos; } @@ -78,6 +79,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) tokens.push_back("AS"); tokens.push_back(alias); } + for (auto token:tokens) res = res + token +" "; return res; @@ -119,9 +121,9 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) operation_pos.push_back(std::make_pair("table",pos)); String table_name(pos->begin,pos->end); - while (!pos->isEnd()) + ++pos; + while 
(!pos->isEnd() && pos->type != TokenType::Semicolon) { - ++pos; if (pos->type == TokenType::PipeMark) { ++pos; @@ -130,7 +132,10 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; operation_pos.push_back(std::make_pair(kql_operator,pos)); + kql_parser[kql_operator]->getExprFromToken(pos); } + else + ++pos; } for (auto &op_pos : operation_pos) diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42122fb6e00..2cfec703fd4 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -8,12 +8,9 @@ class ParserKQLBase : public IParserBase { public: virtual bool parsePrepare(Pos & pos); - std::vector op_pos; - -protected: - - std::vector expressions; virtual String getExprFromToken(Pos &pos); + + std::vector op_pos; }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 74a32bdba63..49a3569f963 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -116,57 +116,27 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte auto begin = pos; ASTPtr sub_qurery_table; -// rewrite this part, make it resusable (may contains bin etc, and please inmplement summarize age= avg(Age) for sub query too): if (op_pos.size() == 2) { - bool groupby = false; + String sub_query = "kql("+ table_name +"|summarize "; auto sub_pos = op_pos.front(); - String sub_aggregation; - String sub_groupby; - String sub_columns; + while (!sub_pos->isEnd() && sub_pos->type != TokenType::PipeMark && sub_pos->type != TokenType::Semicolon) { - if (String(sub_pos->begin,sub_pos->end) == "by") - groupby = true; - else - { - if (groupby) - sub_groupby = sub_groupby + String(sub_pos->begin,sub_pos->end) +" "; - else - sub_aggregation = sub_aggregation + String(sub_pos->begin,sub_pos->end) +" "; - } + sub_query = sub_query + " " 
+String(sub_pos->begin,sub_pos->end); ++sub_pos; } - - String sub_query; - if (sub_groupby.empty()) - { - sub_columns =sub_aggregation; - sub_query = "SELECT " + sub_columns+ " FROM "+ table_name+""; - } - else - { - if (sub_aggregation.empty()) - sub_columns = sub_groupby; - else - sub_columns = sub_groupby + "," + sub_aggregation; - sub_query = "SELECT " + sub_columns+ " FROM "+ table_name + " GROUP BY "+sub_groupby+""; - } + sub_query+=")"; Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); - String converted_columns = getExprFromToken(pos_subquery); - converted_columns = "(" + converted_columns + ")"; - - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - if (!ParserTablesInSelectQuery().parse(pos_converted_columns, sub_qurery_table, expected)) + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) return false; + tables = sub_qurery_table; } - pos = op_pos.back(); String expr_aggregation; String expr_groupby; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 0fe289adcb9..3514debe279 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -481,7 +481,12 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where FirstName !startswith 'pet'", "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + }, + { + "Customers | where Age in ((Customers|project Age|where Age < 30))", + "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" } + }))); static constexpr size_t kDummyMaxQuerySize = 256 * 1024; From 95e088783ce58dd19747e6e934ba9f1742e6ffc4 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 14 Jul 2022 09:00:51 -0700 Subject: [PATCH 
028/279] Implement some IP-handling functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 139 +++++++++++------- src/Parsers/tests/gtest_Parser.cpp | 25 +++- 2 files changed, 110 insertions(+), 54 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index f271d924aff..bdda0827d03 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -1,40 +1,69 @@ -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} + +namespace +{ +String trimQuotes(const String & str) +{ + static constexpr auto sQuote = '\''; + + const auto firstIndex = str.find(sQuote); + const auto lastIndex = str.rfind(sQuote); + if (firstIndex == String::npos || lastIndex == String::npos) + throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); + + return str.substr(firstIndex + 1, lastIndex - firstIndex - 1); +} +} namespace DB { -bool Ipv4Compare::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv4IsInRange::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto functionName = getKQLFunctionName(pos); + ++pos; + + const auto ipAddress = getConvertedArgument(functionName, pos); + ++pos; + + const auto ipRange = getConvertedArgument(functionName, pos); + const auto slashIndex = ipRange.find('/'); + 
out = std::format(slashIndex == String::npos ? "{0} = {1}" : "isIPAddressInRange({0}, {1})", ipAddress, ipRange); + return true; } -bool Ipv4IsMatch::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } @@ -46,65 +75,69 @@ bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) return false; } -bool Ipv4NetmaskSuffix::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + static constexpr auto sDefaultNetmask = 32; + + const auto functionName = getKQLFunctionName(pos); + ++pos; + + const auto ipRange = trimQuotes(getConvertedArgument(functionName, pos)); + const auto slashIndex = ipRange.find('/'); + const auto ipAddress = ipRange.substr(0, slashIndex); + const auto netmask = slashIndex == String::npos ? sDefaultNetmask : std::strtol(ipRange.c_str() + slashIndex + 1, nullptr, 10); + out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ipAddress, netmask); + return true; +} + +bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toIPv4OrNull"); +} + +bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); out = res; return false; } -bool ParseIpv4::convertImpl(String &out,IParser::Pos &pos) +bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ParseIpv4Mask::convertImpl(String &out,IParser::Pos &pos) +bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv6Compare::convertImpl(String 
&out,IParser::Pos &pos) +bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + return directMapping(out, pos, "toIPv6OrNull"); +} + +bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); out = res; return false; } -bool Ipv6IsMatch::convertImpl(String &out,IParser::Pos &pos) +bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ParseIpv6::convertImpl(String &out,IParser::Pos &pos) +bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool ParseIpv6Mask::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool FormatIpv4::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool FormatIpv4Mask::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3514debe279..6bd741aff31 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -485,8 +485,31 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')", + "SELECT '127.0.0.1' = '127.0.0.1'\nFROM Customers" + }, + { + "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", + "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" + }, + { + 
"Customers | project ipv4_netmask_suffix('192.168.1.1/24')", + "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" + }, + { + "Customers | project ipv4_netmask_suffix('192.168.1.1')", + "SELECT if(isIPv4String('192.168.1.1') AND ((32 >= 1) AND (32 <= 32)), 32, NULL)\nFROM Customers" + }, + { + "Customers | project parse_ipv4('127.0.0.1')", + "SELECT toIPv4OrNull('127.0.0.1')\nFROM Customers" + }, + { + "Customers | project parse_ipv6('127.0.0.1')", + "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" } - }))); static constexpr size_t kDummyMaxQuerySize = 256 * 1024; From c871183e565eb63c18eb2642442ee80c34a7ff75 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:13:44 -0700 Subject: [PATCH 029/279] Implement review comments --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index bdda0827d03..8e10d59c787 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -26,14 +26,14 @@ namespace { String trimQuotes(const String & str) { - static constexpr auto sQuote = '\''; + static constexpr auto QUOTE = '\''; - const auto firstIndex = str.find(sQuote); - const auto lastIndex = str.rfind(sQuote); - if (firstIndex == String::npos || lastIndex == String::npos) + const auto first_index = str.find(QUOTE); + const auto last_index = str.rfind(QUOTE); + if (first_index == String::npos || last_index == String::npos) throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); - return str.substr(firstIndex + 1, lastIndex - firstIndex - 1); + return str.substr(first_index + 1, last_index - first_index - 1); } } @@ -49,15 +49,18 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) 
bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) { - const auto functionName = getKQLFunctionName(pos); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + ++pos; - const auto ipAddress = getConvertedArgument(functionName, pos); + const auto ip_address = getConvertedArgument(function_name, pos); ++pos; - const auto ipRange = getConvertedArgument(functionName, pos); - const auto slashIndex = ipRange.find('/'); - out = std::format(slashIndex == String::npos ? "{0} = {1}" : "isIPAddressInRange({0}, {1})", ipAddress, ipRange); + const auto ip_range = getConvertedArgument(function_name, pos); + const auto slash_index = ip_range.find('/'); + out = std::format(slash_index == String::npos ? "{0} = {1}" : "isIPAddressInRange({0}, {1})", ip_address, ip_range); return true; } @@ -72,21 +75,24 @@ bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; - return false; + return false; } bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - static constexpr auto sDefaultNetmask = 32; + static constexpr auto DEFAULT_NETMASK = 32; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; - const auto functionName = getKQLFunctionName(pos); ++pos; - const auto ipRange = trimQuotes(getConvertedArgument(functionName, pos)); - const auto slashIndex = ipRange.find('/'); - const auto ipAddress = ipRange.substr(0, slashIndex); - const auto netmask = slashIndex == String::npos ? 
sDefaultNetmask : std::strtol(ipRange.c_str() + slashIndex + 1, nullptr, 10); - out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ipAddress, netmask); + const auto ip_range = trimQuotes(getConvertedArgument(function_name, pos)); + const auto slash_index = ip_range.find('/'); + const std::string_view ip_address(ip_range.c_str(), std::min(ip_range.length(), slash_index)); + const auto netmask = slash_index == String::npos ? DEFAULT_NETMASK : std::strtol(ip_range.c_str() + slash_index + 1, nullptr, 10); + out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ip_address, netmask); return true; } @@ -141,5 +147,4 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) out = res; return false; } - } From c54c9efdb81756030e47348e051ca3cbeb508a4a Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:14:01 -0700 Subject: [PATCH 030/279] Implement ipv4_is_private --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 30 +++++++++++++++++-- src/Parsers/tests/gtest_Parser.cpp | 8 +++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 8e10d59c787..d8de9cc4e9a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -71,11 +71,35 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) return false; } -bool Ipv4IsPrivate::convertImpl(String &out,IParser::Pos &pos) +bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + static const std::array PRIVATE_SUBNETS{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto ip_address = trimQuotes(getConvertedArgument(function_name, pos)); + 
const auto slash_index = ip_address.find('/'); + + out += "or("; + for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) + { + out += i > 0 ? ", " : ""; + + const auto & subnet = PRIVATE_SUBNETS[i]; + out += slash_index == String::npos + ? std::format("isIPAddressInRange('{0}', '{1}')", ip_address, subnet) + : std::format( + "and(isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4('{0}'), {1}) as range), 1)) as begin, '{2}'), " + "isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{2}'))", + std::string_view(ip_address.c_str(), slash_index), + std::string_view(ip_address.c_str() + slash_index + 1, ip_address.length() - slash_index - 1), + subnet); + } + + out += ")"; + return true; } bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6bd741aff31..680e6410d01 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -494,6 +494,14 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" }, + { + "Customers | project ipv4_is_private('192.168.1.6')", + "SELECT isIPAddressInRange('192.168.1.6', '10.0.0.0/8') OR isIPAddressInRange('192.168.1.6', '172.16.0.0/12') OR isIPAddressInRange('192.168.1.6', '192.168.0.0/16')\nFROM Customers" + }, + { + "Customers | project ipv4_is_private('192.168.1.6/24')", + "SELECT (isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4('192.168.1.6'), 24) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16'))\nFROM Customers" + }, { "Customers | project 
ipv4_netmask_suffix('192.168.1.1/24')", "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" From 1cac1be52b020ef8e6a6c5195cfdcd21ed2451a8 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 15 Jul 2022 06:54:23 -0700 Subject: [PATCH 031/279] Kusto-phase2: Added some string functions and release note --- src/Parsers/Kusto/KQL_ReleaseNote.md | 213 ++++++++ .../KustoFunctions/KQLStringFunctions.cpp | 501 ++++++++++++++---- src/Parsers/Kusto/ParserKQLOperators.cpp | 42 ++ src/Parsers/Kusto/ParserKQLOperators.h | 1 + 4 files changed, 647 insertions(+), 110 deletions(-) create mode 100644 src/Parsers/Kusto/KQL_ReleaseNote.md diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md new file mode 100644 index 00000000000..f5ee880db8b --- /dev/null +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -0,0 +1,213 @@ +# KQL implemented features. + + +# July 17, 2022 + +## Renamed dialect from sql_dialect to dialect + +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## string functions +- **support subquery for `in` operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) + (the subquery needs to be wrapped in double brackets) + + `Customers | where Age in ((Customers|project Age|where Age < 30))` + Note: case-insensitive not supported yet +- **has_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator) + `Customers|where Occupation has_all ('Skilled','abcd')` + note : subquery not supported yet +- **has_any** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator) + `Customers|where Occupation has_any ('Skilled','abcd')` + note : subquery not supported yet +- **countof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) + `Customers | project countof('The cat sat on the mat', 'at')` + `Customers | project countof('The cat sat on
the mat', 'at', 'normal')` + `Customers | project countof('The cat sat on the mat', 'at', 'regex')` +- **extract** ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real))` + +- **extract_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) + + `Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20')` + note: captureGroups not supported yet + +- **split** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) + `Customers | project split('aa_bb', '_')` + `Customers | project split('aaa_bbb_ccc', '_', 1)` + `Customers | project split('', '_')` + `Customers | project split('a__b', '_')` + `Customers | project split('aabbcc', 'bb')` + +- **strcat_delim** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction) + `Customers | project strcat_delim('-', '1', '2', 'A') , 1s)` + `Customers | project strcat_delim('-', '1', '2', strcat('A','b'))` + note: only support string now. 
+ +- **indexof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) + `Customers | project indexof('abcdefg','cde')` + `Customers | project indexof('abcdefg','cde',2)` + `Customers | project indexof('abcdefg','cde',6)` + note: length and occurrence not supported yet + + + + +# July 4, 2022 + +## sql_dialect + +- default is `clickhouse` + `set sql_dialect='clickhouse'` +- only process kql + `set sql_dialect='kusto'` +- process both kql and CH sql + `set sql_dialect='kusto_auto'` +## KQL() function + + - create table + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + verify the content of `kql_table4` + `select * from kql_table4` + + - insert into table + create a tmp table: + ``` + CREATE TABLE temp + ( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) + ) ENGINE = Memory; + ``` + `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + verify the content of `temp` + `select * from temp` + + - Select from kql() + `Select * from kql(Customers|project FirstName)` + +## KQL operators: + - Tabular expression statements + `Customers` + - Select Column + `Customers | project FirstName,LastName,Occupation` + - Limit returned results + `Customers | project FirstName,LastName,Occupation | take 1 | take 3` + - sort, order + `Customers | order by Age desc , FirstName asc` + - Filter + `Customers | where Occupation == 'Skilled Manual'` + - summarize + `Customers |summarize max(Age) by Occupation` + +## KQL string operators and functions + - contains + `Customers |where Education contains 'degree'` + - !contains + `Customers |where Education !contains 'degree'` + - contains_cs + `Customers |where Education contains_cs 'Degree'` + - !contains_cs + `Customers |where Education !contains_cs 'Degree'` + - endswith + `Customers | where FirstName endswith 'RE'` + - !endswith + `Customers | where FirstName !endswith 'RE'` + - endswith_cs +
`Customers | where FirstName endswith_cs 're'` + - !endswith_cs + `Customers | where FirstName !endswith_cs 're'` + - == + `Customers | where Occupation == 'Skilled Manual'` + - != + `Customers | where Occupation != 'Skilled Manual'` + - has + `Customers | where Occupation has 'skilled'` + - !has + `Customers | where Occupation !has 'skilled'` + - has_cs + `Customers | where Occupation has_cs 'Skilled'` + - !has_cs + `Customers | where Occupation !has_cs 'Skilled'` + - hasprefix + `Customers | where Occupation hasprefix 'Ab'` + - !hasprefix + `Customers | where Occupation !hasprefix 'Ab'` + - hasprefix_cs + `Customers | where Occupation hasprefix_cs 'ab'` + - !hasprefix_cs + `Customers | where Occupation !hasprefix_cs 'ab'` + - hassuffix + `Customers | where Occupation hassuffix 'Ent'` + - !hassuffix + `Customers | where Occupation !hassuffix 'Ent'` + - hassuffix_cs + `Customers | where Occupation hassuffix_cs 'ent'` + - !hassuffix_cs + `Customers | where Occupation !hassuffix_cs 'ent'` + - in + `Customers |where Education in ('Bachelors','High School')` + - !in + `Customers | where Education !in ('Bachelors','High School')` + - matches regex + `Customers | where FirstName matches regex 'P.*r'` + - startswith + `Customers | where FirstName startswith 'pet'` + - !startswith + `Customers | where FirstName !startswith 'pet'` + - startswith_cs + `Customers | where FirstName startswith_cs 'pet'` + - !startswith_cs + `Customers | where FirstName !startswith_cs 'pet'` + + - base64_encode_tostring() + `Customers | project base64_encode_tostring('Kusto1') | take 1` + - base64_decode_tostring() + `Customers | project base64_decode_tostring('S3VzdG8x') | take 1` + - isempty() + `Customers | where isempty(LastName)` + - isnotempty() + `Customers | where isnotempty(LastName)` + - isnotnull() + `Customers | where isnotnull(FirstName)` + - isnull() + `Customers | where isnull(FirstName)` + - url_decode() + `Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') |
take 1` + - url_encode() + `Customers | project url_encode('https://www.test.com/hello word') | take 1` + - substring() + `Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))` + - strcat() + `Customers | project name = strcat(FirstName, ' ', LastName)` + - strlen() + `Customers | project FirstName, strlen(FirstName)` + - strrep() + `Customers | project strrep(FirstName,2,'_')` + - toupper() + `Customers | project toupper(FirstName)` + - tolower() + `Customers | project tolower(FirstName)` + + ## Aggregate Functions + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin \ No newline at end of file diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index ba36e4e2e31..919e620dac0 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -2,193 +2,475 @@ #include #include #include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} namespace DB { -bool Base64EncodeToString::convertImpl(String &out,IParser::Pos &pos) +bool Base64EncodeToString::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"base64Encode"); } -bool Base64EncodeFromGuid::convertImpl(String &out,IParser::Pos &pos) +bool Base64EncodeFromGuid::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Base64DecodeToString::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"base64Decode"); } -bool Base64DecodeToArray::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToArray::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = 
res; return false; } -bool Base64DecodeToGuid::convertImpl(String &out,IParser::Pos &pos) +bool Base64DecodeToGuid::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool CountOf::convertImpl(String &out,IParser::Pos &pos) +bool CountOf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String search = getConvertedArgument(fn_name, pos); + + String kind = "'normal' "; + if (pos->type == TokenType::Comma) + { + ++pos; + kind = getConvertedArgument(fn_name,pos); + } + assert (kind =="'normal' " || kind =="'regex' "); + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (kind == "'normal' " ) + out = "countSubstrings(" + source + ", " + search + ")"; + else + out = "countMatches("+ source + ", " + search + ")"; + return true; + } + pos = begin; + return false; +} + +bool Extract::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + size_t capture_group = stoi(getConvertedArgument(fn_name, pos)); + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String type_literal; + + if (pos->type == TokenType::Comma) + { + ++pos; + type_literal = getConvertedArgument(fn_name, pos); + } + + if (capture_group == 0) + { + String tmp_regex; + for (auto c : regex) + { + if (c != '(' && c != ')') + tmp_regex += c; + } + regex = std::move(tmp_regex); + } + else + { + size_t group_idx = 0; + size_t str_idx = -1; + for (size_t i = 0; i < regex.length(); ++i) + { + if (regex[i] == '(') + { + ++group_idx; + if 
(group_idx == capture_group) + { + str_idx = i + 1; + break; + } + } + } + String tmp_regex; + if (str_idx > 0) + { + for (size_t i = str_idx; i < regex.length(); ++i) + { + if (regex[i] == ')') + break; + tmp_regex += regex[i]; + } + } + regex = "'" + tmp_regex + "'"; + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "extract(" + source + ", " + regex + ")"; + if (!type_literal.empty()) + { + std::unordered_map type_cast = + { {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"dynamic", "Array"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"} + }; + + Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); + IParser::Pos pos_type(token_type, pos.max_depth); + ParserKeyword s_kql("typeof"); + Expected expected; + + if (s_kql.ignore(pos_type, expected)) + { + ++pos_type; + auto kql_type= String(pos_type->begin,pos_type->end); + if (type_cast.find(kql_type) == type_cast.end()) + return false; + auto ch_type = type_cast[kql_type]; + out = "CAST(" + out + ", '" + ch_type + "')"; + } + else + return false; + } + return true; + } + + pos = begin; + return false; +} + +bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String regex = getConvertedArgument(fn_name, pos); + + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String second_arg = getConvertedArgument(fn_name, pos); + + String third_arg; + if (pos->type == TokenType::Comma) + { + ++pos; + third_arg = getConvertedArgument(fn_name, pos); + } + + if (!third_arg.empty()) // currently the captureGroups not supported + return false; + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "extractAllGroups(" + second_arg + ", " + regex + 
")"; + return true; + } + pos = begin; + return false; +} + +bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Extract::convertImpl(String &out,IParser::Pos &pos) +bool HasAnyIndex::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ExtractAll::convertImpl(String &out,IParser::Pos &pos) +bool IndexOf::convertImpl(String & out,IParser::Pos & pos) +{ + int start_index = 0, length = -1, occurrence = 1; + + String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + start_index = stoi(getConvertedArgument(fn_name, pos)); + + if (pos->type == TokenType::Comma) + { + ++pos; + length = stoi(getConvertedArgument(fn_name, pos)); + + if (pos->type == TokenType::Comma) + { + ++pos; + occurrence = stoi(getConvertedArgument(fn_name, pos)); + + } + } + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (occurrence < 0 || length < -1) + out = ""; + else if (length == -1) + out = "position(" + source + ", " + lookup + ", " + std::to_string(start_index + 1) + ") - 1"; + else + { + + } + + return true; + } + + pos = begin; + return false; +} + +bool IsEmpty::convertImpl(String & out,IParser::Pos & pos) +{ + return directMapping(out, pos, "empty"); +} + +bool IsNotEmpty::convertImpl(String & out,IParser::Pos & pos) +{ + return directMapping(out, pos, "notEmpty"); +} + +bool IsNotNull::convertImpl(String & out,IParser::Pos & pos) +{ + return directMapping(out, pos, "isNotNull"); +} + +bool ParseCommandLine::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool 
ExtractJson::convertImpl(String &out,IParser::Pos &pos) +bool IsNull::convertImpl(String & out,IParser::Pos & pos) +{ + return directMapping(out, pos, "isNull"); +} + +bool ParseCSV::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool HasAnyIndex::convertImpl(String &out,IParser::Pos &pos) +bool ParseJson::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool IndexOf::convertImpl(String &out,IParser::Pos &pos) +bool ParseURL::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool IsEmpty::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"empty"); -} - -bool IsNotEmpty::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"notEmpty"); -} - -bool IsNotNull::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"isNotNull"); -} - -bool ParseCommandLine::convertImpl(String &out,IParser::Pos &pos) +bool ParseURLQuery::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool IsNull::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"isNull"); -} - -bool ParseCSV::convertImpl(String &out,IParser::Pos &pos) +bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseJson::convertImpl(String &out,IParser::Pos &pos) +bool ReplaceRegex::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseURL::convertImpl(String &out,IParser::Pos &pos) +bool Reverse::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool ParseURLQuery::convertImpl(String &out,IParser::Pos &pos) +bool 
Split::convertImpl(String & out,IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const String delimiter = getConvertedArgument(fn_name, pos); + + int requestedIndex = -1; + if (pos->type == TokenType::Comma) + { + ++pos; + requestedIndex = std::stoi(getConvertedArgument(fn_name, pos)); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = "splitByString(" + delimiter + ", " + source + ")"; + if (requestedIndex >= 0) + { + out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requestedIndex + 1) + "))"; + } + return true; + } + + pos = begin; + return false; +} + +bool StrCat::convertImpl(String & out,IParser::Pos & pos) +{ + return directMapping(out, pos, "concat"); +} + +bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + auto begin = pos; + + ++pos; + const String delimiter = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + int arg_count = 0; + String args; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon && pos->type != TokenType::ClosingRoundBracket) + { + ++pos; + String arg = getConvertedArgument(fn_name, pos); + if (args.empty()) + args = "concat(" + arg; + else + args = args + ", " + delimiter + ", " + arg; + ++arg_count; + } + args += ")"; + + if (arg_count < 2 || arg_count > 64) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + if (pos->type == TokenType::ClosingRoundBracket) + { + out = std::move(args); + return true; + } + + pos = begin; + return false; +} + +bool StrCmp::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool 
ParseVersion::convertImpl(String &out,IParser::Pos &pos) +bool StrLen::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "lengthUTF8"); } -bool ReplaceRegex::convertImpl(String &out,IParser::Pos &pos) +bool StrRep::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool Reverse::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool Split::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool StrCat::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"concat"); -} - -bool StrCatDelim::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool StrCmp::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool StrLen::convertImpl(String &out,IParser::Pos &pos) -{ - return directMapping(out,pos,"lengthUTF8"); -} - -bool StrRep::convertImpl(String &out,IParser::Pos &pos) -{ - String fn_name = getKQLFunctionName(pos); //String(pos->begin,pos->end); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -196,18 +478,18 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) auto begin = pos; ++pos; - String value = getConvertedArgument(fn_name,pos); + String value = getConvertedArgument(fn_name, pos); if (pos->type != TokenType::Comma) return false; ++pos; - String multiplier = getConvertedArgument(fn_name,pos); + String multiplier = getConvertedArgument(fn_name, pos); String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = getConvertedArgument(fn_name,pos); + delimiter = getConvertedArgument(fn_name, pos); } if (pos->type == 
TokenType::ClosingRoundBracket) @@ -227,10 +509,9 @@ bool StrRep::convertImpl(String &out,IParser::Pos &pos) return false; } - -bool SubString::convertImpl(String &out,IParser::Pos &pos) +bool SubString::convertImpl(String & out,IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -238,19 +519,19 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) auto begin = pos; ++pos; - String source = getConvertedArgument(fn_name,pos); + String source = getConvertedArgument(fn_name, pos); if (pos->type != TokenType::Comma) return false; ++pos; - String startingIndex = getConvertedArgument(fn_name,pos); + String startingIndex = getConvertedArgument(fn_name, pos); String length; if (pos->type == TokenType::Comma) { ++pos; - length = getConvertedArgument(fn_name,pos); + length = getConvertedArgument(fn_name, pos); } if (pos->type == TokenType::ClosingRoundBracket) @@ -265,52 +546,52 @@ bool SubString::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ToLower::convertImpl(String &out,IParser::Pos &pos) +bool ToLower::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"lower"); + return directMapping(out, pos, "lower"); } -bool ToUpper::convertImpl(String &out,IParser::Pos &pos) +bool ToUpper::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"upper"); + return directMapping(out, pos, "upper"); } -bool Translate::convertImpl(String &out,IParser::Pos &pos) +bool Translate::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Trim::convertImpl(String &out,IParser::Pos &pos) +bool Trim::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool TrimEnd::convertImpl(String &out,IParser::Pos &pos) +bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) { - String res = 
String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool TrimStart::convertImpl(String &out,IParser::Pos &pos) +bool TrimStart::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool URLDecode::convertImpl(String &out,IParser::Pos &pos) +bool URLDecode::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"decodeURLComponent"); + return directMapping(out, pos, "decodeURLComponent"); } -bool URLEncode::convertImpl(String &out,IParser::Pos &pos) +bool URLEncode::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"encodeURLComponent"); + return directMapping(out, pos, "encodeURLComponent"); } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index f67e8916e17..40c703ff5d6 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -14,6 +14,46 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op) +{ + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? 
" and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + String tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(tmp_arg); + if (fun && fun->convert(new_arg,token_pos)) + tmp_arg = new_arg; + } + + if (token_pos->type == TokenType::Comma ) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + String KQLOperators::genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op) { ParserKQLTaleFunction kqlfun_p; @@ -282,9 +322,11 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) break; case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens,pos,"has_all", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens,pos,"has_any", "hasTokenCaseInsensitive"); break; case KQLOperatorValue::has_cs: diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 96be5b4179c..ba8b151bdad 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -100,6 +100,7 @@ protected: }; static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); }; } From 06a23560efaa59f720ae3c701e1809220ed0b736 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:32:49 -0700 Subject: [PATCH 032/279] Update release 
notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index f5ee880db8b..9b5bfd182fa 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -5,9 +5,24 @@ ## Renamed dialect from sql_dialect to dialect -`set sql_dialect='clickhouse'` -`set sql_dialect='kusto'` -`set sql_dialect='kusto_auto'` +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## IP functions +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + `"Customers | project ipv4_is_private('192.168.1.6/24')"` + `"Customers | project ipv4_is_private('192.168.1.6')"` +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + `"Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')"` + `"Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')"` +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` + `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` ## string functions - **support subquery for `in` orerator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) From 648c9fbdb9b318f8ed5859f4f4102fc60038e2ea Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 15 Jul 2022 12:13:50 -0700 Subject: [PATCH 033/279] Updated release notes to indicate 
deficiencies --- src/Parsers/Kusto/KQL_ReleaseNote.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 9b5bfd182fa..47e2e817391 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -10,6 +10,13 @@ `set dialect='kusto_auto'` ## IP functions +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` + +Please note that the functions listed below only take constant parameters for now. Further improvement is to be expected to support expressions. + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) `"Customers | project ipv4_is_private('192.168.1.6/24')"` `"Customers | project ipv4_is_private('192.168.1.6')"` @@ -19,10 +26,6 @@ - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` -- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) - `"Customers | project parse_ipv4('127.0.0.1')"` -- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) - `"Customers | project parse_ipv6('127.0.0.1')"` ## string functions - **support subquery for `in` orerator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) From bfe2a9042457a6d3caf199b4df4fdc5444c5c124 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Jun 2022 11:04:20 -0700 Subject: [PATCH 034/279] Kusto summarize init --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 49a3569f963..651281fcdf5 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -51,6 +51,10 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } + if (temp.size() > 0) + { + return std::make_pair(firstPart, temp[temp.size() - 1]); + } return std::make_pair("", ""); } From 6ec4e1e611914c550cff2ae989d8ddf1d51e08e8 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sat, 11 Jun 2022 10:33:38 -0700 Subject: [PATCH 035/279] Kusto-phase1: Fixed style --- src/Parsers/Kusto/ParserKQLSummarize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 651281fcdf5..059488aca5c 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -51,9 +51,9 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if (temp.size() > 0) + if (!temp.empty()) { - return std::make_pair(firstPart, temp[temp.size() - 1]); + return std::make_pair(first_part, temp[temp.size() - 1]); } return std::make_pair("", ""); From 32b4f5cfb244397bf61f74550a8f925951afa6e5 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 17 Jun 2022 08:47:08 -0700 Subject: [PATCH 036/279] Kusto-phase2 : Added KQL functions interface. 
changed the summarize class for new aggregation functions --- src/Parsers/Kusto/ParserKQLStatement.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 140684597bd..a9da3b47872 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -59,6 +59,17 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/* +bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +{ + return wrapParseImpl(pos, IncreaseDepthTag{}, [&] + { + bool res = convertImpl(out,pos); + if (!res) + out = ""; + return res; + }); +}*/ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { From 28e4448f04cc7934d812d14bc0b74f9e0847bd99 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 21 Jun 2022 09:33:07 -0700 Subject: [PATCH 037/279] Kusto-phase2: Add KQL functions parser --- .../Kusto/KustoFunctions/KQLStringFunctions.cpp | 8 ++------ src/Parsers/Kusto/ParserKQLStatement.cpp | 11 ----------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 919e620dac0..ddd872c20f0 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -314,9 +314,7 @@ bool IsNotNull::convertImpl(String & out,IParser::Pos & pos) bool ParseCommandLine::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"isNull"); } bool IsNull::convertImpl(String & out,IParser::Pos & pos) @@ -458,9 +456,7 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) bool StrCmp::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return 
directMapping(out,pos,"lengthUTF8"); } bool StrLen::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index a9da3b47872..140684597bd 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -59,17 +59,6 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -/* -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) -{ - return wrapParseImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); -}*/ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { From 7a115d0ab20db7e3e59a17b1f0b8e146cd11b97d Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Thu, 23 Jun 2022 14:26:37 -0700 Subject: [PATCH 038/279] Kusto-phase2: add kusto_auto dialect --- src/Client/ClientBase.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 98723f1c998..8355417f32d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -335,6 +335,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; + auto begin = pos; + if (!allow_multi_statements) max_length = settings.max_query_size; @@ -368,8 +370,14 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; + if (sql_dialect != "kusto") + res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + + if (!res) + { + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; + } } } else From 5ec628a6386f096e7aa4d83a9efd21d45cd58cc4 Mon Sep 17 00:00:00 
2001 From: Yong Wang Date: Sat, 16 Jul 2022 07:49:24 -0700 Subject: [PATCH 039/279] Kusto-phase2: Fixed the issue of conflict --- src/Client/ClientBase.cpp | 51 ++++++++++++--------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8355417f32d..34e06ac7dcd 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -335,8 +335,6 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; - auto begin = pos; - if (!allow_multi_statements) max_length = settings.max_query_size; @@ -370,14 +368,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!res) { - if (sql_dialect != "kusto") - res = tryParseQuery(kql_parser, begin, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) - { - std::cerr << std::endl << message << std::endl << std::endl; - return nullptr; - } + std::cerr << std::endl << message << std::endl << std::endl; + return nullptr; } } else @@ -2203,21 +2195,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool ClientBase::processQueryText(const String & text) { - auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }); - - if (exit_strings.end() != exit_strings.find(trimmed_input)) + if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }))) return false; - if (trimmed_input.starts_with("\\i")) - { - size_t skip_prefix_size = std::strlen("\\i"); - auto file_name = trim( - trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size), - [](char c) { return isWhitespaceASCII(c); }); - - return processMultiQueryFromFile(file_name); - } - if (!is_multiquery) { assert(!query_fuzzer_runs); @@ -2490,17 +2470,6 @@ void 
ClientBase::runInteractive() } -bool ClientBase::processMultiQueryFromFile(const String & file_name) -{ - String queries_from_file; - - ReadBufferFromFile in(file_name); - readStringUntilEOF(queries_from_file, in); - - return executeMultiQuery(queries_from_file); -} - - void ClientBase::runNonInteractive() { if (delayed_interactive) @@ -2508,13 +2477,23 @@ void ClientBase::runNonInteractive() if (!queries_files.empty()) { + auto process_multi_query_from_file = [&](const String & file) + { + String queries_from_file; + + ReadBufferFromFile in(file); + readStringUntilEOF(queries_from_file, in); + + return executeMultiQuery(queries_from_file); + }; + for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) - if (!processMultiQueryFromFile(interleave_file)) + if (!process_multi_query_from_file(interleave_file)) return; - if (!processMultiQueryFromFile(queries_file)) + if (!process_multi_query_from_file(queries_file)) return; } From b13cfcb0cec19db77fa9cb251ac9131699250458 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 26 Jul 2022 22:13:34 -0700 Subject: [PATCH 040/279] Add config entry to overwrite default dialect to kusto auto --- src/Parsers/Kusto/KQL_ReleaseNote.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 47e2e817391..5f3f5f343ac 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,15 @@ # KQL implemented features. +The config setting to allow dialect setting from configuration XMLs. + - Set dialect setting during command line in batch mode ./clickhouse-client --dialect='kusto_auto' -q "KQL query" + - Set dialect setting client configuration XML and create a client connection using --config-file option. 
+ For example: Clickouse-client.xml looks like as below + + ` + kusto_auto + ` - + Establish clickhouse-client connection using command `clickhouse-client --config-file=clickhouse-client.xml` + Note: Since it is user level setting, It is not required to udpate config.xml. # July 17, 2022 ## Renamed dialect from sql_dialect to dialect From 0f598491a9bd8e8fec5775a84edad7edc7fd54e8 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Thu, 28 Jul 2022 08:59:08 -0700 Subject: [PATCH 041/279] Updated Release notes with examples --- src/Parsers/Kusto/KQL_ReleaseNote.md | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 5f3f5f343ac..4474b0ce22d 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,15 +1,29 @@ # KQL implemented features. -The config setting to allow dialect setting from configuration XMLs. - - Set dialect setting during command line in batch mode ./clickhouse-client --dialect='kusto_auto' -q "KQL query" - - Set dialect setting client configuration XML and create a client connection using --config-file option. - For example: Clickouse-client.xml looks like as below - - ` - kusto_auto - ` +The config setting to allow modify dialect setting. + - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. - Establish clickhouse-client connection using command `clickhouse-client --config-file=clickhouse-client.xml` - Note: Since it is user level setting, It is not required to udpate config.xml. + For example: + ` + + + random + kusto_auto + ` + + - Query can be executed with HTTP client as below once dialect is set in users.xml + ` echo "KQL query" | curl -sS "http://localhost:8123/?" 
--data-binary @- ` + + - To execute the query using clickhouse-client , Update clickhouse-client.xml as below and connect clickhouse-client with --config-file option (` clickhouse-client --config-file= `) + + ` + kusto_auto + ` + + OR + pass dialect setting with '--'. For example : + ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + + # July 17, 2022 ## Renamed dialect from sql_dialect to dialect From 81cbd23b1336e629af203f4e1af426d95744eb25 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 19 Jul 2022 21:25:52 -0700 Subject: [PATCH 042/279] Kusto-phase2 Fixed bug of Syntax error when Order By is followed by another statement --- src/Parsers/Kusto/ParserKQLSort.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 8 --- src/Parsers/tests/gtest_Parser.cpp | 69 ++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index ad2e8a05183..4097edf1de8 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -32,7 +32,7 @@ bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; pos = op_pos.back(); - while (!pos->isEnd() && pos->type != TokenType::PipeMark) + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String tmp(new_pos->begin, new_pos->end); if (tmp == "desc" || tmp == "asc") diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 059488aca5c..60ab6497f81 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -47,14 +47,6 @@ std::pair ParserKQLSummarize::removeLastWord(String input) { return std::make_pair(first_part, temp[temp.size() - 1]); } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } - if (!temp.empty()) - { - return std::make_pair(first_part, temp[temp.size() - 1]); - } return 
std::make_pair("", ""); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 680e6410d01..21f42fc5a96 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -517,6 +517,75 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | project parse_ipv6('127.0.0.1')", "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" + }, + { + "Customers|where Occupation has_any ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') OR hasTokenCaseInsensitive(Occupation, 'abcd')" + }, + { + "Customers|where Occupation has_all ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') AND hasTokenCaseInsensitive(Occupation, 'abcd')" + }, + { + "Customers|where Occupation has_all (strcat('Skill','ed'),'Manual')", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, concat('Skill', 'ed')) AND hasTokenCaseInsensitive(Occupation, 'Manual')" + }, + { + "Customers | where Occupation == strcat('Pro','fessional') | take 1", + "SELECT *\nFROM Customers\nWHERE Occupation = concat('Pro', 'fessional')\nLIMIT 1" + }, + { + "Customers | project countof('The cat sat on the mat', 'at')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'normal')", + "SELECT countSubstrings('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'regex')", + "SELECT countMatches('The cat sat on the mat', 'at')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 10')", + "SELECT extract('The price of PINEAPPLE ice cream is 10', '\\b[A-Z]+\\b.+\\b\\\\d+')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')", + 
"SELECT extract('The price of PINEAPPLE ice cream is 20', '\\b[A-Z]+\\b')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 30')", + "SELECT extract('The price of PINEAPPLE ice cream is 30', '\\b\\\\d+')\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 40', typeof(int))", + "SELECT CAST(extract('The price of PINEAPPLE ice cream is 40', '\\b\\\\d+'), 'Int32')\nFROM Customers" + }, + { + "Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 50')", + "SELECT extractAllGroups('The price of PINEAPPLE ice cream is 50', '(\\\\w)(\\\\w+)(\\\\w)')\nFROM Customers" + }, + { + " Customers | project split('aa_bb', '_')", + "SELECT splitByString('_', 'aa_bb')\nFROM Customers" + }, + { + "Customers | project split('aaa_bbb_ccc', '_', 1)", + "SELECT arrayPushBack([], splitByString('_', 'aaa_bbb_ccc')[2])\nFROM Customers" + }, + { + "Customers | project strcat_delim('-', '1', '2', 'A')", + "SELECT concat('1', '-', '2', '-', 'A')\nFROM Customers" + }, + { + "Customers | project indexof('abcdefg','cde')", + "SELECT position('abcdefg', 'cde', 1) - 1\nFROM Customers" + }, + { + "Customers | project indexof('abcdefg','cde', 2) ", + "SELECT position('abcdefg', 'cde', 3) - 1\nFROM Customers" + } }))); From e8f451c1d5bf92d6ed452be34c975b663a57766f Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 20 Jul 2022 06:39:32 -0700 Subject: [PATCH 043/279] Kusto-phase: Add function to validate end of kql function --- .../Kusto/KustoFunctions/IParserKQLFunction.cpp | 17 ++++++++++++----- .../Kusto/KustoFunctions/IParserKQLFunction.h | 11 ++++++----- .../Kusto/KustoFunctions/KQLStringFunctions.cpp | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 
73472a42010..c45ccdd3ab1 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -23,10 +23,11 @@ namespace DB namespace ErrorCodes { extern const int SYNTAX_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) +bool IParserKQLFunction::convert(String & out,IParser::Pos & pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] { @@ -37,7 +38,7 @@ bool IParserKQLFunction::convert(String &out,IParser::Pos &pos) }); } -bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const String &ch_fn) +bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const String & ch_fn) { std::vector arguments; @@ -75,7 +76,7 @@ bool IParserKQLFunction::directMapping(String &out,IParser::Pos &pos,const Strin return false; } -String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser::Pos &pos) +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; @@ -85,7 +86,7 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: return converted_arg; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception("Syntax error near " + fn_name, ErrorCodes::SYNTAX_ERROR); + throw Exception("Need more argument(s) in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { @@ -119,7 +120,7 @@ String IParserKQLFunction::getConvertedArgument(const String &fn_name, IParser:: return converted_arg; } -String IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) +String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) { String fn_name = String(pos->begin, pos->end); ++pos; @@ -131,4 +132,10 @@ String 
IParserKQLFunction::getKQLFunctionName(IParser::Pos &pos) return fn_name; } +void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) +{ + if (pos->type != TokenType:: ClosingRoundBracket) + throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 8af2623a984..6e565eabe9e 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -28,14 +28,15 @@ public: pos = begin; return res; } - bool convert(String &out,IParser::Pos &pos); + bool convert(String & out,IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; protected: - virtual bool convertImpl(String &out,IParser::Pos &pos) = 0; - static bool directMapping(String &out,IParser::Pos &pos,const String &ch_fn); - static String getConvertedArgument(const String &fn_name, IParser::Pos &pos); - static String getKQLFunctionName(IParser::Pos &pos); + virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; + static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); + static String getKQLFunctionName(IParser::Pos & pos); }; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index ddd872c20f0..6dd90121168 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -456,7 +457,19 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) bool StrCmp::convertImpl(String & 
out,IParser::Pos & pos) { - return directMapping(out,pos,"lengthUTF8"); + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String string1 = getConvertedArgument(fn_name, pos); + ++pos; + const String string2 = getConvertedArgument(fn_name, pos); + + validateEndOfFunction(fn_name, pos); + + out = std::format("multiIf({0} == {1}, 0, {0} < {1}, -1, 1)", string1, string2); + return true; } bool StrLen::convertImpl(String & out,IParser::Pos & pos) From ff73f74612aa3b97bddbdcf88ce8cb0e72a57d35 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Mon, 18 Jul 2022 15:56:57 -0400 Subject: [PATCH 044/279] Add functional tests for tabular table summarize --- .../02366_kql_create_table.reference | 4 + .../0_stateless/02366_kql_create_table.sql | 34 +++++ .../0_stateless/02366_kql_summarize.reference | 25 ++++ .../0_stateless/02366_kql_summarize.sql | 39 +++++ .../0_stateless/02366_kql_tabular.reference | 111 +++++++++++++++ .../queries/0_stateless/02366_kql_tabular.sql | 133 ++++++++++++++++++ 6 files changed, 346 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_create_table.reference create mode 100644 tests/queries/0_stateless/02366_kql_create_table.sql create mode 100644 tests/queries/0_stateless/02366_kql_summarize.reference create mode 100644 tests/queries/0_stateless/02366_kql_summarize.sql create mode 100644 tests/queries/0_stateless/02366_kql_tabular.reference create mode 100644 tests/queries/0_stateless/02366_kql_tabular.sql diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 00000000000..35136b5ff42 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore +Diaz +Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 
index 00000000000..67f099a2d70 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +Select '-- test create table --' ; +Select * from kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 00000000000..d73f75b03c2 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,25 @@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 
+Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +2 +40 2 +20 6 +30 4 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 00000000000..8eba49f92f0 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); +INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); +INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); +INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); +INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); +INSERT INTO Customers VALUES ('Angel','Stewart','Professional','Partial College',46); +INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +Select '-- test summarize --' ; +set dialect='kusto'; +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), 
sum(Age) by Occupation; +Customers | summarize countif(Age>40) by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation; +Customers | summarize dcount(Education, Occupation=='Professional'); +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize count() by bin(Age, 10) + +-- The following does not work +-- arg_max() +-- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 00000000000..6fd5af9b60a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,111 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with 
bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +Theodore Diaz Bachelors +Stephanie Cox Bachelors +Peter Nara Graduate Degree +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya 
Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- test sort, order -- +Angel Stewart Professional Partial College 46 +Dalton Wood Professional Partial College 42 +Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +Stephanie Cox Management Bachelors 33 +Cameron Rodriguez Professional Partial College 28 +Theodore Diaz Skilled Manual Bachelors 28 +Joshua Lee Professional Partial College 26 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 00000000000..3f16e63567b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,133 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), 
('Angel','Stewart','Professional','Partial College',46); + +set dialect='clickhouse'; +Select '-- test Query only has table name: --'; +set dialect='kusto'; +Customers; + +set dialect='clickhouse'; +Select '-- Query has Column Selection --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation; + +set dialect='clickhouse'; +Select '-- Query has limit --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +set dialect='clickhouse'; +Select '-- Query has second limit with bigger value --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +set dialect='clickhouse'; +Select '-- Query has second limit with smaller value --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +set dialect='clickhouse'; +Select '-- Query has second Column selection --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +set dialect='clickhouse'; +Select '-- Query has second Column selection with extra column --'; +set dialect='kusto'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; + +-- set dialect='clickhouse'; +-- Select '-- Query with desc sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName desc; +-- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation desc; + +-- set dialect='clickhouse'; +-- Select '-- Query with asc sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc; + +-- set dialect='clickhouse'; +-- Select '-- Query with sort (without keyword asc desc) --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName; +-- Customers | project 
FirstName,LastName,Occupation | take 5 | order by Occupation; + +-- set dialect='clickhouse'; +-- Select '-- Query with sort 2 Columns with different direction --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation asc, LastName desc; + +-- set dialect='clickhouse'; +-- Select '-- Query with second sort --'; +-- set dialect='kusto'; +-- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation desc |sort by Occupation asc; + + +set dialect='clickhouse'; +Select '-- Test String Equals (==) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +set dialect='clickhouse'; +Select '-- Test String Not equals (!=) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +set dialect='clickhouse'; +Select '-- Test Filter using a list (in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +set dialect='clickhouse'; +Select '-- Test Filter using a list (!in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations (contains_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations (startswith_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +set dialect='clickhouse'; +Select '-- Test Filter using common string operations (endswith_cs) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +set 
dialect='clickhouse'; +Select '-- Test Filter using numerical equal (==) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +set dialect='clickhouse'; +Select '-- Test Filter using numerical great and less (> , <) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + + +set dialect='clickhouse'; +Select '-- Test Filter using multi where --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +-- TODO: verify the issue that order by can not be followed by other statements +set dialect='clickhouse'; +Select '-- test sort, order --'; +set dialect='kusto'; +Customers | order by Age desc, FirstName asc; + From e83ab30f181cef7d1703a259a73c9302214fb538 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Wed, 20 Jul 2022 14:18:03 -0400 Subject: [PATCH 045/279] Added sorting test cases --- .../0_stateless/02366_kql_tabular.reference | 51 ++++++++++++++---- .../queries/0_stateless/02366_kql_tabular.sql | 53 ++++++++----------- 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference index 6fd5af9b60a..fa9ea03a9c0 100644 --- a/tests/queries/0_stateless/02366_kql_tabular.reference +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -49,6 +49,46 @@ Peter Nara Theodore Diaz Bachelors Stephanie Cox Bachelors Peter Nara Graduate Degree +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Skilled Manual +Skilled Manual +Professional +-- Query with asc sort -- +Management +Professional +Professional +Professional +Professional +-- Query with sort (without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Skilled Manual +Skilled Manual 
+Professional +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Dalton Wood Professional +Angel Stewart Professional +Latoya Shen Professional +Cameron Rodriguez Professional +-- Query with second sort -- +Stephanie Cox Management +Dalton Wood Professional +Angel Stewart Professional +Latoya Shen Professional +Cameron Rodriguez Professional -- Test String Equals (==) -- Theodore Diaz Skilled Manual Peter Nara Skilled Manual @@ -98,14 +138,3 @@ Christine Nara Skilled Manual Partial College 33 -- Test Filter using multi where -- Dalton Wood Professional Partial College 42 Angel Stewart Professional Partial College 46 --- test sort, order -- -Angel Stewart Professional Partial College 46 -Dalton Wood Professional Partial College 42 -Edward Hernandez Skilled Manual High School 36 -Christine Nara Skilled Manual Partial College 33 -Stephanie Cox Management Bachelors 33 -Cameron Rodriguez Professional Partial College 28 -Theodore Diaz Skilled Manual Bachelors 28 -Joshua Lee Professional Partial College 26 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql index 3f16e63567b..6a0a3417f42 100644 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -46,33 +46,32 @@ Select '-- Query has second Column selection with extra column --'; set dialect='kusto'; Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; --- set dialect='clickhouse'; --- Select '-- Query with desc sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName desc; --- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation desc; +set dialect='clickhouse'; +Select '-- Query with desc sort --'; +set dialect='kusto'; +Customers | project FirstName | 
take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; --- set dialect='clickhouse'; --- Select '-- Query with asc sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc; +set dialect='clickhouse'; +Select '-- Query with asc sort --'; +set dialect='kusto'; +Customers | project Occupation | take 5 | sort by Occupation asc; --- set dialect='clickhouse'; --- Select '-- Query with sort (without keyword asc desc) --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 | sort by FirstName; --- Customers | project FirstName,LastName,Occupation | take 5 | order by Occupation; +set dialect='clickhouse'; +Select '-- Query with sort (without keyword asc desc) --'; +set dialect='kusto'; +Customers | project FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; --- set dialect='clickhouse'; --- Select '-- Query with sort 2 Columns with different direction --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation asc, LastName desc; - --- set dialect='clickhouse'; --- Select '-- Query with second sort --'; --- set dialect='kusto'; --- Customers | project FirstName,LastName,Occupation | take 5 |sort by Occupation desc |sort by Occupation asc; +set dialect='clickhouse'; +Select '-- Query with sort 2 Columns with different direction --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; +set dialect='clickhouse'; +Select '-- Query with second sort --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; set dialect='clickhouse'; Select '-- Test String Equals (==) --'; @@ -119,15 +118,7 @@ Select '-- Test Filter using numerical great and less (> , <) --'; set 
dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; - set dialect='clickhouse'; Select '-- Test Filter using multi where --'; set dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; - --- TODO: verify the issue that order by can not be followed by other statements -set dialect='clickhouse'; -Select '-- test sort, order --'; -set dialect='kusto'; -Customers | order by Age desc, FirstName asc; - From b2be17acc66540a8679f085690e21729e135eeba Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Fri, 22 Jul 2022 05:52:26 -0700 Subject: [PATCH 046/279] Kusto-phase2: Add print operator --- src/Parsers/Kusto/ParserKQLPrint.cpp | 20 ++++++++++++++++++++ src/Parsers/Kusto/ParserKQLPrint.h | 17 +++++++++++++++++ src/Parsers/Kusto/ParserKQLQuery.cpp | 12 ++++++++++++ src/Parsers/tests/gtest_Parser.cpp | 5 ++++- 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/Parsers/Kusto/ParserKQLPrint.cpp create mode 100644 src/Parsers/Kusto/ParserKQLPrint.h diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp new file mode 100644 index 00000000000..e6f07cd6534 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -0,0 +1,20 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + const String expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, node, expected)) + return false; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLPrint.h b/src/Parsers/Kusto/ParserKQLPrint.h new file mode 100644 index 00000000000..38cc9eb789c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class 
ParserKQLPrint : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index cd8c071e0fc..d2c1e4943bf 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -121,6 +122,17 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) operation_pos.push_back(std::make_pair("table",pos)); String table_name(pos->begin,pos->end); + if (table_name == "print") + { + ++pos; + if (!ParserKQLPrint().parse(pos, select_expression_list, expected)) + return false; + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; + } + ++pos; while (!pos->isEnd() && pos->type != TokenType::Semicolon) { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 21f42fc5a96..32f1c0c5d8e 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -585,7 +585,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | project indexof('abcdefg','cde', 2) ", "SELECT position('abcdefg', 'cde', 3) - 1\nFROM Customers" - + }, + { + "print x=1, s=strcat('Hello', ', ', 'World!')", + "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" } }))); From 652bef265d4bf6ef5bbeebd86c589f775e32b8dd Mon Sep 17 00:00:00 2001 From: root Date: Tue, 2 Aug 2022 19:03:22 -0700 Subject: [PATCH 047/279] Kusto Aggregate functions as of July 29 --- src/Parsers/Kusto/KQL_ReleaseNote.md | 30 +++---- .../KQLAggregationFunctions.cpp | 88 +++++++++++++------ src/Parsers/tests/gtest_Parser.cpp | 38 +++++++- 3 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md 
b/src/Parsers/Kusto/KQL_ReleaseNote.md index 4474b0ce22d..365521d570b 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -236,19 +236,19 @@ Please note that the functions listed below only take constant parameters for no - tolower() `Customers | project tolower(FirstName)` +# July 29, 2022 ## Aggregate Functions - - arg_max() - - arg_min() - - avg() - - avgif() - - count() - - countif() - - max() - - maxif() - - min() - - minif() - - sum() - - sumif() - - dcount() - - dcountif() - - bin \ No newline at end of file + - make_list() + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - make_list_if() + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - make_list_with_nulls() + `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` + - make_set() + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - make_set_if() + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 1bfb094518f..30b33b5933a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -20,25 +20,21 @@ namespace DB bool ArgMax::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"argMax"); } bool ArgMin::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"argMin"); } bool Avg::convertImpl(String &out,IParser::Pos &pos) { - 
String res = String(pos->begin,pos->end); return directMapping(out,pos,"avg"); } bool AvgIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"avgIf"); } @@ -72,13 +68,11 @@ bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) bool Count::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"count"); } bool CountIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"countIf"); } @@ -125,60 +119,102 @@ bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) bool MakeList::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + expr + " IS NOT NULL)"; + } else + out = "groupArrayIf(" + expr + " , " + expr + " IS NOT NULL)"; + return true; } bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + predicate+ " )"; + } else + out = "groupArrayIf(" + expr + " , " + predicate+ " )"; + return true; } bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return 
false; + return directMapping(out,pos,"groupArray"); //groupArray takes everything including NULLs } bool MakeSet::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupUniqArray(" + max_size + ")(" + expr + ")"; + } else + out = "groupUniqArray(" + expr + ")"; + return true; } bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name,pos); + out = "groupUniqArrayIf(" + max_size + ")(" + expr + " , " + predicate+ " )"; + } else + out = "groupUniqArrayIf(" + expr + " , " + predicate+ " )"; + return true; } bool Max::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"max"); } bool MaxIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"maxIf"); } bool Min::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"min"); } bool MinIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"minIf"); } @@ -226,13 +262,11 @@ bool StdevIf::convertImpl(String &out,IParser::Pos &pos) bool Sum::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return 
directMapping(out,pos,"sum"); } bool SumIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); return directMapping(out,pos,"sumIf"); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 32f1c0c5d8e..90df4ad27af 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -589,7 +589,43 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print x=1, s=strcat('Hello', ', ', 'World!')", "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" - } + }, + { + "Customers | summarize t = make_list(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_with_nulls(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(10)(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(FirstName, Age > 10) AS 
t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + } }))); static constexpr size_t kDummyMaxQuerySize = 256 * 1024; From 85825438166916d9e4b1cb233bf1b3d90b6343c2 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 3 Aug 2022 08:22:17 -0700 Subject: [PATCH 048/279] update release notes and test script --- src/Parsers/Kusto/KQL_ReleaseNote.md | 64 +++++++++++++++++++++------- src/Parsers/tests/gtest_Parser.cpp | 45 ++++++++++--------- 2 files changed, 71 insertions(+), 38 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 365521d570b..708e7ab9418 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -22,6 +22,39 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` +# Augest 1, 2022 +- **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) + `print strcmp('abc','ABC')` + +- **parse_url** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlfunction) + `print Result = parse_url('scheme://username:password@www.google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')` + +- **parse_urlquery** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlqueryfunction) + `print Result = parse_urlquery('k1=v1&k2=v2&k3=v3')` + +- **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) + `print x=1, s=strcat('Hello', ', ', 'World!')` + +- **The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - 
[ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + +- **Aggregate Functions:** + - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - [make_list_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelistif-aggfunction) + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - [make_list_with_nulls()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-list-with-nulls-aggfunction) + `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` + - [make_set()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makeset-aggfunction) + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` # July 17, 2022 @@ -236,19 +269,20 @@ Please note that the functions listed below only take constant parameters for no - tolower() `Customers | project tolower(FirstName)` -# July 29, 2022 ## Aggregate Functions - - make_list() - `Customers | summarize t = make_list(FirstName) by FirstName` - `Customers | summarize t = make_list(FirstName, 10) by FirstName` - - make_list_if() - `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` - `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by 
FirstName` - - make_list_with_nulls() - `Customers | summarize t = make_list_with_nulls(FirstName) by FirstName` - - make_set() - `Customers | summarize t = make_set(FirstName) by FirstName` - `Customers | summarize t = make_set(FirstName, 10) by FirstName` - - make_set_if() - `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` - `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin + \ No newline at end of file diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 90df4ad27af..9550f4112a8 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -487,36 +487,24 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, { - "Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')", - "SELECT '127.0.0.1' = '127.0.0.1'\nFROM Customers" + "Customers | project ipv4_is_in_range(FirstName, LastName)", + "SELECT isIPAddressInRange(FirstName, concat(LastName, if(position(LastName, '/') > 0, '', '/32')))\nFROM Customers" }, { - "Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')", - "SELECT isIPAddressInRange('192.168.1.6', '192.168.1.1/24')\nFROM Customers" + "Customers | project ipv4_is_private(Occupation)", + "SELECT (((length(splitByChar('/', Occupation) AS tokens) = 1) AND isIPAddressInRange(tokens[1] AS ip, '10.0.0.0/8')) OR ((length(tokens) = 2) AND isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4(ip), if((toUInt8OrNull(tokens[-1]) AS suffix) IS NULL, throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8'))) OR (((length(tokens) = 1) AND 
isIPAddressInRange(ip, '172.16.0.0/12')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '192.168.0.0/16')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')))\nFROM Customers" }, { - "Customers | project ipv4_is_private('192.168.1.6')", - "SELECT isIPAddressInRange('192.168.1.6', '10.0.0.0/8') OR isIPAddressInRange('192.168.1.6', '172.16.0.0/12') OR isIPAddressInRange('192.168.1.6', '192.168.0.0/16')\nFROM Customers" + "Customers | project ipv4_netmask_suffix(Occupation)", + "SELECT if((length(splitByChar('/', Occupation) AS tokens) <= 2) AND isIPv4String(tokens[1]), if(length(tokens) != 2, 32, if(((toInt8OrNull(tokens[-1]) AS suffix) >= 1) AND (suffix <= 32), suffix, throwIf(true, 'Suffix must be between 1 and 32'))), throwIf(true, 'Unable to recognize and IP address with or without a suffix'))\nFROM Customers" }, { - "Customers | project ipv4_is_private('192.168.1.6/24')", - "SELECT (isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4('192.168.1.6'), 24) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16'))\nFROM Customers" + "Customers | project parse_ipv4(FirstName)", + "SELECT toIPv4OrNull(FirstName)\nFROM Customers" }, { - "Customers | project ipv4_netmask_suffix('192.168.1.1/24')", - "SELECT if(isIPv4String('192.168.1.1') AND ((24 >= 1) AND (24 <= 32)), 24, NULL)\nFROM Customers" - }, - { - "Customers | project ipv4_netmask_suffix('192.168.1.1')", - "SELECT if(isIPv4String('192.168.1.1') AND ((32 >= 1) AND (32 <= 32)), 32, NULL)\nFROM Customers" - }, - { - "Customers | project parse_ipv4('127.0.0.1')", - "SELECT 
toIPv4OrNull('127.0.0.1')\nFROM Customers" - }, - { - "Customers | project parse_ipv6('127.0.0.1')", - "SELECT toIPv6OrNull('127.0.0.1')\nFROM Customers" + "Customers | project parse_ipv6(LastName)", + "SELECT toIPv6OrNull(LastName)\nFROM Customers" }, { "Customers|where Occupation has_any ('Skilled','abcd')", @@ -590,7 +578,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "print x=1, s=strcat('Hello', ', ', 'World!')", "SELECT\n 1 AS x,\n concat('Hello', ', ', 'World!') AS s" }, - { + { + "print parse_urlquery('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Query Parameters\":', concat('{\"', replace(replace(if(position('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment', '?') > 0, queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), 'https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), '}')" + }, + { + "print strcmp('a','b')", + "SELECT multiIf('a' = 'b', 0, 'a' < 'b', -1, 1)" + }, + { + "print parse_url('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Scheme\":\"', protocol('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Host\":\"', domain('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Port\":\"', toString(port('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')), '\"'), ',', concat('\"Path\":\"', path('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), ',', concat('\"Username\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[1], '\"'), ',', concat('\"Password\":\"', splitByChar(':', splitByChar('@', netloc('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'))[1])[2], '\"'), ',', concat('\"Query 
Parameters\":', concat('{\"', replace(replace(queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), ',', concat('\"Fragment\":\"', fragment('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '\"'), '}')" + },{ "Customers | summarize t = make_list(FirstName) by FirstName", "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" }, From bfa2820985b21b3e2e4611734d8c5847f475fa0d Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 19 Jul 2022 09:52:54 -0700 Subject: [PATCH 049/279] Support expressions as IP function arguments --- src/Parsers/Kusto/KQL_ReleaseNote.md | 9 ++++ .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 51 +++++++------------ 2 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 708e7ab9418..8185cc00817 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -55,6 +55,15 @@ The config setting to allow modify dialect setting. - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` +# July XX, 2022 + +## IP functions + +The following functions now support arbitrary expressions as their argument. 
+ +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) # July 17, 2022 diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index d8de9cc4e9a..d1c7963b66b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -22,21 +22,6 @@ namespace DB::ErrorCodes extern const int SYNTAX_ERROR; } -namespace -{ -String trimQuotes(const String & str) -{ - static constexpr auto QUOTE = '\''; - - const auto first_index = str.find(QUOTE); - const auto last_index = str.rfind(QUOTE); - if (first_index == String::npos || last_index == String::npos) - throw DB::Exception("Syntax error, improper quotation: " + str, DB::ErrorCodes::SYNTAX_ERROR); - - return str.substr(first_index + 1, last_index - first_index - 1); -} -} - namespace DB { @@ -59,8 +44,7 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) ++pos; const auto ip_range = getConvertedArgument(function_name, pos); - const auto slash_index = ip_range.find('/'); - out = std::format(slash_index == String::npos ? 
"{0} = {1}" : "isIPAddressInRange({0}, {1})", ip_address, ip_range); + out = std::format("isIPAddressInRange({0}, concat({1}, if(position({1}, '/') > 0, '', '/32')))", ip_address, ip_range); return true; } @@ -79,8 +63,9 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - const auto ip_address = trimQuotes(getConvertedArgument(function_name, pos)); - const auto slash_index = ip_address.find('/'); + ++pos; + + const auto ip_address = getConvertedArgument(function_name, pos); out += "or("; for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) @@ -88,14 +73,13 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) out += i > 0 ? ", " : ""; const auto & subnet = PRIVATE_SUBNETS[i]; - out += slash_index == String::npos - ? std::format("isIPAddressInRange('{0}', '{1}')", ip_address, subnet) - : std::format( - "and(isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4('{0}'), {1}) as range), 1)) as begin, '{2}'), " - "isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{2}'))", - std::string_view(ip_address.c_str(), slash_index), - std::string_view(ip_address.c_str() + slash_index + 1, ip_address.length() - slash_index - 1), - subnet); + out += std::format( + "or(and(length(splitByChar('/', {0}) as tokens) = 1, isIPAddressInRange(tokens[1] as ip, '{1}')), " + "and(length(tokens) = 2, isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4(ip), " + "if(isNull(toUInt8OrNull(tokens[-1]) as suffix), throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) as range), " + "1)) as begin, '{1}'), isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{1}')))", + ip_address, + subnet); } out += ")"; @@ -104,19 +88,18 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) { - static constexpr auto DEFAULT_NETMASK = 32; - const auto function_name = 
getKQLFunctionName(pos); if (function_name.empty()) return false; ++pos; - const auto ip_range = trimQuotes(getConvertedArgument(function_name, pos)); - const auto slash_index = ip_range.find('/'); - const std::string_view ip_address(ip_range.c_str(), std::min(ip_range.length(), slash_index)); - const auto netmask = slash_index == String::npos ? DEFAULT_NETMASK : std::strtol(ip_range.c_str() + slash_index + 1, nullptr, 10); - out = std::format("if(and(isIPv4String('{0}'), {1} between 1 and 32), {1}, null)", ip_address, netmask); + const auto ip_range = getConvertedArgument(function_name, pos); + out = std::format( + "if(length(splitByChar('/', {0}) as tokens) <= 2 and isIPv4String(tokens[1]), if(length(tokens) != 2, 32, " + "if((toInt8OrNull(tokens[-1]) as suffix) between 1 and 32, suffix, throwIf(true, 'Suffix must be between 1 and 32'))), " + "throwIf(true, 'Unable to recognize and IP address with or without a suffix'))", + ip_range); return true; } From eb9747c2c22a88f71c090c49132d9b41b6d69000 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 26 Jul 2022 08:32:28 -0700 Subject: [PATCH 050/279] Provide conformance to the specification --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index d1c7963b66b..0383292669d 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -67,22 +67,23 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getConvertedArgument(function_name, pos); - out += "or("; + out += std::format( + "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " + "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as suffix), null, " + 
"ignore(assumeNotNull(nullable_ip) as ip, " + "IPv4CIDRToRange(ip, assumeNotNull(suffix)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " + "IPv4NumToString(tupleElement(range, 2)) as end), null, ", + ip_address); for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) { - out += i > 0 ? ", " : ""; - const auto & subnet = PRIVATE_SUBNETS[i]; out += std::format( - "or(and(length(splitByChar('/', {0}) as tokens) = 1, isIPAddressInRange(tokens[1] as ip, '{1}')), " - "and(length(tokens) = 2, isIPAddressInRange(IPv4NumToString(tupleElement((IPv4CIDRToRange(toIPv4(ip), " - "if(isNull(toUInt8OrNull(tokens[-1]) as suffix), throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) as range), " - "1)) as begin, '{1}'), isIPAddressInRange(IPv4NumToString(tupleElement(range, 2)) as end, '{1}')))", - ip_address, + "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " + "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", subnet); } - out += ")"; + out += "false)"; return true; } @@ -96,9 +97,8 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) const auto ip_range = getConvertedArgument(function_name, pos); out = std::format( - "if(length(splitByChar('/', {0}) as tokens) <= 2 and isIPv4String(tokens[1]), if(length(tokens) != 2, 32, " - "if((toInt8OrNull(tokens[-1]) as suffix) between 1 and 32, suffix, throwIf(true, 'Suffix must be between 1 and 32'))), " - "throwIf(true, 'Unable to recognize and IP address with or without a suffix'))", + "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " + "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as suffix), null, toUInt8(min2(suffix, 32)))", ip_range); return true; } From 9d3a353dffef4f2c5673c86c7e2eb3e9c4f9d264 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 25 Jul 2022 00:01:19 -0700 Subject: [PATCH 051/279] Kusto-phase2: Added check end of function, and neww string functions --- 
src/Parsers/Kusto/KQL_ReleaseNote.md | 15 +- .../KustoFunctions/IParserKQLFunction.cpp | 23 +- .../Kusto/KustoFunctions/IParserKQLFunction.h | 1 + .../KustoFunctions/KQLStringFunctions.cpp | 247 +++++++----------- src/Parsers/Kusto/ParserKQLOperators.cpp | 19 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 15 +- 6 files changed, 120 insertions(+), 200 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 8185cc00817..ae3f9e1b671 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -35,11 +35,6 @@ The config setting to allow modify dialect setting. - **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) `print x=1, s=strcat('Hello', ', ', 'World!')` -- **The following functions now support arbitrary expressions as their argument:** - - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) - - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) - - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) - - **Aggregate Functions:** - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) `Customers | summarize t = make_list(FirstName) by FirstName` @@ -59,12 +54,10 @@ The config setting to allow modify dialect setting. ## IP functions -The following functions now support arbitrary expressions as their argument. 
- -- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) -- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) -- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) - +- **The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) # July 17, 2022 diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index c45ccdd3ab1..825c5eb5f92 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -96,12 +96,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: { if (pos->type == TokenType::BareWord ) { - String converted; - fun = KQLFunctionFactory::get(token); - if ( fun && fun->convert(converted,pos)) - tokens.push_back(converted); - else - tokens.push_back(token); + tokens.push_back(IParserKQLFunction::getExpression(pos)); } else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) { @@ -138,4 +133,20 @@ void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser:: throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } +String IParserKQLFunction::getExpression(IParser::Pos & pos) +{ + String arg = String(pos->begin, pos->end); + if (pos->type == TokenType::BareWord ) + { + String new_arg; + auto fun = KQLFunctionFactory::get(arg); + if (fun && 
fun->convert(new_arg, pos)) + { + validateEndOfFunction(arg, pos); + arg = new_arg; + } + } + return arg; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 6e565eabe9e..7ed3841583b 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -31,6 +31,7 @@ public: bool convert(String & out,IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; + static String getExpression(IParser::Pos & pos); protected: virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 6dd90121168..c2d1bd251da 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -51,14 +51,9 @@ bool CountOf::convertImpl(String & out, IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; const String search = getConvertedArgument(fn_name, pos); @@ -70,16 +65,12 @@ bool CountOf::convertImpl(String & out, IParser::Pos & pos) } assert (kind =="'normal' " || kind =="'regex' "); - if (pos->type == TokenType::ClosingRoundBracket) - { - if (kind == "'normal' " ) - out = "countSubstrings(" + source + ", " + search + ")"; - else - out = "countMatches("+ source + ", " + search + ")"; - return true; - } - pos = begin; - return false; + if (kind == "'normal' " ) + out = "countSubstrings(" + source + ", " + search + ")"; + else + out = "countMatches("+ source + ", " + search + ")"; + return true; + } bool Extract::convertImpl(String & out, IParser::Pos & pos) @@ -88,14 +79,9 @@ 
bool Extract::convertImpl(String & out, IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; String regex = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; size_t capture_group = stoi(getConvertedArgument(fn_name, pos)); @@ -149,48 +135,43 @@ bool Extract::convertImpl(String & out, IParser::Pos & pos) regex = "'" + tmp_regex + "'"; } - if (pos->type == TokenType::ClosingRoundBracket) + out = "extract(" + source + ", " + regex + ")"; + if (!type_literal.empty()) { - out = "extract(" + source + ", " + regex + ")"; - if (!type_literal.empty()) + std::unordered_map type_cast = + { {"bool", "Boolean"}, + {"boolean", "Boolean"}, + {"datetime", "DateTime"}, + {"date", "DateTime"}, + {"dynamic", "Array"}, + {"guid", "UUID"}, + {"int", "Int32"}, + {"long", "Int64"}, + {"real", "Float64"}, + {"double", "Float64"}, + {"string", "String"}, + {"decimal", "Decimal"} + }; + + Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); + IParser::Pos pos_type(token_type, pos.max_depth); + ParserKeyword s_kql("typeof"); + Expected expected; + + if (s_kql.ignore(pos_type, expected)) { - std::unordered_map type_cast = - { {"bool", "Boolean"}, - {"boolean", "Boolean"}, - {"datetime", "DateTime"}, - {"date", "DateTime"}, - {"dynamic", "Array"}, - {"guid", "UUID"}, - {"int", "Int32"}, - {"long", "Int64"}, - {"real", "Float64"}, - {"double", "Float64"}, - {"string", "String"}, - {"decimal", "Decimal"} - }; - - Tokens token_type(type_literal.c_str(), type_literal.c_str() + type_literal.size()); - IParser::Pos pos_type(token_type, pos.max_depth); - ParserKeyword s_kql("typeof"); - Expected expected; - - if (s_kql.ignore(pos_type, expected)) - { - ++pos_type; - auto kql_type= String(pos_type->begin,pos_type->end); - if (type_cast.find(kql_type) == type_cast.end()) - return false; - auto ch_type = type_cast[kql_type]; - out = "CAST(" + out + ", '" + ch_type + "')"; - } - else + 
++pos_type; + auto kql_type= String(pos_type->begin,pos_type->end); + if (type_cast.find(kql_type) == type_cast.end()) return false; + auto ch_type = type_cast[kql_type]; + out = "CAST(" + out + ", '" + ch_type + "')"; } - return true; + else + return false; } + return true; - pos = begin; - return false; } bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) @@ -199,14 +180,9 @@ bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String regex = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; - ++pos; const String second_arg = getConvertedArgument(fn_name, pos); @@ -220,13 +196,8 @@ bool ExtractAll::convertImpl(String & out,IParser::Pos & pos) if (!third_arg.empty()) // currently the captureGroups not supported return false; - if (pos->type == TokenType::ClosingRoundBracket) - { - out = "extractAllGroups(" + second_arg + ", " + regex + ")"; - return true; - } - pos = begin; - return false; + out = "extractAllGroups(" + second_arg + ", " + regex + ")"; + return true; } bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) @@ -247,16 +218,12 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) { int start_index = 0, length = -1, occurrence = 1; - String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; ++pos; const String lookup = getConvertedArgument(fn_name, pos); @@ -275,7 +242,6 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) { ++pos; occurrence = stoi(getConvertedArgument(fn_name, pos)); - } } } @@ -294,7 +260,6 @@ bool IndexOf::convertImpl(String & out,IParser::Pos & pos) return true; } - pos = begin; return false; } @@ -339,16 +304,41 @@ bool ParseJson::convertImpl(String & out,IParser::Pos & 
pos) bool ParseURL::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String url = getConvertedArgument(fn_name, pos); + + const String scheme = std::format("concat('\"Scheme\":\"', protocol({0}),'\"')",url); + const String host = std::format("concat('\"Host\":\"', domain({0}),'\"')",url); + const String port = std::format("concat('\"Port\":\"', toString(port({0})),'\"')",url); + const String path = std::format("concat('\"Path\":\"', path({0}),'\"')",url); + const String username_pwd = std::format("netloc({0})",url); + const String query_string = std::format("queryString({0})",url); + const String fragment = std::format("concat('\"Fragment\":\"',fragment({0}),'\"')",url); + const String username = std::format("concat('\"Username\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),1),'\"')", username_pwd); + const String password = std::format("concat('\"Password\":\"', arrayElement(splitByChar(':',arrayElement(splitByChar('@',{0}) ,1)),2),'\"')", username_pwd); + const String query_parameters = std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + + out = std::format("concat('{{',{},',',{},',',{},',',{},',',{},',',{},',',{},',',{},'}}')",scheme, host, port, path, username, password, query_parameters,fragment); + return true; } bool ParseURLQuery::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String query = getConvertedArgument(fn_name, pos); + + const String query_string = std::format("if (position({},'?') > 0, queryString({}), {})", query, query, query); + const String query_parameters = 
std::format("concat('\"Query Parameters\":', concat('{{\"', replace(replace({}, '=', '\":\"'),'&','\",\"') ,'\"}}'))", query_string); + out = std::format("concat('{{',{},'}}')",query_parameters); + return true; } bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) @@ -378,35 +368,25 @@ bool Split::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String source = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; ++pos; const String delimiter = getConvertedArgument(fn_name, pos); - int requestedIndex = -1; + int requested_index = -1; if (pos->type == TokenType::Comma) { ++pos; - requestedIndex = std::stoi(getConvertedArgument(fn_name, pos)); + requested_index = std::stoi(getConvertedArgument(fn_name, pos)); } - if (pos->type == TokenType::ClosingRoundBracket) + out = "splitByString(" + delimiter + ", " + source + ")"; + if (requested_index >= 0) { - out = "splitByString(" + delimiter + ", " + source + ")"; - if (requestedIndex >= 0) - { - out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requestedIndex + 1) + "))"; - } - return true; + out = "arrayPushBack([],arrayElement(" + out + ", " + std::to_string(requested_index + 1) + "))"; } - - pos = begin; - return false; + return true; } bool StrCat::convertImpl(String & out,IParser::Pos & pos) @@ -420,12 +400,8 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; const String delimiter = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; int arg_count = 0; String args; @@ -445,14 +421,8 @@ bool StrCatDelim::convertImpl(String & out,IParser::Pos & pos) if (arg_count < 2 || arg_count > 64) throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); - if (pos->type == TokenType::ClosingRoundBracket) - { - out = std::move(args); - return 
true; - } - - pos = begin; - return false; + out = std::move(args); + return true; } bool StrCmp::convertImpl(String & out,IParser::Pos & pos) @@ -466,8 +436,6 @@ bool StrCmp::convertImpl(String & out,IParser::Pos & pos) ++pos; const String string2 = getConvertedArgument(fn_name, pos); - validateEndOfFunction(fn_name, pos); - out = std::format("multiIf({0} == {1}, 0, {0} < {1}, -1, 1)", string1, string2); return true; } @@ -479,43 +447,28 @@ bool StrLen::convertImpl(String & out,IParser::Pos & pos) bool StrRep::convertImpl(String & out,IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - auto begin = pos; + ++pos; + const String value = getConvertedArgument(fn_name, pos); ++pos; - String value = getConvertedArgument(fn_name, pos); - if (pos->type != TokenType::Comma) - return false; + const String multiplier = getConvertedArgument(fn_name, pos); - ++pos; - String multiplier = getConvertedArgument(fn_name, pos); - - String delimiter; if (pos->type == TokenType::Comma) { ++pos; - delimiter = getConvertedArgument(fn_name, pos); + const String delimiter = getConvertedArgument(fn_name, pos); + const String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; + out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; } + else + out = "repeat("+ value + ", " + multiplier + ")"; - if (pos->type == TokenType::ClosingRoundBracket) - { - if (!delimiter.empty()) - { - String repeated_str = "repeat(concat("+value+"," + delimiter + ")," + multiplier + ")"; - out = "substr("+ repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; - } - else - out = "repeat("+ value + ", " + multiplier + ")"; - - return true; - } - - pos = begin; - return false; + return true; } bool SubString::convertImpl(String & out,IParser::Pos & pos) @@ -525,34 +478,22 @@ bool SubString::convertImpl(String & 
out,IParser::Pos & pos) if (fn_name.empty()) return false; - auto begin = pos; - ++pos; String source = getConvertedArgument(fn_name, pos); - - if (pos->type != TokenType::Comma) - return false; ++pos; String startingIndex = getConvertedArgument(fn_name, pos); - String length; if (pos->type == TokenType::Comma) { ++pos; - length = getConvertedArgument(fn_name, pos); + auto length = getConvertedArgument(fn_name, pos); + out = "substr("+ source + ", " + startingIndex + " + 1, " + length + ")"; } + else + out = "substr("+ source + "," + startingIndex + " + 1)"; - if (pos->type == TokenType::ClosingRoundBracket) - { - if (length.empty()) - out = "substr("+ source + "," + startingIndex +" + 1)"; - else - out = "substr("+ source + ", " + startingIndex +" + 1, " + length + ")"; - return true; - } - pos = begin; - return false; + return true; } bool ToLower::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 40c703ff5d6..78421b2745a 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -30,15 +30,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { - String tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::BareWord ) - { - String new_arg; - auto fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,token_pos)) - tmp_arg = new_arg; - } - + auto tmp_arg = IParserKQLFunction::getExpression(token_pos); if (token_pos->type == TokenType::Comma ) new_expr = new_expr + logic_op; else @@ -189,14 +181,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; else if 
(!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) { - String tmp_arg = String(token_pos->begin,token_pos->end); - if (token_pos->type == TokenType::BareWord ) - { - String new_arg; - auto fun = KQLFunctionFactory::get(tmp_arg); - if (fun && fun->convert(new_arg,token_pos)) - tmp_arg = new_arg; - } + auto tmp_arg = IParserKQLFunction::getExpression(token_pos); new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index d2c1e4943bf..7f00a76fa72 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -36,7 +36,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { String token = String(pos->begin,pos->end); - String new_token; + if (token == "=") { ++pos; @@ -49,18 +49,7 @@ String ParserKQLBase :: getExprFromToken(Pos &pos) } else if (!KQLOperators().convert(tokens,pos)) { - if (pos->type == TokenType::BareWord ) - { - kql_function = KQLFunctionFactory::get(token); - if (kql_function && kql_function->convert(new_token,pos)) - token = new_token; - /* else if (!kql_function) - { - if ((++pos)->type == TokenType::OpeningRoundBracket) - throw Exception("Unknown function " + token, ErrorCodes::UNKNOWN_FUNCTION); - --pos; - }*/ - } + token = IParserKQLFunction::getExpression(pos); tokens.push_back(token); } From 735e81f7c746c52b257f3bb3e5e7e11b77dabdd0 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Tue, 26 Jul 2022 20:24:29 -0400 Subject: [PATCH 052/279] Added func tests for string and ip --- .../0_stateless/02366_kql_func_ip.reference | 28 ++ .../queries/0_stateless/02366_kql_func_ip.sql | 36 +++ .../02366_kql_func_string.reference | 262 ++++++++++++++++++ .../0_stateless/02366_kql_func_string.sql | 185 +++++++++++++ 
.../0_stateless/02366_kql_summarize.reference | 2 +- .../0_stateless/02366_kql_summarize.sql | 4 +- .../queries/0_stateless/02366_kql_tabular.sql | 86 ++---- 7 files changed, 536 insertions(+), 67 deletions(-) create mode 100644 tests/queries/0_stateless/02366_kql_func_ip.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_ip.sql create mode 100644 tests/queries/0_stateless/02366_kql_func_string.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_string.sql diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference new file mode 100644 index 00000000000..050096fe2d1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -0,0 +1,28 @@ +-- ipv4_is_private(\'127.0.0.1\') +false +-- ipv4_is_private(\'10.1.2.3\') +true +-- ipv4_is_private(\'192.168.1.1/24\') +true +ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) +true +-- ipv4_is_private(\'abc\') +\N +-- ipv4_netmask_suffix(\'192.168.1.1/24\') +24 +-- ipv4_netmask_suffix(\'192.168.1.1\') +32 +-- ipv4_netmask_suffix(\'127.0.0.1/16\') +16 +-- ipv4_netmask_suffix(\'abc\') +\N +ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) +16 +-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') +1 +-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') +0 +ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +0 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql new file mode 100644 index 00000000000..3c35e7f58cc --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -0,0 +1,36 @@ +set dialect='kusto'; +print '-- ipv4_is_private(\'127.0.0.1\')'; +print ipv4_is_private('127.0.0.1'); +print '-- ipv4_is_private(\'10.1.2.3\')'; +print ipv4_is_private('10.1.2.3'); +print '-- ipv4_is_private(\'192.168.1.1/24\')'; +print 
ipv4_is_private('192.168.1.1/24'); +print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; +print ipv4_is_private(strcat('192.','168.','1.','1','/24')); +print '-- ipv4_is_private(\'abc\')'; +print ipv4_is_private('abc'); -- == null + +print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; +print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 +print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; +print ipv4_netmask_suffix('192.168.1.1'); -- == 32 +print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; +print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 +print '-- ipv4_netmask_suffix(\'abc\')'; +print ipv4_netmask_suffix('abc'); -- == null +print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; +print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 + +print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; +print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; +print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false +print 'ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false + +-- TODO: +-- print ipv4_is_in_range('abc', '127.0.0.1'); -- == null +-- parse_ipv4() +-- parse_ipv6() \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 00000000000..255acb486cd --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,262 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N 
why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 
26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where 
FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | project base64_encode_tostring(\'Kusto1\') | take 1 +S3VzdG8x + +-- Customers | project base64_decode_tostring(\'S3VzdG8x\') | take 1 +Kusto1 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) +\N +Lat en +Pet ra +The az +Ste x +App + +-- Customers | 
project name = strcat(FirstName, \' \', LastName) +\N +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +\N +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + +-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] + +-- split 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. +1-2-Ab + +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet +2 +2 +-1 diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 00000000000..cdf9b1e4b17 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,185 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect='kusto'; +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers |where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print '-- 
Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers | where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; +Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix \'Ent\''; 
+Customers | where Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs \'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | project base64_encode_tostring(\'Kusto1\') | take 1'; +Customers | project base64_encode_tostring('Kusto1') | take 1; +print ''; +print '-- Customers | project base64_decode_tostring(\'S3VzdG8x\') | take 1'; +Customers | project base64_decode_tostring('S3VzdG8x') | take 1; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- Customers | where isnotnull(FirstName)'; +Customers | where isnotnull(FirstName)| 
order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | project FirstName, strlen(FirstName)| order by LastName; +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | project strrep(FirstName,2,'_')| order by LastName; +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | project toupper(FirstName)| order by LastName; +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | project tolower(FirstName)| order by LastName; +print ''; +print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; +Customers | where Occupation has_all 
('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers|where Occupation has_any ('Skilled','abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20') | take 1; +Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)) | take 1; +print ''; +print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print ''; +print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; +Customers | project split('aabbcc', 'bb') | take 1; +print ''; +print '-- strcat_delim 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +-- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); +print ''; +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet'; +Customers | project indexof('abcdefg','cde') | take 1; +Customers | project indexof('abcdefg','cde',2) | take 1; +Customers | project indexof('abcdefg','cde',6) | take 1; + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference index d73f75b03c2..ef5ff544f63 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -20,6 +20,6 @@ Professional 117 Management abcd defg 33 4 2 -40 2 20 6 30 4 +40 2 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 8eba49f92f0..88d7641f3b9 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -30,9 +30,9 @@ Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; Customers | summarize MyMin = minif(Age, Age<40) by Occupation; Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; Customers | summarize MySum = sumif(Age, Age<40) by Occupation; -Customers | summarize dcount(Education, Occupation=='Professional'); +Customers | summarize dcount(Education); Customers | summarize dcountif(Education, Occupation=='Professional'); -Customers | summarize count() by bin(Age, 10) +Customers | summarize count() by bin(Age, 10) | order by count() ASC; -- The following does not work -- arg_max() diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql index 6a0a3417f42..e7f715eaedb 
100644 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -10,115 +10,73 @@ CREATE TABLE Customers INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); -set dialect='clickhouse'; -Select '-- test Query only has table name: --'; set dialect='kusto'; +print '-- test Query only has table name: --'; Customers; -set dialect='clickhouse'; -Select '-- Query has Column Selection --'; -set dialect='kusto'; +print '-- Query has Column Selection --'; Customers | project FirstName,LastName,Occupation; -set dialect='clickhouse'; -Select '-- Query has limit --'; -set dialect='kusto'; +print '-- Query has limit --'; Customers | project FirstName,LastName,Occupation | take 5; Customers | project FirstName,LastName,Occupation | limit 5; -set dialect='clickhouse'; -Select '-- Query has second limit with bigger value --'; -set dialect='kusto'; +print '-- Query has second limit with bigger value --'; Customers | project FirstName,LastName,Occupation | take 5 | take 7; -set dialect='clickhouse'; -Select '-- Query has second limit with smaller value --'; -set dialect='kusto'; +print '-- Query has second limit with smaller value --'; Customers | project FirstName,LastName,Occupation | take 5 | take 3; -set dialect='clickhouse'; -Select '-- Query has second Column selection --'; -set dialect='kusto'; +print '-- Query has second Column selection --'; Customers | project FirstName,LastName,Occupation | take 
3 | project FirstName,LastName; -set dialect='clickhouse'; -Select '-- Query has second Column selection with extra column --'; -set dialect='kusto'; +print '-- Query has second Column selection with extra column --'; Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education; -set dialect='clickhouse'; -Select '-- Query with desc sort --'; -set dialect='kusto'; +print '-- Query with desc sort --'; Customers | project FirstName | take 5 | sort by FirstName desc; Customers | project Occupation | take 5 | order by Occupation desc; -set dialect='clickhouse'; -Select '-- Query with asc sort --'; -set dialect='kusto'; +print '-- Query with asc sort --'; Customers | project Occupation | take 5 | sort by Occupation asc; -set dialect='clickhouse'; -Select '-- Query with sort (without keyword asc desc) --'; -set dialect='kusto'; +print '-- Query with sort (without keyword asc desc) --'; Customers | project FirstName | take 5 | sort by FirstName; Customers | project Occupation | take 5 | order by Occupation; -set dialect='clickhouse'; -Select '-- Query with sort 2 Columns with different direction --'; -set dialect='kusto'; +print '-- Query with sort 2 Columns with different direction --'; Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; -set dialect='clickhouse'; -Select '-- Query with second sort --'; -set dialect='kusto'; +print '-- Query with second sort --'; Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; -set dialect='clickhouse'; -Select '-- Test String Equals (==) --'; -set dialect='kusto'; +print '-- Test String Equals (==) --'; Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; -set dialect='clickhouse'; -Select '-- Test String Not equals (!=) --'; -set dialect='kusto'; +print '-- Test String Not equals (!=) --'; Customers | project FirstName,LastName,Occupation | 
where Occupation != 'Skilled Manual'; -set dialect='clickhouse'; -Select '-- Test Filter using a list (in) --'; -set dialect='kusto'; +print '-- Test Filter using a list (in) --'; Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); -set dialect='clickhouse'; -Select '-- Test Filter using a list (!in) --'; +print '-- Test Filter using a list (!in) --'; set dialect='kusto'; Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (contains_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (contains_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (startswith_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (startswith_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; -set dialect='clickhouse'; -Select '-- Test Filter using common string operations (endswith_cs) --'; -set dialect='kusto'; +print '-- Test Filter using common string operations (endswith_cs) --'; Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; -set dialect='clickhouse'; -Select '-- Test Filter using numerical equal (==) --'; -set dialect='kusto'; +print '-- Test Filter using numerical equal (==) --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; -set dialect='clickhouse'; -Select '-- Test Filter using numerical great and less (> , <) --'; -set dialect='kusto'; +print '-- Test Filter using numerical great and less (> , <) --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; -set dialect='clickhouse'; -Select '-- Test 
Filter using multi where --'; -set dialect='kusto'; +print '-- Test Filter using multi where --'; Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; From e404685f7d344aab6a5371f3589753934490d78d Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 27 Jul 2022 12:44:08 -0700 Subject: [PATCH 053/279] Improve conformance to the specifications --- .../KustoFunctions/IParserKQLFunction.cpp | 2 +- .../Kusto/KustoFunctions/IParserKQLFunction.h | 4 +- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 94 ++++++++++++++----- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 825c5eb5f92..56dc9e1b114 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -76,7 +76,7 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str return false; } -String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) +String getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 7ed3841583b..492d721f7ea 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -2,6 +2,7 @@ #include #include + namespace DB { class IParserKQLFunction @@ -35,10 +36,9 @@ public: protected: virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); - static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static 
String getKQLFunctionName(IParser::Pos & pos); }; +String getConvertedArgument(const String & fn_name, IParser::Pos & pos); } - diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 0383292669d..ddbd25d1fd4 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -22,9 +22,41 @@ namespace DB::ErrorCodes extern const int SYNTAX_ERROR; } +namespace +{ +std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) +{ + std::optional argument; + if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + return getConvertedArgument(function_name, pos); +} + +String getArgument(const String & function_name, DB::IParser::Pos & pos) +{ + return getOptionalArgument(function_name, pos).value(); +} + +String kqlCallToExpression( + const String & function_name, std::initializer_list> params, const uint32_t max_depth) +{ + const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} +} + namespace DB { - bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { String res = String(pos->begin, pos->end); @@ -38,13 +70,15 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - ++pos; - - const auto ip_address = getConvertedArgument(function_name, pos); - ++pos; - - const auto ip_range = getConvertedArgument(function_name, pos); - out = std::format("isIPAddressInRange({0}, concat({1}, if(position({1}, '/') > 0, '', '/32')))", ip_address, ip_range); + const auto ip_address = getArgument(function_name, pos); + const auto ip_range = getArgument(function_name, pos); + out = std::format( + "if(isNull(IPv4StringToNumOrNull({0}) as ip) or isNull({2} as calculated_mask) or " + "isNull(toIPv4OrNull(tokens[1]) as range_prefix_ip), null, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), " + "concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))", + ip_address, + ip_range, + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth)); return true; } @@ -57,26 +91,24 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) { - static const std::array PRIVATE_SUBNETS{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + static const std::array s_private_subnets{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; const auto function_name = getKQLFunctionName(pos); if (function_name.empty()) return false; - ++pos; - - const auto ip_address = getConvertedArgument(function_name, pos); + const auto ip_address = getArgument(function_name, pos); out 
+= std::format( "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " - "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as suffix), null, " + "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as mask), null, " "ignore(assumeNotNull(nullable_ip) as ip, " - "IPv4CIDRToRange(ip, assumeNotNull(suffix)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " + "IPv4CIDRToRange(ip, assumeNotNull(mask)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " "IPv4NumToString(tupleElement(range, 2)) as end), null, ", ip_address); - for (int i = 0; i < std::ssize(PRIVATE_SUBNETS); ++i) + for (int i = 0; i < std::ssize(s_private_subnets); ++i) { - const auto & subnet = PRIVATE_SUBNETS[i]; + const auto & subnet = s_private_subnets[i]; out += std::format( "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", @@ -93,19 +125,27 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) if (function_name.empty()) return false; - ++pos; - - const auto ip_range = getConvertedArgument(function_name, pos); + const auto ip_range = getArgument(function_name, pos); out = std::format( "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " - "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as suffix), null, toUInt8(min2(suffix, 32)))", + "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as mask), null, toUInt8(min2(mask, 32)))", ip_range); return true; } bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "toIPv4OrNull"); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens) = 1, IPv4StringToNumOrNull(tokens[1]) as ip, 
" + "length(tokens) = 2 and isNotNull(ip) and isNotNull(toUInt8OrNull(tokens[-1]) as mask), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)), 1), null)", + ip_address); + return true; } bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) @@ -131,7 +171,17 @@ bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "toIPv6OrNull"); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "if(isNull(ifNull(if(isNull({1} as ipv4), null, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull({0})) as ipv6), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\da-f]{{4}})')), ':'))", + ip_address, + kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth)); + return true; } bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) From b2be70a4e5f8678463dcc8050a562ca62b3d41ca Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 28 Jul 2022 07:24:45 -0700 Subject: [PATCH 054/279] Extract common functions --- .../KustoFunctions/IParserKQLFunction.cpp | 37 ++++++++++++++++-- .../Kusto/KustoFunctions/IParserKQLFunction.h | 23 ++++++++--- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 38 ------------------- 3 files changed, 50 insertions(+), 48 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 56dc9e1b114..a1dc9132b1b 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -17,16 +17,16 @@ #include #include +#include + namespace DB { - namespace ErrorCodes { extern const int SYNTAX_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } - bool 
IParserKQLFunction::convert(String & out,IParser::Pos & pos) { return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] @@ -76,7 +76,12 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str return false; } -String getConvertedArgument(const String & fn_name, IParser::Pos & pos) +String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) +{ + return getOptionalArgument(function_name, pos).value(); +} + +String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) { String converted_arg; std::vector tokens; @@ -115,6 +120,16 @@ String getConvertedArgument(const String & fn_name, IParser::Pos & pos) return converted_arg; } +std::optional IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) +{ + std::optional argument; + if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + return getConvertedArgument(function_name, pos); +} + String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) { String fn_name = String(pos->begin, pos->end); @@ -127,6 +142,21 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) return fn_name; } +String IParserKQLFunction::kqlCallToExpression( + const String & function_name, std::initializer_list> params, const uint32_t max_depth) +{ + const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} + void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) { if (pos->type != TokenType:: ClosingRoundBracket) @@ -148,5 +178,4 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) } return arg; } - } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 492d721f7ea..3613cb71fac 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -17,7 +17,11 @@ public: pos = begin; return res; } - struct IncreaseDepthTag {}; + + struct IncreaseDepthTag + { + }; + template ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) { @@ -29,16 +33,23 @@ public: pos = begin; return res; } - bool convert(String & out,IParser::Pos & pos); + + bool convert(String & out, IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; + static String getExpression(IParser::Pos & pos); + protected: - virtual bool convertImpl(String & out,IParser::Pos & pos) = 0; - static bool directMapping(String &out,IParser::Pos & pos,const String & ch_fn); + virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; + + static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn); + static String getArgument(const String & function_name, DB::IParser::Pos & pos); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); + static String kqlCallToExpression( + const String & 
function_name, std::initializer_list> params, uint32_t max_depth); static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static String getKQLFunctionName(IParser::Pos & pos); }; - -String getConvertedArgument(const String & fn_name, IParser::Pos & pos); } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index ddbd25d1fd4..63a4ade7ca9 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -17,44 +17,6 @@ #include -namespace DB::ErrorCodes -{ -extern const int SYNTAX_ERROR; -} - -namespace -{ -std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) -{ - std::optional argument; - if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) - return {}; - - ++pos; - return getConvertedArgument(function_name, pos); -} - -String getArgument(const String & function_name, DB::IParser::Pos & pos) -{ - return getOptionalArgument(function_name, pos).value(); -} - -String kqlCallToExpression( - const String & function_name, std::initializer_list> params, const uint32_t max_depth) -{ - const auto params_str = std::accumulate( - std::cbegin(params), - std::cend(params), - String(), - [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); - - const auto kql_call = std::format("{}({})", function_name, params_str); - DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth); - return DB::IParserKQLFunction::getExpression(tokens_pos); -} -} - namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) From bb4b8a94682b04268c573a7a658ecfb3baf41c58 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Wed, 3 Aug 2022 14:06:02 -0700 Subject: [PATCH 055/279] Fix bug in clickhouse-client for non-interactive mode --- src/Client/ClientBase.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34e06ac7dcd..578e1ff3432 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -354,9 +354,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu res = tryParseQuery(kql_parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); else if (dialect == Dialect::kusto_auto) { - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); - if (!res) { pos = begin; @@ -378,13 +376,15 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu res = parseQueryAndMovePosition(kql_parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); else if (dialect == Dialect::kusto_auto) { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - - if (!res) - { - pos = begin; - res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); - } + try + { + res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } + 
catch(...) + { + pos = begin; + res = parseQueryAndMovePosition(kql_parser, begin, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + } } else res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); From 5cfed5b2178a3122e4c90886bc3dafb4f99e1ed9 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 3 Aug 2022 16:49:36 -0700 Subject: [PATCH 056/279] Fix some IP function unit tests --- src/Parsers/tests/gtest_Parser.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 9550f4112a8..1559d4d44f3 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -486,25 +486,25 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, - { +{ "Customers | project ipv4_is_in_range(FirstName, LastName)", - "SELECT isIPAddressInRange(FirstName, concat(LastName, if(position(LastName, '/') > 0, '', '/32')))\nFROM Customers" + "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" }, { "Customers | project ipv4_is_private(Occupation)", - "SELECT (((length(splitByChar('/', Occupation) AS tokens) = 1) AND isIPAddressInRange(tokens[1] AS ip, '10.0.0.0/8')) OR ((length(tokens) = 2) AND 
isIPAddressInRange(IPv4NumToString((IPv4CIDRToRange(toIPv4(ip), if((toUInt8OrNull(tokens[-1]) AS suffix) IS NULL, throwIf(true, 'Unable to parse suffix'), assumeNotNull(suffix))) AS range).1) AS begin, '10.0.0.0/8') AND isIPAddressInRange(IPv4NumToString(range.2) AS end, '10.0.0.0/8'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '172.16.0.0/12')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12'))) OR (((length(tokens) = 1) AND isIPAddressInRange(ip, '192.168.0.0/16')) OR ((length(tokens) = 2) AND isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')))\nFROM Customers" + "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR ((toIPv4OrNull(tokens[1]) AS nullable_ip) IS NULL), NULL, (length(tokens) = 2) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NULL), NULL, ignore(assumeNotNull(nullable_ip) AS ip, IPv4CIDRToRange(ip, assumeNotNull(mask)) AS range, IPv4NumToString(range.1) AS begin, IPv4NumToString(range.2) AS end), NULL, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '10.0.0.0/8')) OR (isIPAddressInRange(begin, '10.0.0.0/8') AND isIPAddressInRange(end, '10.0.0.0/8')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '172.16.0.0/12')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '192.168.0.0/16')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')), true, false)\nFROM Customers" }, { "Customers | project ipv4_netmask_suffix(Occupation)", - "SELECT if((length(splitByChar('/', Occupation) AS tokens) <= 2) AND isIPv4String(tokens[1]), if(length(tokens) != 2, 32, if(((toInt8OrNull(tokens[-1]) AS suffix) >= 1) AND (suffix <= 32), suffix, throwIf(true, 'Suffix must be between 1 and 32'))), throwIf(true, 'Unable to recognize and IP address 
with or without a suffix'))\nFROM Customers" + "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32)))\nFROM Customers" }, { "Customers | project parse_ipv4(FirstName)", - "SELECT toIPv4OrNull(FirstName)\nFROM Customers" + "SELECT multiIf(length(splitByChar('/', FirstName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)\nFROM Customers" }, { "Customers | project parse_ipv6(LastName)", - "SELECT toIPv6OrNull(LastName)\nFROM Customers" + "SELECT if((ifNull(if((multiIf(length(splitByChar('/', LastName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL) AS ipv4) IS NULL, NULL, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull(LastName)) AS ipv6) IS NULL, NULL, arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\\\da-f]{4})')), ':'))\nFROM Customers" }, { "Customers|where Occupation has_any ('Skilled','abcd')", From 732c8ed83f7fb1232b74fa831bcefc8186b5bb27 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 4 Aug 2022 20:55:33 -0400 Subject: [PATCH 057/279] Fix rebase conflicts. 
--- src/Parsers/Kusto/ParserKQLOperators.h | 2 +- src/Parsers/Kusto/ParserKQLTable.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index ba8b151bdad..72e25cc3cf9 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index f1fc13d2c48..fadf5305e89 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { From 2e6e958ea209ca23fa45f5e4e1e705cc33b2176a Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 4 Aug 2022 22:33:08 -0400 Subject: [PATCH 058/279] Add make list set and more ip tests --- .../0_stateless/02366_kql_func_ip.reference | 10 ++++- .../queries/0_stateless/02366_kql_func_ip.sql | 18 ++++++-- .../0_stateless/02366_kql_summarize.reference | 24 ++++++++++ .../0_stateless/02366_kql_summarize.sql | 44 +++++++++++++------ 4 files changed, 78 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index 050096fe2d1..b6074a33b55 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -24,5 +24,13 @@ ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) 1 -- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') 0 -ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') 0 +-- ipv4_is_in_range(\'abc\', \'127.0.0.1\') +\N +-- parse_ipv6(127.0.0.1) +0000:0000:0000:0000:0000:ffff:7f00:0001 +-- parse_ipv6(fe80::85d:e82c:9446:7994) +fe80:0000:0000:0000:085d:e82c:9446:7994 +-- parse_ipv4(\'127.0.0.1\') 
+2130706433 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql index 3c35e7f58cc..a625c0bf470 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -27,10 +27,20 @@ print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false -print 'ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null +print ipv4_is_in_range('abc', '127.0.0.1'); +print '-- parse_ipv6(127.0.0.1)'; +print parse_ipv6('127.0.0.1'); +print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; +print parse_ipv6('fe80::85d:e82c:9446:7994'); +print '-- parse_ipv4(\'127.0.0.1\')'; +print parse_ipv4('127.0.0.1'); -- TODO: --- print ipv4_is_in_range('abc', '127.0.0.1'); -- == null --- parse_ipv4() --- parse_ipv6() \ No newline at end of file +-- print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -- == true + + + + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference index ef5ff544f63..dce19393a44 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -23,3 +23,27 @@ Management abcd defg 33 20 6 30 4 40 2 +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual 
['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql index 88d7641f3b9..048bdc9e712 100644 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -8,18 +8,21 @@ CREATE TABLE Customers Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); -INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); -INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); -INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); -INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); -INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); -INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); -INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); -INSERT INTO Customers VALUES 
('Angel','Stewart','Professional','Partial College',46); -INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); +-- INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); +-- INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); +-- INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); +-- INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); +-- INSERT INTO Customers VALUES ('Joshua','Lee','Professional','Partial College',26); +-- INSERT INTO Customers VALUES ('Edward','Hernandez','Skilled Manual','High School',36); +-- INSERT INTO Customers VALUES ('Dalton','Wood','Professional','Partial College',42); +-- INSERT INTO Customers VALUES ('Christine','Nara','Skilled Manual','Partial College',33); +-- INSERT INTO Customers VALUES ('Cameron','Rodriguez','Professional','Partial College',28); +-- INSERT INTO Customers VALUES ('Angel','Stewart','Professional','Partial College',46); +-- INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); +-- INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + Select '-- test summarize --' ; set 
dialect='kusto'; @@ -34,6 +37,21 @@ Customers | summarize dcount(Education); Customers | summarize dcountif(Education, Occupation=='Professional'); Customers | summarize count() by bin(Age, 10) | order by count() ASC; --- The following does not work +-- make_list() +Customers | summarize f_list = make_list(Education) by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation; +-- make_list_if() +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation; +-- make_set() +Customers | summarize f_list = make_set(Education) by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation; +-- make_set_if() +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation; + +-- TODO: -- arg_max() -- arg_min() +-- make_list_with_nulls() +-- Customers | sort by FirstName | summarize count(Education) by Occupation; \ No newline at end of file From e77a6333a1a0740f31ef30185e7f31bd6a6dd6ce Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 2 Aug 2022 07:35:21 -0700 Subject: [PATCH 059/279] Implement KQL functions handling IPv4 --- .../KustoFunctions/IParserKQLFunction.cpp | 90 +++++++++++-------- .../Kusto/KustoFunctions/IParserKQLFunction.h | 7 +- .../KustoFunctions/KQLCastingFunctions.cpp | 36 ++++---- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 82 +++++++++++++---- 4 files changed, 142 insertions(+), 73 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index a1dc9132b1b..bf46364f1f0 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -1,21 +1,21 @@ -#include -#include #include #include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include @@ -27,22 +27,25 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -bool IParserKQLFunction::convert(String & out,IParser::Pos & pos) +bool IParserKQLFunction::convert(String & out, IParser::Pos & pos) { - return wrapConvertImpl(pos, IncreaseDepthTag{}, [&] - { - bool res = convertImpl(out,pos); - if (!res) - out = ""; - return res; - }); + return wrapConvertImpl( + pos, + IncreaseDepthTag{}, + [&] + { + bool res = convertImpl(out, pos); + if (!res) + out = ""; + return res; + }); } -bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const String & ch_fn) +bool IParserKQLFunction::directMapping(String & out, IParser::Pos & pos, const String & ch_fn) { std::vector arguments; - String fn_name = getKQLFunctionName(pos); + String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -52,17 +55,17 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str ++pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String argument = getConvertedArgument(fn_name,pos); + String argument = getConvertedArgument(fn_name, pos); arguments.push_back(argument); if (pos->type == TokenType::ClosingRoundBracket) { - for (auto arg : arguments) + for (auto arg : arguments) { if (res.empty()) res = ch_fn + "(" + arg; else - res = res + ", "+ arg; + res = res + ", " + arg; } res += ")"; @@ -78,7 +81,10 @@ bool IParserKQLFunction::directMapping(String & out,IParser::Pos & pos,const Str String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) { - return getOptionalArgument(function_name, pos).value(); + if (auto optionalArgument = getOptionalArgument(function_name, pos)) + 
return std::move(*optionalArgument); + + throw Exception(std::format("Required argument was not provided in {}", function_name), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) @@ -95,11 +101,11 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String token = String(pos->begin,pos->end); + String token = String(pos->begin, pos->end); String new_token; - if (!KQLOperators().convert(tokens,pos)) + if (!KQLOperators().convert(tokens, pos)) { - if (pos->type == TokenType::BareWord ) + if (pos->type == TokenType::BareWord) { tokens.push_back(IParserKQLFunction::getExpression(pos)); } @@ -114,15 +120,14 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) break; } - for (auto token : tokens) - converted_arg = converted_arg + token +" "; + for (auto token : tokens) + converted_arg = converted_arg + token + " "; return converted_arg; } std::optional IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos) { - std::optional argument; if (const auto & type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) return {}; @@ -139,17 +144,24 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) --pos; return ""; } - return fn_name; + return fn_name; } String IParserKQLFunction::kqlCallToExpression( - const String & function_name, std::initializer_list> params, const uint32_t max_depth) + const String & function_name, std::initializer_list params, const uint32_t max_depth) { const auto params_str = std::accumulate( std::cbegin(params), std::cend(params), String(), - [](auto acc, const auto & param) { return (acc.empty() ? 
"" : ", ") + std::move(acc) + param.get(); }); + [](String acc, const std::string_view param) + { + if (!acc.empty()) + acc.append(", "); + + acc.append(param.data(), param.length()); + return acc; + }); const auto kql_call = std::format("{}({})", function_name, params_str); DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); @@ -159,14 +171,14 @@ String IParserKQLFunction::kqlCallToExpression( void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) { - if (pos->type != TokenType:: ClosingRoundBracket) + if (pos->type != TokenType::ClosingRoundBracket) throw Exception("Too many arguments in function: " + fn_name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } String IParserKQLFunction::getExpression(IParser::Pos & pos) { String arg = String(pos->begin, pos->end); - if (pos->type == TokenType::BareWord ) + if (pos->type == TokenType::BareWord) { String new_arg; auto fun = KQLFunctionFactory::get(arg); diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 3613cb71fac..b7f8427043c 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -17,7 +17,7 @@ public: pos = begin; return res; } - + struct IncreaseDepthTag { }; @@ -33,7 +33,7 @@ public: pos = begin; return res; } - + bool convert(String & out, IParser::Pos & pos); virtual const char * getName() const = 0; virtual ~IParserKQLFunction() = default; @@ -47,8 +47,7 @@ protected: static String getArgument(const String & function_name, DB::IParser::Pos & pos); static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); - static String kqlCallToExpression( - const String & function_name, std::initializer_list> params, uint32_t max_depth); + static String kqlCallToExpression(const String & 
function_name, std::initializer_list params, uint32_t max_depth); static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); static String getKQLFunctionName(IParser::Pos & pos); }; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index 9129d82aa78..acbb7468d20 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -1,48 +1,54 @@ #include #include -#include #include +#include + +#include namespace DB { -bool ToBool::convertImpl(String &out,IParser::Pos &pos) +bool ToBool::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToDouble::convertImpl(String &out,IParser::Pos &pos) +bool ToDouble::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToInt::convertImpl(String &out,IParser::Pos &pos) +bool ToInt::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ToString::convertImpl(String &out,IParser::Pos &pos) +bool ToString::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("ifNull(toString({0}), '')", param); + return true; } -bool ToTimeSpan::convertImpl(String 
&out,IParser::Pos &pos) +bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 63a4ade7ca9..c8d16b8b918 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -21,9 +21,25 @@ namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + + out = std::format( + "multiIf(length(splitByChar('/', {0}) as lhs) > 2 or length(splitByChar('/', {1}) as rhs) > 2, null, " + "isNull(toIPv4OrNull(lhs[1]) as lhs_ip) or length(lhs) = 2 and isNull(toUInt8OrNull(lhs[-1]) as lhs_mask) or " + "isNull(toIPv4OrNull(rhs[1]) as rhs_ip) or length(rhs) = 2 and isNull(toUInt8OrNull(rhs[-1]) as rhs_mask), null, " + "ignore(toUInt8(min2(32, min2({2}, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) as mask), null, " + "sign(toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask), 1))" + " - toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask), 1))))", + lhs, + rhs, + mask ? 
*mask : "32"); + return true; } bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) @@ -46,9 +62,16 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + + out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + return true; } bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) @@ -112,9 +135,18 @@ bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull(toIPv4OrNull({0}) as ip) or isNull(toUInt8OrNull(toString({1})) as mask), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))), 1)))", + ip_address, + mask); + return true; } bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) @@ -155,15 +187,35 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = 
getOptionalArgument(function_name, pos); + out = std::format( + "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32) and toTypeName({0}) = 'String' or {1} < 0, null, " + "isNull(ifNull(param_as_uint32, {2}) as ip_as_number), null, " + "IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", + ip_address, + mask ? *mask : "32", + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth)); + return true; } bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask ? *mask : "32"; + out = std::format( + "if(empty({1} as formatted_ip) or not {0} between 0 and 32, '', concat(formatted_ip, '/', toString({0})))", + calculated_mask, + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth)); + return true; } } From cdfec101d0bc93fe6948bd45aa4c84273f476fd4 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 2 Aug 2022 16:28:50 -0700 Subject: [PATCH 060/279] Add unit tests and release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 29 +++++++++++++++++++-- src/Parsers/tests/gtest_Parser.cpp | 38 +++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ae3f9e1b671..b37b991a2a2 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,4 +1,5 @@ -# KQL implemented features. +# August XX, 2022 +## KQL implemented features The config setting to allow modify dialect setting. 
- Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile according to dialect value. @@ -22,7 +23,31 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` -# Augest 1, 2022 + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` + +# August 1, 2022 - **strcmp** 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1559d4d44f3..6590fc962cb 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -486,7 +486,43 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, -{ + { + "print format_ipv4(ip)", + "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '')" + }, + { + "print format_ipv4(ip, mask)", + "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '')" + }, + { + "print format_ipv4_mask(ip)", + "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, 
multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '') AS formatted_ip) OR (NOT ((32 >= 0) AND (32 <= 32))), '', concat(formatted_ip, '/', toString(32)))" + }, + { + "print format_ipv4_mask(ip, mask)", + "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '') AS formatted_ip) OR (NOT ((mask >= 0) AND (mask <= 32))), '', concat(formatted_ip, '/', toString(mask)))" + }, + { + "print ipv4_compare(ip1, ip2)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" + }, + { + "print ipv4_compare(ip1, ip2, mask)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), 
NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" + }, + { + "print ipv4_is_match(ip1, ip2)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" + }, + { + "print ipv4_is_match(ip1, ip2, mask)", + "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" + }, + { + "print parse_ipv4_mask(ip, mask)", + "SELECT if(((toIPv4OrNull(ip) AS ip) IS NULL) OR ((toUInt8OrNull(toString(mask)) AS mask) IS NULL), NULL, toUInt32(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))).1))" + }, + 
{ "Customers | project ipv4_is_in_range(FirstName, LastName)", "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" }, From 1200ab01d455521a65b7294526528aa56d2c9393 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 4 Aug 2022 10:16:23 -0700 Subject: [PATCH 061/279] Make aliases unique --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 99 +++++++++++-------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index c8d16b8b918..ddd9b9ab0ab 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -15,8 +15,19 @@ #include #include +#include + #include +namespace +{ +String generateUniqueIdentifier() +{ + static pcg32_unique unique_random_generator; + return std::to_string(unique_random_generator()); +} +} + namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) @@ -30,15 +41,17 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as lhs) > 2 or length(splitByChar('/', {1}) as rhs) > 2, null, " - "isNull(toIPv4OrNull(lhs[1]) as lhs_ip) or length(lhs) = 2 and isNull(toUInt8OrNull(lhs[-1]) as lhs_mask) or " - "isNull(toIPv4OrNull(rhs[1]) as rhs_ip) or length(rhs) = 2 and isNull(toUInt8OrNull(rhs[-1]) as rhs_mask), null, " 
- "ignore(toUInt8(min2(32, min2({2}, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) as mask), null, " - "sign(toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask), 1))" - " - toInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask), 1))))", - lhs, - rhs, - mask ? *mask : "32"); + "multiIf(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}), null, " + "isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "ignore(toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), mask_{5}), 1))" + " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))", + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + mask ? 
*mask : "32", + generateUniqueIdentifier()); return true; } @@ -51,12 +64,13 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto ip_range = getArgument(function_name, pos); out = std::format( - "if(isNull(IPv4StringToNumOrNull({0}) as ip) or isNull({2} as calculated_mask) or " - "isNull(toIPv4OrNull(tokens[1]) as range_prefix_ip), null, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), " - "concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))", + "multiIf(isNull(IPv4StringToNumOrNull({0}) as ip_{3}), null, " + "isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", ip_address, - ip_range, - kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + generateUniqueIdentifier()); return true; } @@ -85,19 +99,23 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); + // Generate the alias suffix once: the per-subnet checks below reference the aliases (tokens_, ip_, begin_, end_) defined by the first fragment, so every fragment must use the same suffix. + const auto unique_identifier = generateUniqueIdentifier(); out += std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or isNull(toIPv4OrNull(tokens[1]) as nullable_ip), null, " - "length(tokens) = 2 and isNull(toUInt8OrNull(tokens[-1]) as mask), null, " - "ignore(assumeNotNull(nullable_ip) as ip, " - "IPv4CIDRToRange(ip, assumeNotNull(mask)) as range, IPv4NumToString(tupleElement(range, 1)) as begin, " - "IPv4NumToString(tupleElement(range, 2)) as end), null, ", - ip_address); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}), null, " + "length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " + "IPv4CIDRToRange(ip_{1}, 
assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " + "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", + ip_address, + unique_identifier); for (int i = 0; i < std::ssize(s_private_subnets); ++i) { const auto & subnet = s_private_subnets[i]; out += std::format( - "length(tokens) = 1 and isIPAddressInRange(IPv4NumToString(ip), '{0}') or " - "isIPAddressInRange(begin, '{0}') and isIPAddressInRange(end, '{0}'), true, ", - subnet); + "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " + "isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}'), true, ", + subnet, + unique_identifier); } out += "false)"; @@ -112,9 +128,10 @@ bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) const auto ip_range = getArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) > 2 or not isIPv4String(tokens[1]), null, " - "length(tokens) = 1, 32, isNull(toUInt8OrNull(tokens[-1]) as mask), null, toUInt8(min2(mask, 32)))", - ip_range); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or not isIPv4String(tokens_{1}[1]), null, " + "length(tokens_{1}) = 1, 32, isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, toUInt8(min2(mask_{1}, 32)))", + ip_range, + generateUniqueIdentifier()); return true; } @@ -126,10 +143,11 @@ bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "multiIf(length(splitByChar('/', {0}) as tokens) = 1, IPv4StringToNumOrNull(tokens[1]) as ip, " - "length(tokens) = 2 and isNotNull(ip) and isNotNull(toUInt8OrNull(tokens[-1]) as mask), " - "tupleElement(IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)), 1), null)", - ip_address); + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) = 1, IPv4StringToNumOrNull(tokens_{1}[1]) as ip_{1}, " + "length(tokens_{1}) = 2 and 
isNotNull(ip_{1}) and isNotNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{1}), assumeNotNull(mask_{1})), 1), null)", + ip_address, + generateUniqueIdentifier()); return true; } @@ -142,10 +160,11 @@ bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getArgument(function_name, pos); out = std::format( - "if(isNull(toIPv4OrNull({0}) as ip) or isNull(toUInt8OrNull(toString({1})) as mask), null, " - "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, min2(32, assumeNotNull(mask))))), 1)))", + "if(isNull(toIPv4OrNull({0}) as ip_{2}) or isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), toUInt8(max2(0, min2(32, assumeNotNull(mask_{2}))))), 1)))", ip_address, - mask); + mask, + generateUniqueIdentifier()); return true; } @@ -194,12 +213,13 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32) and toTypeName({0}) = 'String' or {1} < 0, null, " - "isNull(ifNull(param_as_uint32, {2}) as ip_as_number), null, " - "IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", + "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0, null, " + "isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", ip_address, mask ? 
*mask : "32", - kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + generateUniqueIdentifier()); return true; } @@ -213,9 +233,10 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) const auto mask = getOptionalArgument(function_name, pos); const auto calculated_mask = mask ? *mask : "32"; out = std::format( - "if(empty({1} as formatted_ip) or not {0} between 0 and 32, '', concat(formatted_ip, '/', toString({0})))", + "if(empty({1} as formatted_ip_{2}) or not {0} between 0 and 32, '', concat(formatted_ip_{2}, '/', toString({0})))", calculated_mask, - kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth)); + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + generateUniqueIdentifier()); return true; } } From 4f45a199f9101dbc4afa3e1781985e8468b5a445 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 5 Aug 2022 13:58:32 -0700 Subject: [PATCH 062/279] Implement unit tests for IP functions --- contrib/googletest-cmake/CMakeLists.txt | 4 +- src/CMakeLists.txt | 1 + .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 7 +- src/Parsers/tests/gtest_KQL.cpp | 86 +++++++++++++++++++ src/Parsers/tests/gtest_Parser.cpp | 65 +------------- src/Parsers/tests/gtest_common.h | 7 ++ 6 files changed, 102 insertions(+), 68 deletions(-) create mode 100644 src/Parsers/tests/gtest_KQL.cpp create mode 100644 src/Parsers/tests/gtest_common.h diff --git a/contrib/googletest-cmake/CMakeLists.txt b/contrib/googletest-cmake/CMakeLists.txt index 3905df03155..94c35656987 100644 --- a/contrib/googletest-cmake/CMakeLists.txt +++ b/contrib/googletest-cmake/CMakeLists.txt @@ -14,12 +14,12 @@ add_library(_gtest_all INTERFACE) target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main) add_library(ch_contrib::gtest_all ALIAS _gtest_all) - add_library(_gmock 
"${SRC_DIR}/googlemock/src/gmock-all.cc") set_target_properties(_gmock PROPERTIES VERSION "1.0.0") target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0) -target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include" "${SRC_DIR}/googletest/include") +target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include") target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock") +target_link_libraries(_gmock PUBLIC _gtest) add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc") set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 81c74234473..65cf68c1608 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -588,6 +588,7 @@ if (ENABLE_TESTS) ) target_link_libraries(unit_tests_dbms PRIVATE + ch_contrib::gmock_all ch_contrib::gtest_all ch_contrib::gmock_all clickhouse_functions diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index ddd9b9ab0ab..2ee483fa843 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -190,10 +190,11 @@ bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "if(isNull(ifNull(if(isNull({1} as ipv4), null, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull({0})) as ipv6), null, " - "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\da-f]{{4}})')), ':'))", + "if(isNull(ifNull(if(isNull({1} as ipv4_{2}), null, IPv4ToIPv6(ipv4_{2})), IPv6StringToNumOrNull({0})) as ipv6_{2}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6_{2}))), '([\\da-f]{{4}})')), ':'))", ip_address, - kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth)); + kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth), + generateUniqueIdentifier()); return true; 
} diff --git a/src/Parsers/tests/gtest_KQL.cpp b/src/Parsers/tests/gtest_KQL.cpp new file mode 100644 index 00000000000..5cd4e39e278 --- /dev/null +++ b/src/Parsers/tests/gtest_KQL.cpp @@ -0,0 +1,86 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include +#include + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print format_ipv4(A)", + "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" + }, + { + "print format_ipv4(A, B)", + "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, 
multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" + }, + { + "print format_ipv4_mask(A)", + "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" + }, + { + "print format_ipv4_mask(A, B)", + "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(B\\)\\)\\)" + }, + { + "print ipv4_compare(A, B)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, 
min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + }, + { + "print ipv4_compare(A, B, C)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - 
toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + }, + { + "print ipv4_is_match(A, B)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + }, + { + "print ipv4_is_match(A, B, C)", + "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS 
tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + }, + { + "print parse_ipv4_mask(A, B)", + "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, 
assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + }, + { + "print ipv4_is_in_range(A, B)", + "SELECT multiIf\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL, NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + }, + { + "print ipv4_is_private(A)", + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\), NULL, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND 
isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\), true, false\\)" + }, + { + "print ipv4_netmask_suffix(A)", + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\)" + }, + { + "print parse_ipv4(A)", + "SELECT multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)" + }, + { + "print parse_ipv6(A)", + "SELECT if\\(\\(ifNull\\(if\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS ipv4_\\d+\\) IS NULL, NULL, IPv4ToIPv6\\(ipv4_\\d+\\)\\), IPv6StringToNumOrNull\\(A\\)\\) AS ipv6_\\d+\\) IS NULL, NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(assumeNotNull\\(ipv6_\\d+\\)\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + } +}))); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 6590fc962cb..87df4a3fdee 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,3 +1,5 @@ +#include 
"gtest_common.h" + #include #include #include @@ -25,13 +27,6 @@ using namespace DB; using namespace std::literals; } - -struct ParserTestCase -{ - const std::string_view input_text; - const char * expected_ast = nullptr; -}; - std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { return ostr << "Parser: " << parser->getName(); @@ -486,62 +481,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "Customers | where Age in ((Customers|project Age|where Age < 30))", "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" }, - { - "print format_ipv4(ip)", - "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '')" - }, - { - "print format_ipv4(ip, mask)", - "SELECT ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '')" - }, - { - "print format_ipv4_mask(ip)", - "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (32 < 0), NULL, 
(ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - 32) - 1))))), '') AS formatted_ip) OR (NOT ((32 >= 0) AND (32 <= 32))), '', concat(formatted_ip, '/', toString(32)))" - }, - { - "print format_ipv4_mask(ip, mask)", - "SELECT if(empty(ifNull(multiIf((((toUInt32OrNull(toString(ip)) AS param_as_uint32) IS NOT NULL) AND (toTypeName(ip) = 'String')) OR (mask < 0), NULL, (ifNull(param_as_uint32, multiIf(length(splitByChar('/', ifNull(toString(ip), '')) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)) AS ip_as_number) IS NULL, NULL, IPv4NumToString(bitAnd(ip_as_number, bitNot(toUInt32(intExp2(32 - mask) - 1))))), '') AS formatted_ip) OR (NOT ((mask >= 0) AND (mask <= 32))), '', concat(formatted_ip, '/', toString(mask)))" - }, - { - "print ipv4_compare(ip1, ip2)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" - }, - { - "print ipv4_compare(ip1, ip2, mask)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR 
(length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1)))" - }, - { - "print ipv4_is_match(ip1, ip2)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(32, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" - }, - { - "print ipv4_is_match(ip1, ip2, mask)", - "SELECT multiIf((length(splitByChar('/', ip1) AS lhs) > 2) OR (length(splitByChar('/', ip2) AS rhs) > 2), NULL, ((toIPv4OrNull(lhs[1]) AS lhs_ip) IS NULL) OR ((length(lhs) = 2) AND ((toUInt8OrNull(lhs[-1]) AS lhs_mask) IS NULL)) OR ((toIPv4OrNull(rhs[1]) AS rhs_ip) IS NULL) OR ((length(rhs) = 2) AND ((toUInt8OrNull(rhs[-1]) AS rhs_mask) IS NULL)), NULL, ignore(toUInt8(min2(32, min2(mask, min2(ifNull(lhs_mask, 32), ifNull(rhs_mask, 32))))) AS mask), NULL, sign(toInt32(IPv4CIDRToRange(assumeNotNull(lhs_ip), mask).1) - toInt32(IPv4CIDRToRange(assumeNotNull(rhs_ip), mask).1))) = 0" - }, - { - "print parse_ipv4_mask(ip, mask)", - "SELECT if(((toIPv4OrNull(ip) AS ip) IS NULL) OR ((toUInt8OrNull(toString(mask)) AS mask) IS NULL), NULL, toUInt32(IPv4CIDRToRange(assumeNotNull(ip), toUInt8(max2(0, 
min2(32, assumeNotNull(mask))))).1))" - }, - { - "Customers | project ipv4_is_in_range(FirstName, LastName)", - "SELECT if(((IPv4StringToNumOrNull(FirstName) AS ip) IS NULL) OR ((multiIf((length(splitByChar('/', LastName) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32))) AS calculated_mask) IS NULL) OR ((toIPv4OrNull(tokens[1]) AS range_prefix_ip) IS NULL), NULL, isIPAddressInRange(IPv4NumToString(assumeNotNull(ip)), concat(IPv4NumToString(assumeNotNull(range_prefix_ip)), '/', toString(assumeNotNull(calculated_mask)))))\nFROM Customers" - }, - { - "Customers | project ipv4_is_private(Occupation)", - "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR ((toIPv4OrNull(tokens[1]) AS nullable_ip) IS NULL), NULL, (length(tokens) = 2) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NULL), NULL, ignore(assumeNotNull(nullable_ip) AS ip, IPv4CIDRToRange(ip, assumeNotNull(mask)) AS range, IPv4NumToString(range.1) AS begin, IPv4NumToString(range.2) AS end), NULL, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '10.0.0.0/8')) OR (isIPAddressInRange(begin, '10.0.0.0/8') AND isIPAddressInRange(end, '10.0.0.0/8')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '172.16.0.0/12')) OR (isIPAddressInRange(begin, '172.16.0.0/12') AND isIPAddressInRange(end, '172.16.0.0/12')), true, ((length(tokens) = 1) AND isIPAddressInRange(IPv4NumToString(ip), '192.168.0.0/16')) OR (isIPAddressInRange(begin, '192.168.0.0/16') AND isIPAddressInRange(end, '192.168.0.0/16')), true, false)\nFROM Customers" - }, - { - "Customers | project ipv4_netmask_suffix(Occupation)", - "SELECT multiIf((length(splitByChar('/', Occupation) AS tokens) > 2) OR (NOT isIPv4String(tokens[1])), NULL, length(tokens) = 1, 32, (toUInt8OrNull(tokens[-1]) AS mask) IS NULL, NULL, toUInt8(min2(mask, 32)))\nFROM Customers" - }, - { - "Customers | project 
parse_ipv4(FirstName)", - "SELECT multiIf(length(splitByChar('/', FirstName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL)\nFROM Customers" - }, - { - "Customers | project parse_ipv6(LastName)", - "SELECT if((ifNull(if((multiIf(length(splitByChar('/', LastName) AS tokens) = 1, IPv4StringToNumOrNull(tokens[1]) AS ip, (length(tokens) = 2) AND (ip IS NOT NULL) AND ((toUInt8OrNull(tokens[-1]) AS mask) IS NOT NULL), IPv4CIDRToRange(assumeNotNull(ip), assumeNotNull(mask)).1, NULL) AS ipv4) IS NULL, NULL, IPv4ToIPv6(ipv4)), IPv6StringToNumOrNull(LastName)) AS ipv6) IS NULL, NULL, arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6))), '([\\\\da-f]{4})')), ':'))\nFROM Customers" - }, { "Customers|where Occupation has_any ('Skilled','abcd')", "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Occupation, 'Skilled') OR hasTokenCaseInsensitive(Occupation, 'abcd')" diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h new file mode 100644 index 00000000000..abbc1a0cb0e --- /dev/null +++ b/src/Parsers/tests/gtest_common.h @@ -0,0 +1,7 @@ +#include + +struct ParserTestCase +{ + const std::string_view input_text; + const char * expected_ast = nullptr; +}; From d8f43dc0ff71b6580a2be5c5a533f97a3f856da8 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 8 Aug 2022 07:27:00 -0700 Subject: [PATCH 063/279] Move KQL tests into their own folder --- src/Parsers/tests/{gtest_KQL.cpp => KQL/gtest_KQL_IP.cpp} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/Parsers/tests/{gtest_KQL.cpp => KQL/gtest_KQL_IP.cpp} (99%) diff --git a/src/Parsers/tests/gtest_KQL.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp similarity index 99% rename from src/Parsers/tests/gtest_KQL.cpp rename to src/Parsers/tests/KQL/gtest_KQL_IP.cpp 
index 5cd4e39e278..cf1d0838176 100644 --- a/src/Parsers/tests/gtest_KQL.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -1,4 +1,4 @@ -#include "gtest_common.h" +#include #include #include From 198976c730aa70f9635f8f6fe32c9ecc5cb90352 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 8 Aug 2022 07:38:51 -0700 Subject: [PATCH 064/279] Improve performance of IPv4 functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 2ee483fa843..47d01e42ae3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -39,12 +39,11 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); - out = std::format( - "multiIf(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}), null, " - "isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " - "ignore(toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), null, " - "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), mask_{5}), 1))" + "if(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}) " + "or isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " + "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1))" " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))", kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), @@ -64,8 +63,8 @@ bool 
Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto ip_range = getArgument(function_name, pos); out = std::format( - "multiIf(isNull(IPv4StringToNumOrNull({0}) as ip_{3}), null, " - "isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "if(isNull(IPv4StringToNumOrNull({0}) as ip_{3}) " + "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", ip_address, kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), @@ -83,7 +82,6 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); - out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); return true; } @@ -97,26 +95,30 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) return false; const auto ip_address = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); out += std::format( - "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}), null, " - "length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " "IPv4CIDRToRange(ip_{1}, assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), null, ", ip_address, - generateUniqueIdentifier()); + unique_identifier); for 
(int i = 0; i < std::ssize(s_private_subnets); ++i) { + if (i > 0) + out += " or"; + const auto & subnet = s_private_subnets[i]; out += std::format( "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " - "isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}'), true, ", + "length(tokens_{1}) = 2 and isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}')", subnet, - generateUniqueIdentifier()); + unique_identifier); } - out += "false)"; + out += ")"; return true; } @@ -214,8 +216,8 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); out = std::format( - "ifNull(multiIf(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0, null, " - "isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or {1} < 0 " + "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - {1}) - 1))))), '')", ip_address, mask ? 
*mask : "32", From 1823c074ec5f16b6675427ce938c918b215fa95c Mon Sep 17 00:00:00 2001 From: kashwy Date: Tue, 9 Aug 2022 06:11:39 -0700 Subject: [PATCH 065/279] Kusto-phase2: Add kusto data types --- .../KustoFunctions/IParserKQLFunction.cpp | 10 + .../KustoFunctions/KQLDataTypeFunctions.cpp | 141 +++++++++ .../KustoFunctions/KQLDataTypeFunctions.h | 78 +++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 21 +- .../KustoFunctions/KQLDateTimeFunctions.h | 4 +- .../KustoFunctions/KQLFunctionFactory.cpp | 59 +++- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 16 +- .../KustoFunctions/KQLGeneralFunctions.cpp | 38 +++ .../KustoFunctions/KQLGeneralFunctions.h | 7 + .../KustoFunctions/KQLStringFunctions.cpp | 66 +++- .../Kusto/ParserKQLDateTypeTimespan.cpp | 121 ++++++++ src/Parsers/Kusto/ParserKQLDateTypeTimespan.h | 36 +++ src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 286 ++++++++++++++++++ src/Parsers/Kusto/ParserKQLMakeSeries.h | 45 +++ src/Parsers/Kusto/ParserKQLQuery.cpp | 8 + src/Parsers/Kusto/ParserKQLQuery.h | 2 +- 16 files changed, 909 insertions(+), 29 deletions(-) create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp create mode 100644 src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h create mode 100644 src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp create mode 100644 src/Parsers/Kusto/ParserKQLDateTypeTimespan.h create mode 100644 src/Parsers/Kusto/ParserKQLMakeSeries.cpp create mode 100644 src/Parsers/Kusto/ParserKQLMakeSeries.h diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index bf46364f1f0..0b7eb403a22 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -187,6 +188,15 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) validateEndOfFunction(arg, pos); arg = new_arg; } + else + { + 
ParserKQLDateTypeTimespan time_span; + ASTPtr node; + Expected expected; + + if (time_span.parse(pos, node, expected)) + arg = std::to_string(time_span.toSeconds()); + } } return arg; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp new file mode 100644 index 00000000000..2a59ab8b72a --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +/* +#include +#include +#include +#include +#include +#include +#include +#include +#include +*/ +#include +#include + +namespace DB +{ + +bool DatatypeBool::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String datetime_str; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier) + datetime_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); + else if (pos->type == TokenType::StringLiteral) + datetime_str = String(pos->begin, pos->end); + else + { auto start = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + if (pos->type == TokenType::ClosingRoundBracket) + break; + } + --pos; + datetime_str = std::format("'{}'",String(start->begin,pos->end)); + } + out = std::format("toDateTime64({},9,'UTC')", datetime_str); + ++pos; + return true; +} + +bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String guid_str; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier) + guid_str = 
std::format("'{}'", String(pos->begin+1, pos->end -1)); + else if (pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin, pos->end); + else + { auto start = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + ++pos; + if (pos->type == TokenType::ClosingRoundBracket) + break; + } + --pos; + guid_str = std::format("'{}'",String(start->begin,pos->end)); + } + out = guid_str; + ++pos; + return true; +} + +bool DatatypeInt::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeLong::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeReal::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +bool DatatypeTimespan::convertImpl(String &out,IParser::Pos &pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + + out = getConvertedArgument(fn_name, pos); + return true; +} + +bool DatatypeDecimal::convertImpl(String &out,IParser::Pos &pos) +{ + String res = String(pos->begin,pos->end); + out = res; + return false; +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h new file mode 100644 index 00000000000..325fb3457ff --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +namespace DB +{ +class DatatypeBool : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bool(),boolean()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDatetime : public 
IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime(),date()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDynamic : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dynamic()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeGuid : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "guid()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeInt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "int()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeLong : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "long()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeReal : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "real(),double()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeString : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "string()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeTimespan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "timespan(), time()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatatypeDecimal : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "decimal()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 0f098cbebda..3b00ccbceb8 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -24,13 +25,13 @@ bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) out = res; return false; } - +/* bool DateTime::convertImpl(String &out,IParser::Pos &pos) { String res = String(pos->begin,pos->end); out = res; return false; -} +}*/ bool Ago::convertImpl(String &out,IParser::Pos &pos) { @@ -153,9 +154,19 @@ bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) bool Now::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + { + const auto offset = getConvertedArgument(fn_name, pos); + out = std::format("now('UTC') + {}", offset); + } + else + out = "now('UTC')"; + return true; } bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index 7627465ab5b..ee87be15eda 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -11,13 +11,13 @@ protected: const char * getName() const override { return "timespan()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; - +/* class DateTime : public IParserKQLFunction { protected: const char * getName() const override { return "datetime()"; } bool convertImpl(String &out,IParser::Pos &pos) override; -}; +};*/ class Ago : public IParserKQLFunction { diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 25e0c2af2f9..c66bfd60647 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -15,12 +15,12 @@ 
#include #include #include +#include namespace DB { std::unordered_map KQLFunctionFactory::kql_functions = { - {"datetime", KQLFunctionValue::datetime}, {"ago", KQLFunctionValue::ago}, {"datetime_add", KQLFunctionValue::datetime_add}, {"datetime_part", KQLFunctionValue::datetime_part}, @@ -202,7 +202,24 @@ namespace DB {"binary_shift_right", KQLFunctionValue::binary_shift_right}, {"binary_xor", KQLFunctionValue::binary_xor}, {"bitset_count_ones", KQLFunctionValue::bitset_count_ones}, - {"bin", KQLFunctionValue::bin} + + {"bin", KQLFunctionValue::bin}, + {"bin_at", KQLFunctionValue::bin_at}, + + {"bool", KQLFunctionValue::datatype_bool}, + {"boolean", KQLFunctionValue::datatype_bool}, + {"datetime", KQLFunctionValue::datatype_datetime}, + {"date", KQLFunctionValue::datatype_datetime}, + {"dynamic", KQLFunctionValue::datatype_dynamic}, + {"guid", KQLFunctionValue::datatype_guid}, + {"int", KQLFunctionValue::datatype_int}, + {"long", KQLFunctionValue::datatype_long}, + {"real", KQLFunctionValue::datatype_real}, + {"double", KQLFunctionValue::datatype_real}, + {"string", KQLFunctionValue::datatype_string}, + {"timespan", KQLFunctionValue::datatype_timespan}, + {"time", KQLFunctionValue::datatype_timespan}, + {"decimal", KQLFunctionValue::datatype_decimal} }; @@ -220,8 +237,8 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::timespan: return std::make_unique(); - case KQLFunctionValue::datetime: - return std::make_unique(); + // case KQLFunctionValue::datetime: + // return std::make_unique(); case KQLFunctionValue::ago: return std::make_unique(); @@ -732,6 +749,40 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::bin: return std::make_unique(); + + case KQLFunctionValue::bin_at: + return std::make_unique(); + + case KQLFunctionValue::datatype_bool: + return std::make_unique(); + + case KQLFunctionValue::datatype_datetime: + return std::make_unique(); + + case KQLFunctionValue::datatype_dynamic: + 
return std::make_unique(); + + case KQLFunctionValue::datatype_guid: + return std::make_unique(); + + case KQLFunctionValue::datatype_int: + return std::make_unique(); + + case KQLFunctionValue::datatype_long: + return std::make_unique(); + + case KQLFunctionValue::datatype_real: + return std::make_unique(); + + case KQLFunctionValue::datatype_string: + return std::make_unique(); + + case KQLFunctionValue::datatype_timespan: + return std::make_unique(); + + case KQLFunctionValue::datatype_decimal: + return std::make_unique(); + } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 7c5f0d54734..7cbb0877c90 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -8,7 +8,7 @@ namespace DB enum class KQLFunctionValue : uint16_t { none, timespan, - datetime, + // datetime, ago, datetime_add, datetime_part, @@ -186,7 +186,19 @@ namespace DB binary_xor, bitset_count_ones, - bin + bin, + bin_at, + + datatype_bool, + datatype_datetime, + datatype_dynamic, + datatype_guid, + datatype_int, + datatype_long, + datatype_real, + datatype_string, + datatype_timespan, + datatype_decimal }; class KQLFunctionFactory { diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 253292a7d9d..714265633d5 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include namespace DB { @@ -25,4 +27,40 @@ bool Bin::convertImpl(String &out,IParser::Pos &pos) return false; } +bool BinAt::convertImpl(String & out,IParser::Pos & pos) +{ + ParserKQLDateTypeTimespan time_span; + double bin_size; + + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String expression_str = 
getConvertedArgument(fn_name, pos); + ++pos; + String bin_size_str = getConvertedArgument(fn_name, pos); + ++pos; + String fixed_point_str = getConvertedArgument(fn_name, pos); + + bin_size_str = bin_size_str.substr(0, bin_size_str.size()-1); + + auto t1 = std::format("toFloat64({})", fixed_point_str); + auto t2 = std::format("toFloat64({})", expression_str); + int dir = t2 >= t1 ? 0 : -1; + + if (time_span.parseConstKQLTimespan(bin_size_str)) + { + bin_size = time_span.toSeconds(); + + out = std::format("toDateTime64({} + toInt64(({} -{}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + } + else + { + bin_size = std::stod(bin_size_str); + out = std::format("{} + toInt64(({} -{}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + } + return true; +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h index 802fd152333..76ead441dfc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -11,5 +11,12 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; +class BinAt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin_at()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index c2d1bd251da..76707598788 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -21,9 +21,16 @@ bool Base64EncodeToString::convertImpl(String & out,IParser::Pos & pos) bool Base64EncodeFromGuid::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String 
guid = getConvertedArgument(fn_name, pos); + + out = std::format("base64Encode({})", guid); + + return true; } bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) @@ -33,16 +40,21 @@ bool Base64DecodeToString::convertImpl(String & out,IParser::Pos & pos) bool Base64DecodeToArray::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String str = getConvertedArgument(fn_name, pos); + + out = std::format("arrayMap(x -> (reinterpretAsUInt8(x)), splitByRegexp ('',base64Decode({})))", str); + + return true; } bool Base64DecodeToGuid::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"base64Decode"); } bool CountOf::convertImpl(String & out, IParser::Pos & pos) @@ -209,9 +221,19 @@ bool ExtractJson::convertImpl(String & out,IParser::Pos & pos) bool HasAnyIndex::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String source = getConvertedArgument(fn_name, pos); + + ++pos; + const String lookup = getConvertedArgument(fn_name, pos); + String src_array = std::format("splitByChar(' ',{})", source); + out = std::format("if (empty({1}), -1, indexOf(arrayMap ( x -> (x in {0}), if (empty({1}),[''], arrayMap(x->(toString(x)),{1}))),1) - 1)", + src_array, lookup); + return true; } bool IndexOf::convertImpl(String & out,IParser::Pos & pos) @@ -508,9 +530,23 @@ bool ToUpper::convertImpl(String & out,IParser::Pos & pos) bool Translate::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if 
(fn_name.empty()) + return false; + + ++pos; + String from = getConvertedArgument(fn_name, pos); + ++pos; + String to = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String len_diff = std::format("length({}) - length({})", from, to); + String to_str = std::format("multiIf(length({1}) = 0, {0}, {2} > 0, concat({1},repeat(substr({1},length({1}),1),toUInt16({2}))),{2} < 0 , substr({1},1,length({0})),{1})", + from, to, len_diff); + out = std::format("if (length({3}) = 0,'',translate({0},{1},{2}))", source, from, to_str, to); + return true; } bool Trim::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp new file mode 100644 index 00000000000..d83ef4e2f53 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLDateTypeTimespan :: parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) +{ + const String token(pos->begin,pos->end); + const char * current_word = pos->begin; + expected.add(pos, current_word); + + if (!parseConstKQLTimespan(token)) + return false; + + return true; +} + +double ParserKQLDateTypeTimespan :: toSeconds() +{ + switch (time_span_unit) + { + case KQLTimespanUint::day: + return time_span * 24 * 60 * 60; + case KQLTimespanUint::hour: + return time_span * 60 * 60; + case KQLTimespanUint::minute: + return time_span * 60; + case KQLTimespanUint::second: + return time_span ; + case KQLTimespanUint::millisec: + return time_span / 1000.0; + case KQLTimespanUint::microsec: + return time_span / 1000000.0; + case KQLTimespanUint::nanosec: + return time_span / 1000000000.0; + case KQLTimespanUint::tick: + return time_span / 10000000000.0; + } +} + +bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) +{ + std::unordered_map 
TimespanSuffixes = + { + {"d", KQLTimespanUint::day}, + {"day", KQLTimespanUint::day}, + {"days", KQLTimespanUint::day}, + {"h", KQLTimespanUint::hour}, + {"hr", KQLTimespanUint::hour}, + {"hrs", KQLTimespanUint::hour}, + {"hour", KQLTimespanUint::hour}, + {"hours", KQLTimespanUint::hour}, + {"m", KQLTimespanUint::minute}, + {"min", KQLTimespanUint::minute}, + {"minute", KQLTimespanUint::minute}, + {"minutes", KQLTimespanUint::minute}, + {"s", KQLTimespanUint::second}, + {"sec", KQLTimespanUint::second}, + {"second", KQLTimespanUint::second}, + {"seconds", KQLTimespanUint::second}, + {"ms", KQLTimespanUint::millisec}, + {"milli", KQLTimespanUint::millisec}, + {"millis", KQLTimespanUint::millisec}, + {"millisec", KQLTimespanUint::millisec}, + {"millisecond", KQLTimespanUint::millisec}, + {"milliseconds", KQLTimespanUint::millisec}, + {"micro", KQLTimespanUint::microsec}, + {"micros", KQLTimespanUint::microsec}, + {"microsec", KQLTimespanUint::microsec}, + {"microsecond", KQLTimespanUint::microsec}, + {"microseconds", KQLTimespanUint::microsec}, + {"nano", KQLTimespanUint::nanosec}, + {"nanos", KQLTimespanUint::nanosec}, + {"nanosec", KQLTimespanUint::nanosec}, + {"nanosecond", KQLTimespanUint::nanosec}, + {"nanoseconds", KQLTimespanUint::nanosec}, + {"tick", KQLTimespanUint::tick}, + {"ticks", KQLTimespanUint::tick} + }; + + + const char * ptr = text.c_str(); + + auto scanDigit = [&](const char *start) + { + auto index = start; + while (isdigit(*index)) + ++index; + return index > start ? 
index - start : -1; + }; + + int number_len = scanDigit(ptr); + if (number_len <= 0) + return false; + + if (*(ptr + number_len) == '.') + { + auto fractionLen = scanDigit(ptr + number_len + 1); + if (fractionLen >= 0) + { + number_len += fractionLen + 1; + } + } + + String timespan_suffix(ptr + number_len, ptr+text.size()); + if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + return false; + + time_span = std::stod(String(ptr, ptr + number_len)); + time_span_unit =TimespanSuffixes[timespan_suffix] ; + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h new file mode 100644 index 00000000000..11c74ddedad --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLDateTypeTimespan : public ParserKQLBase +{ +public: + enum class KQLTimespanUint: uint8_t + { + day, + hour, + minute, + second, + millisec, + microsec, + nanosec, + tick + }; + bool parseConstKQLTimespan(const String &text); + double toSeconds(); + +protected: + const char * getName() const override { return "KQLDateTypeTimespan"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + double time_span; + KQLTimespanUint time_span_unit; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp new file mode 100644 index 00000000000..0c658b0ba7f --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -0,0 +1,286 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) +{ + std::unordered_set allowed_aggregation + ({ + "avg", + "avgif", + "count", + "countif", + "dcount", + "dcountif", + "max", + "maxif", + "min", + "minif", + "percentile", + "take_any", + 
"stdev", + "sum", + "sumif", + "variance" + }); + + Expected expected; + ParserKeyword s_default("default"); + ParserToken equals(TokenType::Equals); + ParserToken open_bracket(TokenType::OpeningRoundBracket); + ParserToken close_bracket(TokenType::ClosingRoundBracket); + ParserToken comma(TokenType::Comma); + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String alias; + String aggregation_fun; + String column; + double default_value = 0; + + String first_token(pos->begin,pos->end); + + ++pos; + if (equals.ignore(pos, expected)) + { + alias = std::move(first_token); + aggregation_fun = String(pos->begin,pos->end); + } + else + aggregation_fun = std::move(first_token); + + if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) + return false; + + ++pos; + if (open_bracket.ignore(pos, expected)) + column = String(pos->begin,pos->end); + else + return false; + + ++pos; + if (!close_bracket.ignore(pos, expected)) + return false; + + if (s_default.ignore(pos, expected)) + { + if (!equals.ignore(pos, expected)) + return false; + + default_value = std::stod(String(pos->begin,pos->end)); + ++pos; + } + if (alias.empty()) + alias = std::format("{}_{}", aggregation_fun, column); + aggregation_columns.push_back(AggregationColumn(alias, aggregation_fun, column, default_value)); + + if (!comma.ignore(pos, expected)) + break; + } + return true; +} + +bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos) +{ + auto begin = pos; + auto from_pos = begin; + auto to_pos = begin; + auto step_pos = begin; + auto end_pos = begin; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if ( String(pos->begin, pos->end) == "from") + from_pos = pos; + if ( String(pos->begin, pos->end) == "to") + to_pos = pos; + if ( String(pos->begin, pos->end) == "step") + step_pos = pos; + if ( String(pos->begin, pos->end) == "by") + { + 
end_pos = pos; + break; + } + ++pos; + } + + if (end_pos == begin) + end_pos = pos; + + if (step_pos == begin) + return false; + + if (String(from_pos->begin, from_pos->end) == "from") + { + ++from_pos; + auto end_from_pos = (to_pos != begin) ? to_pos : step_pos; + --end_from_pos; + from_to_step.from = String(from_pos->begin, end_from_pos->end); + } + + if (to_pos != begin) + { ++to_pos; + --step_pos; + from_to_step.to = String(to_pos->begin, step_pos->end); + ++step_pos; + ++step_pos; + } + --end_pos; + from_to_step.step = String(step_pos->begin, end_pos->end); + return true; +} + + +bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (op_pos.empty()) + return true; + + auto begin = pos; + + pos = op_pos.back(); + + String axis_column; + String group_expression; + + ParserKeyword s_on("on"); + ParserKeyword s_by("by"); + + ParserToken equals(TokenType::Equals); + ParserToken comma(TokenType::Comma); + + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + + ParserKQLDateTypeTimespan time_span; + + //const auto make_series_parameters = getMakeSeriesParameters(pos); + + if (!parseAggregationColumns(aggregation_columns, pos)) + return false; + + if (!s_on.ignore(pos, expected)) + return false; + + axis_column = String(pos->begin, pos->end); + ++pos; + + if (!parseFromToStepClause(from_to_step, pos)) + return false; + + // 'on' statement parameter, expecting scalar value of type 'int', 'long', 'real', 'datetime' or 'timespan'. 
+ + if (s_by.ignore(pos, expected)) + { + group_expression = getExprFromToken(pos); + if (group_expression.empty()) + return false; + } + + String subquery_columns; + + for (auto agg_column : aggregation_columns) + { + String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun, agg_column.column, agg_column.alias); + if (subquery_columns.empty()) + subquery_columns = column_str; + else + subquery_columns += ", "+ column_str; + } + + ASTPtr sub_qurery_table; + double step; + String sub_query ; + String main_query ; + String group_by; + + String start_str = getExprFromToken(from_to_step.from, pos.max_depth); + String end_str = getExprFromToken(from_to_step.to, pos.max_depth); + String step_str = from_to_step.step; + + if (time_span.parseConstKQLTimespan(step_str)) + { + step = time_span.toSeconds(); + + auto bin_str = std::format(" toUInt64(toFloat64(toDateTime64({},6,'UTC')) / {}) * {} AS {}_ali ", axis_column, step,step, axis_column); + auto sub_sub_query = std::format(" (Select {},{}, {} FROM {} GROUP BY {},{}_ali ORDER BY {}_ali) ", group_expression, subquery_columns, bin_str, table_name, group_expression, axis_column, axis_column); + + auto start = std::format("toUInt64(toDateTime64({},6,'UTC'))", start_str); + auto end = std::format("toUInt64(toDateTime64({},6,'UTC'))", end_str); + auto range = std::format("range({},{}, toUInt64({}))", start, end, step); + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}_ali >= {} and {}_ali <= {}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 
0 : {} - length(ga)),1) )) as {}", + agg_column.alias, axis_column, start, axis_column, end, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({}_ali,6,'UTC'),{}_ali >= {} and {}_ali <= {}), arrayMap( x->(toDateTime64(x,6,'UTC')), {}) )) as {}", + axis_column, axis_column, start, axis_column, end, range, axis_column); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT min({}_ali) AS low,max({}_ali) AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + } + else + { + step = stod(step_str); + + sub_query = std::format("kql( {} | summarize {}, {} = toint({} / {}) * {} by {},{} )", + table_name, subquery_columns, axis_column, axis_column, step, subquery_columns, axis_column); + } + + Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) + return false; + tables = std::move(sub_qurery_table); + + String converted_columns = main_query; + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + return false; + + if (!group_by.empty()) + { + String converted_groupby = group_by; + + Tokens 
token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + } + + pos = begin; + return true; + +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h new file mode 100644 index 00000000000..b30155b1bd8 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -0,0 +1,45 @@ + +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMakeSeries : public ParserKQLBase +{ +public: + ASTPtr group_expression_list; + ASTPtr tables; + void setTableName(String table_name_) {table_name = table_name_;} + +protected: + struct AggregationColumn { + String alias; + String aggregation_fun; + String column; + double default_value; + AggregationColumn(String alias_, String aggregation_fun_, String column_, double default_value_ ) + :alias(alias_), aggregation_fun(aggregation_fun_), column(column_), default_value(default_value_){} + }; + using AggregationColumns = std::vector; + + struct FromToStepClause { + String from; + String to; + String step; + }; + + bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); + bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +private: + String table_name; +}; + +} + + + diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 7f00a76fa72..bfa52368c16 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -26,6 +26,14 @@ bool ParserKQLBase :: parsePrepare(Pos & pos) return true; } +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & 
max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + String ParserKQLBase :: getExprFromToken(Pos &pos) { String res; diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 2cfec703fd4..ac8715ae894 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -9,7 +9,7 @@ class ParserKQLBase : public IParserBase public: virtual bool parsePrepare(Pos & pos); virtual String getExprFromToken(Pos &pos); - + virtual String getExprFromToken(const String & text, const uint32_t & max_depth); std::vector op_pos; }; From 7b2fc8c3b78a4ef357a7027a60a57c746fb993b1 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Tue, 9 Aug 2022 10:02:09 -0700 Subject: [PATCH 066/279] Update unit tests for IP functions --- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 4 ++-- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 47d01e42ae3..765912ff936 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -108,7 +108,7 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) for (int i = 0; i < std::ssize(s_private_subnets); ++i) { if (i > 0) - out += " or"; + out += " or "; const auto & subnet = s_private_subnets[i]; out += std::format( @@ -118,7 +118,7 @@ bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) unique_identifier); } - out += ")"; + out.push_back(')'); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index cf1d0838176..e6338ab5218 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -29,35 +29,35 @@ 
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, ::testing::ValuesIn(std::initializer_list{ { "print format_ipv4(A)", - "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" + "SELECT ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)" }, { "print format_ipv4(A, B)", - "SELECT ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, 
IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" + "SELECT ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\)" }, { "print format_ipv4_mask(A)", - "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), 
''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" + "SELECT if\\(empty\\(ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(32 >= 0\\) AND \\(32 <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(32\\)\\)\\)" }, { "print format_ipv4_mask(A, B)", - "SELECT if\\(empty\\(ifNull\\(multiIf\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\), NULL, \\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL, NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(B\\)\\)\\)" + 
"SELECT if\\(empty\\(ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(A\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(A\\) = 'String'\\)\\) OR \\(B < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(A\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - B\\) - 1\\)\\)\\)\\)\\), ''\\) AS formatted_ip_\\d+\\) OR \\(NOT \\(\\(B >= 0\\) AND \\(B <= 32\\)\\)\\), '', concat\\(formatted_ip_\\d+, '/', toString\\(B\\)\\)\\)" }, { "print ipv4_compare(A, B)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), 
assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, 
\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, { "print ipv4_compare(A, B, C)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), 
assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, { "print ipv4_is_match(A, B)", - "SELECT 
multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" }, { "print ipv4_is_match(A, B, C)", - "SELECT multiIf\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR 
\\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\), NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, ignore\\(toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" + "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, 
toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" }, { "print parse_ipv4_mask(A, B)", @@ -65,11 +65,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, }, { "print ipv4_is_in_range(A, B)", - "SELECT multiIf\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL, NULL, \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS 
range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" }, { "print ipv4_is_private(A)", - "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\), NULL, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR 
\\(isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\), true, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\), true, false\\)" + "SELECT multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(toIPv4OrNull\\(tokens_\\d+\\[1\\]\\) AS nullable_ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, ignore\\(assumeNotNull\\(nullable_ip_\\d+\\) AS ip_\\d+, IPv4CIDRToRange\\(ip_\\d+, assumeNotNull\\(mask_\\d+\\)\\) AS range_\\d+, IPv4NumToString\\(range_\\d+.1\\) AS begin_\\d+, IPv4NumToString\\(range_\\d+.2\\) AS end_\\d+\\), NULL, \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '10.0.0.0/8'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '10.0.0.0/8'\\) AND isIPAddressInRange\\(end_\\d+, '10.0.0.0/8'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '172.16.0.0/12'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '172.16.0.0/12'\\) AND isIPAddressInRange\\(end_\\d+, '172.16.0.0/12'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 1\\) AND isIPAddressInRange\\(IPv4NumToString\\(ip_\\d+\\), '192.168.0.0/16'\\)\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND isIPAddressInRange\\(begin_\\d+, '192.168.0.0/16'\\) AND isIPAddressInRange\\(end_\\d+, '192.168.0.0/16'\\)\\)\\)" }, { "print ipv4_netmask_suffix(A)", From 232e4c73f9bacc214014178b2a68d1c25c37cee4 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 9 Aug 2022 09:40:35 -0700 Subject: [PATCH 067/279] Date_Time functions PART 1 --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 138 +++++++++++++----- 
.../KustoFunctions/KQLDateTimeFunctions.h | 11 +- .../KustoFunctions/KQLFunctionFactory.cpp | 13 +- .../Kusto/KustoFunctions/KQLFunctionFactory.h | 5 +- 4 files changed, 120 insertions(+), 47 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 3b00ccbceb8..aea1bc3127a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -63,23 +63,23 @@ bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toDayOfMonth"); } bool DayOfWeek::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + + out = std::format("toDayOfWeek() + %7"); + return true; } bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toDayOfYear"); } bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) @@ -119,23 +119,17 @@ bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) bool GetMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toMonth"); } bool GetYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toYear"); } bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toHour"); } bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) @@ 
-162,39 +156,91 @@ bool Now::convertImpl(String &out,IParser::Pos &pos) if (pos->type != TokenType::ClosingRoundBracket) { const auto offset = getConvertedArgument(fn_name, pos); - out = std::format("now('UTC') + {}", offset); + out = std::format("now64(9,'UTC') + {}", offset); } else - out = "now('UTC')"; + out = "now64(9,'UTC')"; return true; } bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(DAY,{}, toDateTime64((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); + return true; } bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(MONTH,{}, toDateTime64((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); + return true; } bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + + } + out = std::format("date_add(Week,{}, toDateTime64((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); + 
return true; } bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset ; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("date_add(YEAR,{}, toDateTime64((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); + return true; } bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) @@ -220,16 +266,32 @@ bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("toDateTime64({},9,'UTC')", value); + return true; } bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String time_str = getConvertedArgument(fn_name, pos); + out = std::format("toWeek({},3,'UTC')", time_str); + return true; +} + +bool MonthOfYear::convertImpl(String &out,IParser::Pos &pos) +{ + + return directMapping(out, pos, "toMonth"); } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index ee87be15eda..adf95a39a64 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -120,7 +120,7 @@ protected: class HoursOfDay : public IParserKQLFunction { protected: 
- const char * getName() const override { return "hoursofday()"; } + const char * getName() const override { return "hourofday()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; @@ -204,7 +204,14 @@ protected: class WeekOfYear : public IParserKQLFunction { protected: - const char * getName() const override { return "weekofyear()"; } + const char * getName() const override { return "week_of_year()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MonthOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "monthofyear()"; } bool convertImpl(String &out,IParser::Pos &pos) override; }; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index c66bfd60647..075d56d9608 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -35,7 +35,7 @@ namespace DB {"format_timespan", KQLFunctionValue::format_timespan}, {"getmonth", KQLFunctionValue::getmonth}, {"getyear", KQLFunctionValue::getyear}, - {"hoursofday", KQLFunctionValue::hoursofday}, + {"hourofday", KQLFunctionValue::hourofday}, {"make_timespan", KQLFunctionValue::make_timespan}, {"make_datetime", KQLFunctionValue::make_datetime}, {"now", KQLFunctionValue::now}, @@ -49,8 +49,8 @@ namespace DB {"unixtime_milliseconds_todatetime", KQLFunctionValue::unixtime_milliseconds_todatetime}, {"unixtime_nanoseconds_todatetime", KQLFunctionValue::unixtime_nanoseconds_todatetime}, {"unixtime_seconds_todatetime", KQLFunctionValue::unixtime_seconds_todatetime}, - {"weekofyear", KQLFunctionValue::weekofyear}, - + {"week_of_year", KQLFunctionValue::week_of_year}, + {"monthofyear", KQLFunctionValue::monthofyear}, {"base64_encode_tostring", KQLFunctionValue::base64_encode_tostring}, {"base64_encode_fromguid", KQLFunctionValue::base64_encode_fromguid}, {"base64_decode_tostring", 
KQLFunctionValue::base64_decode_tostring}, @@ -269,6 +269,9 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::endofyear: return std::make_unique(); + + case KQLFunctionValue::monthofyear: + return std::make_unique(); case KQLFunctionValue::format_datetime: return std::make_unique(); @@ -282,7 +285,7 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::getyear: return std::make_unique(); - case KQLFunctionValue::hoursofday: + case KQLFunctionValue::hourofday: return std::make_unique(); case KQLFunctionValue::make_timespan: @@ -318,7 +321,7 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::unixtime_seconds_todatetime: return std::make_unique(); - case KQLFunctionValue::weekofyear: + case KQLFunctionValue::week_of_year: return std::make_unique(); case KQLFunctionValue::base64_encode_tostring: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 7cbb0877c90..ed747964175 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -19,11 +19,12 @@ namespace DB endofday, endofweek, endofyear, + monthofyear, format_datetime, format_timespan, getmonth, getyear, - hoursofday, + hourofday, make_timespan, make_datetime, now, @@ -37,7 +38,7 @@ namespace DB unixtime_milliseconds_todatetime, unixtime_nanoseconds_todatetime, unixtime_seconds_todatetime, - weekofyear, + week_of_year, base64_encode_tostring, base64_encode_fromguid, From 09f117cfb1f8ea74355e73f846efaed76b56de6f Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Wed, 10 Aug 2022 13:04:07 -0700 Subject: [PATCH 068/279] Added test and review comments --- src/Parsers/Kusto/KQL_ReleaseNote.md | 60 +++++++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 101 +++++++------- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 126 ++++++++++++++++++ 3 files changed, 237 
insertions(+), 50 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b37b991a2a2..0f475654d5f 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,4 +1,64 @@ # August XX, 2022 +- **DateTimeFunctions** +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- 
[hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print (datetime(2015-12-14))` + +- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5 microseconds)` + `print now(5 seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + ## KQL implemented features The config setting to allow modify dialect setting. - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. 
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index aea1bc3127a..58d8536fb49 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -19,134 +19,136 @@ namespace DB { -bool TimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool TimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } /* -bool DateTime::convertImpl(String &out,IParser::Pos &pos) +bool DateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; }*/ -bool Ago::convertImpl(String &out,IParser::Pos &pos) +bool Ago::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DatetimeAdd::convertImpl(String &out,IParser::Pos &pos) +bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; }; -bool DatetimePart::convertImpl(String &out,IParser::Pos &pos) +bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DatetimeDiff::convertImpl(String &out,IParser::Pos &pos) +bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool DayOfMonth::convertImpl(String &out,IParser::Pos &pos) +bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toDayOfMonth"); } -bool DayOfWeek::convertImpl(String 
&out,IParser::Pos &pos) +bool DayOfWeek::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; ++pos; - out = std::format("toDayOfWeek() + %7"); + const String datetime_str = getConvertedArgument(fn_name, pos); + + out = std::format("toDayOfWeek({})%7",datetime_str); return true; } -bool DayOfYear::convertImpl(String &out,IParser::Pos &pos) +bool DayOfYear::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toDayOfYear"); } -bool EndOfDay::convertImpl(String &out,IParser::Pos &pos) +bool EndOfDay::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool EndOfWeek::convertImpl(String &out,IParser::Pos &pos) +bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool EndOfYear::convertImpl(String &out,IParser::Pos &pos) +bool EndOfYear::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatDateTime::convertImpl(String &out,IParser::Pos &pos) +bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool FormatTimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool GetMonth::convertImpl(String &out,IParser::Pos &pos) +bool GetMonth::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toMonth"); } -bool GetYear::convertImpl(String &out,IParser::Pos &pos) +bool 
GetYear::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toYear"); } -bool HoursOfDay::convertImpl(String &out,IParser::Pos &pos) +bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toHour"); } -bool MakeTimeSpan::convertImpl(String &out,IParser::Pos &pos) +bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool MakeDateTime::convertImpl(String &out,IParser::Pos &pos) +bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Now::convertImpl(String &out,IParser::Pos &pos) +bool Now::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -163,7 +165,7 @@ bool Now::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) +bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); @@ -184,7 +186,7 @@ bool StartOfDay::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) +bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -204,7 +206,7 @@ bool StartOfMonth::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) +bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) { const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) @@ -224,9 +226,9 @@ bool StartOfWeek::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) +bool StartOfYear::convertImpl(String & 
out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -243,30 +245,30 @@ bool StartOfYear::convertImpl(String &out,IParser::Pos &pos) return true; } -bool UnixTimeMicrosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeMillisecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeNanosecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) +bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -276,9 +278,8 @@ bool UnixTimeSecondsToDateTime::convertImpl(String &out,IParser::Pos &pos) return true; } -bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) +bool WeekOfYear::convertImpl(String & out, IParser::Pos & pos) { - const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; @@ -288,7 +289,7 @@ bool WeekOfYear::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MonthOfYear::convertImpl(String &out,IParser::Pos &pos) +bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "toMonth"); diff --git 
a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp new file mode 100644 index 00000000000..77ad9714735 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserDateTimeFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserDateTimeFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print week_of_year(datetime(2020-12-31))", + "SELECT 
toWeek(toDateTime64('2020-12-31', 9, 'UTC'), 3, 'UTC')" + }, + { + "print startofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfWeek(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1)" + }, + { + "print startofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfMonth(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" + }, + { + "print startofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" + + }, + { + "print monthofyear(datetime(2015-12-14))", + "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" + }, + { + "print hourofday(datetime(2015-12-14 10:54:00))", + "SELECT toHour(toDateTime64('2015-12-14 10:54:00', 9, 'UTC'))" + }, + { + "print getyear(datetime(2015-10-12))", + "SELECT toYear(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print getmonth(datetime(2015-10-12))", + "SELECT toMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print dayofyear(datetime(2015-10-12))", + "SELECT toDayOfYear(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print dayofmonth(datetime(2015-10-12))", + "SELECT toDayOfMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + }, + { + "print unixtime_seconds_todatetime(1546300899)", + "SELECT toDateTime64(1546300899, 9, 'UTC')" + }, + { + "print dayofweek(datetime(2015-12-20))", + "SELECT toDayOfWeek(toDateTime64('2015-12-20', 9, 'UTC')) % 7" + }, + { + "print now()", + "SELECT now64(9, 'UTC')" + } + +}))); From 77e80445043fc02dd9e98d32bbf85aab5a28fbfc Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 10 Aug 2022 07:46:29 -0700 Subject: [PATCH 069/279] Implement KQL binary functions --- .../KustoFunctions/KQLBinaryFunctions.cpp | 109 +++++++++++------- src/Parsers/tests/KQL/gtest_KQL_Binary.cpp | 39 +++++++ src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 2 +- 
src/Parsers/tests/gtest_Parser.cpp | 4 - src/Parsers/tests/gtest_common.h | 7 ++ 5 files changed, 116 insertions(+), 45 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Binary.cpp diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp index 2a06c4e715b..f8765b116d4 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp @@ -1,70 +1,99 @@ -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include namespace DB { -bool BinaryAnd::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAnd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitAnd(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BinaryNot::convertImpl(String &out,IParser::Pos &pos) +bool BinaryNot::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + out = std::format("bitNot(cast({0}, 'Int64'))", value); + return true; } -bool BinaryOr::convertImpl(String &out,IParser::Pos &pos) +bool BinaryOr::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if 
(function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitOr(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BinaryShiftLeft::convertImpl(String &out,IParser::Pos &pos) +bool BinaryShiftLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftLeft(cast({0}, 'Int64'), {1}))", value, count); + return true; } -bool BinaryShiftRight::convertImpl(String &out,IParser::Pos &pos) +bool BinaryShiftRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto value = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format("if({1} < 0, null, bitShiftRight(cast({0}, 'Int64'), {1}))", value, count); + return true; } -bool BinaryXor::convertImpl(String &out,IParser::Pos &pos) +bool BinaryXor::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + out = std::format("bitXor(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs); + return true; } -bool BitsetCountOnes::convertImpl(String &out,IParser::Pos &pos) +bool BitsetCountOnes::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = 
res; - return false; + return directMapping(out, pos, "bitCount"); } } diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp new file mode 100644 index 00000000000..600965dcef6 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print binary_and(A, B)", + "SELECT bitAnd(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print binary_not(A)", + "SELECT bitNot(CAST(A, 'Int64'))" + }, + { + "print binary_or(A, B)", + "SELECT bitOr(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print binary_shift_left(A, B)", + "SELECT if(B < 0, NULL, bitShiftLeft(CAST(A, 'Int64'), B))" + }, + { + "print binary_shift_right(A, B)", + "SELECT if(B < 0, NULL, bitShiftRight(CAST(A, 'Int64'), B))" + }, + { + "print binary_xor(A, B)", + "SELECT bitXor(CAST(A, 'Int64'), CAST(B, 'Int64'))" + }, + { + "print bitset_count_ones(A)", + "SELECT bitCount(A)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index e6338ab5218..c2257d05500 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -23,7 +23,7 @@ TEST_P(ParserRegexTest, parseQuery) EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); } -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserRegexTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 87df4a3fdee..c97080482ed 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -19,7 +19,6 @@ #include #include #include -#include namespace 
{ @@ -37,9 +36,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -class ParserTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - TEST_P(ParserTest, parseQuery) { const auto & parser = std::get<0>(GetParam()); diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h index abbc1a0cb0e..aac3dddb117 100644 --- a/src/Parsers/tests/gtest_common.h +++ b/src/Parsers/tests/gtest_common.h @@ -1,3 +1,7 @@ +#include + +#include + #include struct ParserTestCase @@ -5,3 +9,6 @@ struct ParserTestCase const std::string_view input_text; const char * expected_ast = nullptr; }; + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> +{}; From 8b9ff2283eeaa4ff22f767f174ec96e5f7395a84 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 10 Aug 2022 07:58:07 -0700 Subject: [PATCH 070/279] Update release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 0f475654d5f..ed64110487e 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -84,6 +84,26 @@ The config setting to allow modify dialect setting. pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + ## IP functions - [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` From e22413823e39166179f2c17f0b7b933648e36c65 Mon Sep 17 00:00:00 2001 From: kashwy Date: Thu, 11 Aug 2022 12:38:49 -0700 Subject: [PATCH 071/279] Kusto-pahse2: fixed toimspan issue and other functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 275 +++++++++++------- .../KustoFunctions/IParserKQLFunction.cpp | 9 +- .../KustoFunctions/KQLDataTypeFunctions.cpp | 76 ++--- .../KustoFunctions/KQLStringFunctions.cpp | 47 ++- .../Kusto/ParserKQLDateTypeTimespan.cpp | 71 ++++- src/Parsers/Lexer.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 169 +++++++++++ 
.../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 4 + 8 files changed, 493 insertions(+), 160 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ed64110487e..fa0a4c1240b 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,66 +1,171 @@ -# August XX, 2022 -- **DateTimeFunctions** -- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) - `print startofyear(datetime(2017-01-01 10:10:17), -1)` - `print startofyear(datetime(2017-01-01 10:10:17), 0)` - `print startofyear(datetime(2017-01-01 10:10:17), 1)` -- [weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) - `print week_of_year(datetime(2020-12-31))` - `print week_of_year(datetime(2020-06-15))` - `print week_of_year(datetime(1970-01-01))` - `print week_of_year(datetime(2000-01-01))` - -- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) - `print startofweek(datetime(2017-01-01 10:10:17), -1)` - `print startofweek(datetime(2017-01-01 10:10:17), 0)` - `print startofweek(datetime(2017-01-01 10:10:17), 1)` - -- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) - `print startofmonth(datetime(2017-01-01 10:10:17), -1)` - `print startofmonth(datetime(2017-01-01 10:10:17), 0)` - `print startofmonth(datetime(2017-01-01 10:10:17), 1)` - -- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) - `print startofday(datetime(2017-01-01 10:10:17), -1)` - `print startofday(datetime(2017-01-01 10:10:17), 0)` - `print startofday(datetime(2017-01-01 10:10:17), 1)` - -- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) - `print monthofyear(datetime("2015-12-14"))` - -- 
[hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) - `print hourofday(datetime(2015-12-14 18:54:00))` - -- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) - `print getyear(datetime(2015-10-12))` - -- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) - `print getmonth(datetime(2015-10-12))` - -- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) - `print dayofyear(datetime(2015-12-14))` - -- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) - `print (datetime(2015-12-14))` - -- [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) - `print unixtime_seconds_todatetime(1546300800)` - -- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) - `print dayofweek(datetime(2015-12-20))` - -- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) - `print now()` - `print now(2d)` - `print now(-2h)` - `print now(5 microseconds)` - `print now(5 seconds)` - `print now(6minutes)` - `print now(-2d) ` - `print now(time(1d))` - ## KQL implemented features -The config setting to allow modify dialect setting. 
+ +# August 15, 2022 + +## DataType +- [bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31")` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan ,time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the timespan is used for calculating datetime, so the output is in seconds. e.g. 
time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + + + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- 
[weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print dayofmonth(datetime(2015-12-14))` + +- 
[unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- 
[format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` + + +# August 1, 2022 + +**The config setting to allow modify dialect setting**. - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile acording to dialect value. For example: @@ -83,51 +188,6 @@ The config setting to allow modify dialect setting. OR pass dialect setting with '--'. 
For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` - -## Binary functions -- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) - `print binary_and(15, 3) == 3` - `print binary_and(1, 2) == 0` -- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) - `print binary_not(1) == -2` -- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) - `print binary_or(3, 8) == 11` - `print binary_or(1, 2) == 3` -- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) - `print binary_shift_left(1, 1) == 2` - `print binary_shift_left(1, 64) == 1` -- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) - `print binary_shift_right(1, 1) == 0` - `print binary_shift_right(1, 64) == 1` -- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) - `print binary_xor(1, 3) == 2` -- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) - `print bitset_count_ones(42) == 3` - -## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) - `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` - `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) - `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` - `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) - `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` - `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` - `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` 
- `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) - `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` - `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` - `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) - `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` - `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` - `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` - `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` - -# August 1, 2022 - **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` @@ -155,7 +215,6 @@ The config setting to allow modify dialect setting. - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` -# July XX, 2022 ## IP functions diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 0b7eb403a22..243b67b7308 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -102,7 +102,6 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - String token = String(pos->begin, pos->end); String new_token; if (!KQLOperators().convert(tokens, pos)) { @@ -115,7 +114,15 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: break; } else + { + 
String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + String(pos->begin + 1,pos->end - 1) + "'"; + else + token = String(pos->begin, pos->end); + tokens.push_back(token); + } } ++pos; if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 2a59ab8b72a..0f60bf6d326 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -5,28 +5,21 @@ #include #include #include -/* -#include -#include -#include -#include -#include -#include -#include -#include -#include -*/ #include +#include #include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + bool DatatypeBool::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toBool"); } bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) @@ -59,9 +52,24 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String res = String(pos->begin, pos->end); + String array; + ++pos; //go pass "dynamic" string + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + array += String(pos->begin, pos->end); + } + ++pos; + } + if (pos->type == TokenType::ClosingRoundBracket) + array += String(pos->begin, pos->end); + else + return false; + + out = "array" + array; + return true; } bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) @@ -72,10 +80,8 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) String guid_str; ++pos; - if (pos->type == 
TokenType::QuotedIdentifier) - guid_str = std::format("'{}'", String(pos->begin+1, pos->end -1)); - else if (pos->type == TokenType::StringLiteral) - guid_str = String(pos->begin, pos->end); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + guid_str = String(pos->begin+1, pos->end -1); else { auto start = pos; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) @@ -85,32 +91,26 @@ bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) break; } --pos; - guid_str = std::format("'{}'",String(start->begin,pos->end)); + guid_str = String(start->begin,pos->end); } - out = guid_str; + out = std::format("toUUID('{}')", guid_str); ++pos; return true; } bool DatatypeInt::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt32"); } bool DatatypeLong::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toInt64"); } bool DatatypeReal::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "toFloat64"); } bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) @@ -122,12 +122,22 @@ bool DatatypeString::convertImpl(String &out,IParser::Pos &pos) bool DatatypeTimespan::convertImpl(String &out,IParser::Pos &pos) { + ParserKQLDateTypeTimespan time_span; + ASTPtr node; + Expected expected; + const String fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; ++pos; - out = getConvertedArgument(fn_name, pos); + if (time_span.parse(pos, node, expected)) + { + out = std::to_string(time_span.toSeconds()); + ++pos; + } + else + throw Exception("Not a correct timespan expression: " + fn_name, ErrorCodes::BAD_ARGUMENTS); return true; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 76707598788..2a88a56b844 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -372,9 +372,7 @@ bool ParseVersion::convertImpl(String & out,IParser::Pos & pos) bool ReplaceRegex::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "replaceRegexpAll"); } bool Reverse::convertImpl(String & out,IParser::Pos & pos) @@ -551,23 +549,48 @@ bool Translate::convertImpl(String & out,IParser::Pos & pos) bool Trim::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + String ltrim = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as srcl, concat('random_str', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as srcr, concat('random_str', reverse({1})),'') as dstr) = srcr, {0}, reverse(dstr))", ltrim, regex); + + return true; } bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as src, concat('random_str', reverse({1})),'') as dst) = src, {0}, reverse(dst))", source, regex); + + return true; } bool TrimStart::convertImpl(String & 
out,IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String regex = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + out = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as src, concat('random_str', {1}),'') as dst) = src, {0}, dst)", source, regex); + + return true; } bool URLDecode::convertImpl(String & out,IParser::Pos & pos) diff --git a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp index d83ef4e2f53..af3c4e45875 100644 --- a/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp +++ b/src/Parsers/Kusto/ParserKQLDateTypeTimespan.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -11,10 +12,15 @@ namespace DB bool ParserKQLDateTypeTimespan :: parseImpl(Pos & pos, [[maybe_unused]] ASTPtr & node, Expected & expected) { - const String token(pos->begin,pos->end); + String token; const char * current_word = pos->begin; expected.add(pos, current_word); + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral ) + token = String(pos->begin + 1, pos->end -1); + else + token = String(pos->begin, pos->end); + if (!parseConstKQLTimespan(token)) return false; @@ -84,6 +90,7 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) {"ticks", KQLTimespanUint::tick} }; + uint16_t days = 0, hours = 0, minutes = 0, seconds = 0, milliseconds = 0; const char * ptr = text.c_str(); @@ -99,21 +106,75 @@ bool ParserKQLDateTypeTimespan :: parseConstKQLTimespan(const String & text) if (number_len <= 0) return false; + days = std::stoi(String(ptr, ptr + number_len)); + if (*(ptr + number_len) == '.') { auto fractionLen = scanDigit(ptr + number_len + 1); if (fractionLen >= 0) { + hours = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 
+ fractionLen)); number_len += fractionLen + 1; } + else + { + hours = days; + days = 0; + } } - String timespan_suffix(ptr + number_len, ptr+text.size()); - if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + if (hours > 23) return false; - time_span = std::stod(String(ptr, ptr + number_len)); - time_span_unit =TimespanSuffixes[timespan_suffix] ; + if (*(ptr + number_len) != ':') + { + String timespan_suffix(ptr + number_len, ptr + text.size()); + + trim(timespan_suffix); + if (TimespanSuffixes.find(timespan_suffix) == TimespanSuffixes.end()) + return false; + + time_span = std::stod(String(ptr, ptr + number_len)); + time_span_unit = TimespanSuffixes[timespan_suffix] ; + + return true; + } + + auto min_len = scanDigit(ptr + number_len + 1); + if (min_len < 0) + return false; + + minutes = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + min_len)); + if (minutes > 59) + return false; + + number_len += min_len + 1; + if (*(ptr + number_len) == ':') + { + auto sec_len = scanDigit(ptr + number_len + 1); + if (sec_len > 0) + { + seconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + sec_len)); + if (seconds > 59) + return false; + + number_len += sec_len + 1; + if (*(ptr + number_len) == '.') + { + auto milli_len = scanDigit(ptr + number_len + 1); + if (milli_len > 0) + { + milliseconds = std::stoi(String(ptr + number_len + 1, ptr + number_len + 1 + milli_len)); + + if (milliseconds > 1000) + return false; + } + } + } + } + + time_span = days * 86400 + hours * 3600 + minutes * 60 + seconds + milliseconds / 1000; + time_span_unit = KQLTimespanUint::second; return true; } diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index be67807ad8f..449b6972cd1 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -213,7 +213,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if 
(!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp new file mode 100644 index 00000000000..f2994464e14 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserStringFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserStringFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + 
::testing::ValuesIn(std::initializer_list{ + { + "print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')", + "SELECT base64Encode('ae3133f2-6e22-49ae-b06a-16e6a9b212eb') AS Quine" + }, + { + "print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')", + "SELECT base64Decode('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')" + }, + { + "print base64_decode_toarray('S3VzdG8=')", + "SELECT arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode('S3VzdG8=')))" + }, + { + "print replace_regex('Hello, World!', '.', '\\0\\0')", + "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0')" + }, + { + "print idx = has_any_index('this is an example', dynamic(['this', 'example'])) ", + "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS idx" + }, + { + "print idx = has_any_index('this is an example', dynamic([]))", + "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS idx" + }, + { + "print translate('krasp', 'otsku', 'spark')", + "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', (length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku')))" + }, + { + "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", + "SELECT if((replaceRegexpOne(concat('random_str', concat('- ', 'Te st1', '// $')) AS src, concat('random_str', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" + }, + { + "print trim_end('.com', 'bing.com')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse('bing.com')) AS src, 
concat('random_str', reverse('.com')), '') AS dst) = src, 'bing.com', reverse(dst))" + }, + { + "print trim('--', '--https://bing.com--')", + "SELECT if((replaceRegexpOne(concat('random_str', reverse(if((replaceRegexpOne(concat('random_str', '--https://bing.com--') AS srcl, concat('random_str', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl))) AS srcr, concat('random_str', reverse('--')), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), reverse(dstr))" + }, + { + "print bool(1)", + "SELECT toBool(1)" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + }, + { + "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')", + "SELECT toUUID('74be27de-1e4e-49d9-b579-fe0b331d3642')" + }, + { + "print guid('74be27de1e4e49d9b579fe0b331d3642')", + "SELECT toUUID('74be27de1e4e49d9b579fe0b331d3642')" + }, + { + "print int(32.5)", + "SELECT toInt32(32.5)" + }, + { + "print long(32.5)", + "SELECT toInt64(32.5)" + }, + { + "print real(32.5)", + "SELECT toFloat64(32.5)" + }, + { + "print time('1.22:34:8.128')", + "SELECT 167648." + }, + { + "print time('1d')", + "SELECT 86400." + }, + { + "print time('1.5d')", + "SELECT 129600." + }, + { + "print timespan('1.5d')", + "SELECT 129600." 
+ } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 77ad9714735..74d13c60d05 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -121,6 +121,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, { "print now()", "SELECT now64(9, 'UTC')" + }, + { + "print now(1d)", + "SELECT now64(9, 'UTC') + 86400." } }))); From 18f3c5c5c895824220606dfb7131c985ca5514e5 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 17 Aug 2022 18:42:46 -0700 Subject: [PATCH 072/279] Aggregate functions added --- src/Parsers/Kusto/KQL_ReleaseNote.md | 33 +++ .../KQLAggregationFunctions.cpp | 209 +++++++++++++++--- .../KQL/gtest_KQL_AggregateFunctions.cpp | 113 ++++++++++ 3 files changed, 328 insertions(+), 27 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index fa0a4c1240b..7206c3cec89 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,5 +1,38 @@ ## KQL implemented features +# August XX, 2022 + +## Aggregate Functions +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- 
[binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + **do not support `range()` now** + `Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName` + `Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + **do not support `range()` now** + `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` + # August 15, 2022 ## DateTpye diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 30b33b5933a..a16c4f6ea22 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -40,23 +40,17 @@ bool AvgIf::convertImpl(String &out,IParser::Pos &pos) bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitAnd"); } bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitOr"); } bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) { - String res = 
String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out,pos,"groupBitXor"); } bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) @@ -220,44 +214,205 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos) bool Percentiles::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + column_name.pop_back(); + String expr = ""; + String value; + String value_in_column; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma){ + value = String(pos->begin, pos->end); + value_in_column = ""; + + for(size_t i = 0; i < value.size(); i++) + { + if(value[i] == '.') + value_in_column += '_'; + else + value_in_column += value[i]; + } + expr = expr + "quantile( " + value + "/100)(" + column_name + ") AS percentile_" + column_name + "_" + value_in_column; + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr; + return true; } bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + column_name.pop_back(); + String expr = "quantiles("; + String value; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket){ + + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if(pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != 
TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + + } + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + column_name + ")"; + out = expr; + return true; } bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + bucket_column.pop_back(); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + frequency_column.pop_back(); + + String expr = ""; + String value; + String value_in_column; + + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma){ + value = String(pos->begin, pos->end); + value_in_column = ""; + + for(size_t i = 0; i < value.size(); i++) + { + if(value[i] == '.') + value_in_column += '_'; + else + value_in_column += value[i]; + } + + expr = expr + "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ") AS percentile_" + bucket_column + "_" + value_in_column; + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr; + return true; } bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + bucket_column.pop_back(); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + frequency_column.pop_back(); + + String expr = "quantilesExactWeighted("; + String value; + while(pos->type != TokenType::ClosingRoundBracket) + { + if(pos->type != TokenType::Comma && 
String(pos->begin, pos->end) != "dynamic" + && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket){ + + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if(pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + + } + ++pos; + if(pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + bucket_column + ","+frequency_column + ")"; + out = expr; + return true; } bool Stdev::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + out = "sqrt(varSamp(" + expr + "))"; + return true; } bool StdevIf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name,pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name,pos); + out = "sqrt(varSampIf(" + expr + ", " + predicate + "))"; + return true; } bool Sum::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp new file mode 100644 index 00000000000..83bec1d5333 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} +class ParserAggregateFuncTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +TEST_P(ParserAggregateFuncTest, ParseQuery) +{ const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + ASSERT_NE(nullptr, parser); + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | summarize t = stdev(Age) by FirstName", + "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = stdevif(Age, Age < 10) by FirstName", + "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = binary_all_and(Age) by FirstName", + "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, 
+ { + "Customers | summarize t = binary_all_or(Age) by FirstName", + "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName" + + }, + { + "Customers | summarize t = binary_all_xor(Age) by FirstName", + "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName", + "SELECT\n FirstName,\n quantile(30 / 100)(Age) AS percentile_Age_30,\n quantile(40 / 100)(Age) AS percentile_Age_40,\n quantile(50 / 100)(Age) AS percentile_Age_50,\n quantile(60 / 100)(Age) AS percentile_Age_60,\n quantile(70 / 100)(Age) AS percentile_Age_70\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)", + "SELECT\n quantileExactWeighted(50 / 100)(Bucket, Frequency) AS percentile_Bucket_50,\n quantileExactWeighted(75 / 100)(Bucket, Frequency) AS percentile_Bucket_75,\n quantileExactWeighted(99.9 / 100)(Bucket, Frequency) AS percentile_Bucket_99_9\nFROM DataTable" + }, + { + "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))", + "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + } +}))); From 33c16cdfe5d67c98607c2aea9e3f0d8a10863b00 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 18 Aug 2022 06:53:29 -0700 Subject: [PATCH 073/279] applied changes asked by Yong --- .../Kusto/KustoFunctions/KQLAggregationFunctions.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff 
--git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index a16c4f6ea22..b0410ed4cfb 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -221,8 +222,8 @@ bool Percentiles::convertImpl(String &out,IParser::Pos &pos) ++pos; String column_name = getConvertedArgument(fn_name,pos); - column_name.pop_back(); - String expr = ""; + trim(column_name); + String expr; String value; String value_in_column; while(pos->type != TokenType::ClosingRoundBracket) @@ -259,7 +260,7 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) ++pos; String column_name = getConvertedArgument(fn_name,pos); - column_name.pop_back(); + trim(column_name); String expr = "quantiles("; String value; while(pos->type != TokenType::ClosingRoundBracket) @@ -308,7 +309,7 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) String frequency_column = getConvertedArgument(fn_name,pos); frequency_column.pop_back(); - String expr = ""; + String expr; String value; String value_in_column; From b712c1075cf10916782783f9d2aa6747ac295aca Mon Sep 17 00:00:00 2001 From: root Date: Fri, 19 Aug 2022 07:10:00 -0700 Subject: [PATCH 074/279] added percentile() and percentilew() --- src/Parsers/Kusto/KQL_ReleaseNote.md | 6 +++ .../KQLAggregationFunctions.cpp | 53 +++++++++++++++++-- .../KustoFunctions/KQLAggregationFunctions.h | 14 +++++ .../KustoFunctions/KQLFunctionFactory.cpp | 8 +++ .../Kusto/KustoFunctions/KQLFunctionFactory.h | 2 + .../KQL/gtest_KQL_AggregateFunctions.cpp | 8 +++ 6 files changed, 87 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 7206c3cec89..d077d199dd9 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -33,6 +33,12 @@ 
**do not support `range()` now** `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + # August 15, 2022 ## DateTpye diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index b0410ed4cfb..54ac82a1fcc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -213,6 +213,51 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos) return directMapping(out,pos,"minIf"); } +bool Percentile::convertImpl(String &out,IParser::Pos &pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name,pos); + trim(column_name); + + if(pos->type != TokenType::Comma) + return false; + ++pos; + String value = getConvertedArgument(fn_name,pos); + trim(value); + + out = "quantile(" + value + "/100)(" + column_name + ")"; + std::cout << "Mallik: " << out << std::endl; + return true; +} + +bool Percentilew::convertImpl(String &out,IParser::Pos &pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name,pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name,pos); + trim(frequency_column); + + ++pos; + String value = getConvertedArgument(fn_name,pos); + trim(value); + + out = "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ")"; + return true; +} + bool Percentiles::convertImpl(String 
&out,IParser::Pos &pos) { String fn_name = getKQLFunctionName(pos); @@ -303,11 +348,11 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) ++pos; String bucket_column = getConvertedArgument(fn_name,pos); - bucket_column.pop_back(); + trim(bucket_column); ++pos; String frequency_column = getConvertedArgument(fn_name,pos); - frequency_column.pop_back(); + trim(frequency_column); String expr; String value; @@ -348,11 +393,11 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) ++pos; String bucket_column = getConvertedArgument(fn_name,pos); - bucket_column.pop_back(); + trim(bucket_column); ++pos; String frequency_column = getConvertedArgument(fn_name,pos); - frequency_column.pop_back(); + trim(frequency_column); String expr = "quantilesExactWeighted("; String value; diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h index 6e7130420f4..86d94859922 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h @@ -165,6 +165,20 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; +class Percentile : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentile()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Percentilew : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "percentilew()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class Percentiles : public IParserKQLFunction { protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 075d56d9608..6cfd67514a2 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -152,6 +152,8 @@ namespace DB {"maxif", 
KQLFunctionValue::maxif}, {"min", KQLFunctionValue::min}, {"minif", KQLFunctionValue::minif}, + {"percentile", KQLFunctionValue::percentile}, + {"percentilew", KQLFunctionValue::percentilew}, {"percentiles", KQLFunctionValue::percentiles}, {"percentiles_array", KQLFunctionValue::percentiles_array}, {"percentilesw", KQLFunctionValue::percentilesw}, @@ -609,6 +611,12 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::minif: return std::make_unique(); + case KQLFunctionValue::percentile: + return std::make_unique(); + + case KQLFunctionValue::percentilew: + return std::make_unique(); + case KQLFunctionValue::percentiles: return std::make_unique(); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index ed747964175..38bac6d641a 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -136,6 +136,8 @@ namespace DB maxif, min, minif, + percentile, + percentilew, percentiles, percentiles_array, percentilesw, diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp index 83bec1d5333..aaa980ddcce 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -109,5 +109,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, { "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))", "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + }, + { + "Customers | summarize t = percentile(Age, 50) by FirstName", + "SELECT\n FirstName,\n quantile(50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilew(Bucket, Frequency, 50)", + "SELECT quantileExactWeighted(50 / 100)(Bucket, Frequency) AS t\nFROM DataTable" } }))); From 
b65901442f140cb29fa3bf452703e309134f05a4 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 23 Aug 2022 07:40:55 -0700 Subject: [PATCH 075/279] addressed change requests by Yong --- .../KQLAggregationFunctions.cpp | 93 +++++++++---------- 1 file changed, 45 insertions(+), 48 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp index 54ac82a1fcc..5a2fa0c984b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -19,59 +19,59 @@ namespace DB { -bool ArgMax::convertImpl(String &out,IParser::Pos &pos) +bool ArgMax::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"argMax"); } -bool ArgMin::convertImpl(String &out,IParser::Pos &pos) +bool ArgMin::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"argMin"); } -bool Avg::convertImpl(String &out,IParser::Pos &pos) +bool Avg::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"avg"); } -bool AvgIf::convertImpl(String &out,IParser::Pos &pos) +bool AvgIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"avgIf"); } -bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllAnd::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitAnd"); } -bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllOr::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitOr"); } -bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos) +bool BinaryAllXor::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"groupBitXor"); } -bool BuildSchema::convertImpl(String &out,IParser::Pos &pos) +bool BuildSchema::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool 
Count::convertImpl(String &out,IParser::Pos &pos) +bool Count::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"count"); } -bool CountIf::convertImpl(String &out,IParser::Pos &pos) +bool CountIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"countIf"); } -bool DCount::convertImpl(String &out,IParser::Pos &pos) +bool DCount::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -84,7 +84,7 @@ bool DCount::convertImpl(String &out,IParser::Pos &pos) return true; } -bool DCountIf::convertImpl(String &out,IParser::Pos &pos) +bool DCountIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -98,21 +98,21 @@ bool DCountIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeBag::convertImpl(String &out,IParser::Pos &pos) +bool MakeBag::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool MakeBagIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeBagIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool MakeList::convertImpl(String &out,IParser::Pos &pos) +bool MakeList::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -130,7 +130,7 @@ bool MakeList::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeListIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -150,12 +150,12 @@ bool MakeListIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool MakeListWithNulls::convertImpl(String &out,IParser::Pos &pos) +bool MakeListWithNulls::convertImpl(String & out,IParser::Pos & pos) { - return directMapping(out,pos,"groupArray"); //groupArray takes everything including NULLs + return 
directMapping(out,pos,"groupArray"); } -bool MakeSet::convertImpl(String &out,IParser::Pos &pos) +bool MakeSet::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -169,11 +169,11 @@ bool MakeSet::convertImpl(String &out,IParser::Pos &pos) const auto max_size = getConvertedArgument(fn_name,pos); out = "groupUniqArray(" + max_size + ")(" + expr + ")"; } else - out = "groupUniqArray(" + expr + ")"; + out = "groupUniqArray(" + expr + ")"; return true; } -bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) +bool MakeSetIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -193,27 +193,27 @@ bool MakeSetIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Max::convertImpl(String &out,IParser::Pos &pos) +bool Max::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"max"); } -bool MaxIf::convertImpl(String &out,IParser::Pos &pos) +bool MaxIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"maxIf"); } -bool Min::convertImpl(String &out,IParser::Pos &pos) +bool Min::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"min"); } -bool MinIf::convertImpl(String &out,IParser::Pos &pos) +bool MinIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"minIf"); } -bool Percentile::convertImpl(String &out,IParser::Pos &pos) +bool Percentile::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -224,18 +224,15 @@ bool Percentile::convertImpl(String &out,IParser::Pos &pos) String column_name = getConvertedArgument(fn_name,pos); trim(column_name); - if(pos->type != TokenType::Comma) - return false; ++pos; String value = getConvertedArgument(fn_name,pos); trim(value); out = "quantile(" + value + "/100)(" + column_name + ")"; - std::cout << "Mallik: " << out << std::endl; return true; } -bool Percentilew::convertImpl(String &out,IParser::Pos 
&pos) +bool Percentilew::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -258,7 +255,7 @@ bool Percentilew::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Percentiles::convertImpl(String &out,IParser::Pos &pos) +bool Percentiles::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -296,7 +293,7 @@ bool Percentiles::convertImpl(String &out,IParser::Pos &pos) return true; } -bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) +bool PercentilesArray::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -310,10 +307,10 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) String value; while(pos->type != TokenType::ClosingRoundBracket) { - if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket){ - + value = String(pos->begin, pos->end); expr = expr + value + "/100"; @@ -339,7 +336,7 @@ bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) +bool Percentilesw::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -384,7 +381,7 @@ bool Percentilesw::convertImpl(String &out,IParser::Pos &pos) return true; } -bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) +bool PercentileswArray::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -403,10 +400,11 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) String value; while(pos->type != TokenType::ClosingRoundBracket) { - if(pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" + if(pos->type != 
TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket - && pos->type != TokenType::ClosingSquareBracket){ - + && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); expr = expr + value + "/100"; @@ -432,7 +430,7 @@ bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Stdev::convertImpl(String &out,IParser::Pos &pos) +bool Stdev::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -444,7 +442,7 @@ bool Stdev::convertImpl(String &out,IParser::Pos &pos) return true; } -bool StdevIf::convertImpl(String &out,IParser::Pos &pos) +bool StdevIf::convertImpl(String & out,IParser::Pos & pos) { String fn_name = getKQLFunctionName(pos); @@ -461,42 +459,41 @@ bool StdevIf::convertImpl(String &out,IParser::Pos &pos) return true; } -bool Sum::convertImpl(String &out,IParser::Pos &pos) +bool Sum::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"sum"); } -bool SumIf::convertImpl(String &out,IParser::Pos &pos) +bool SumIf::convertImpl(String & out,IParser::Pos & pos) { return directMapping(out,pos,"sumIf"); } -bool TakeAny::convertImpl(String &out,IParser::Pos &pos) +bool TakeAny::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool TakeAnyIf::convertImpl(String &out,IParser::Pos &pos) +bool TakeAnyIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool Variance::convertImpl(String &out,IParser::Pos &pos) +bool Variance::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = res; return false; } -bool VarianceIf::convertImpl(String &out,IParser::Pos &pos) +bool VarianceIf::convertImpl(String & out,IParser::Pos & pos) { String res = String(pos->begin,pos->end); out = 
res; return false; } - } From 239d0dc35b589398f1ab7da89e818bdbd7cf17b9 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:42:21 -0700 Subject: [PATCH 076/279] updated release note + test file --- src/Parsers/Kusto/KQL_ReleaseNote.md | 9 --- .../KQL/gtest_KQL_AggregateFunctions.cpp | 66 +------------------ 2 files changed, 1 insertion(+), 74 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index d077d199dd9..0a9a6d87df1 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -21,18 +21,9 @@ - [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` -- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - **do not support `range()` now** - `Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName` - `Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName` - - [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` -- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - **do not support `range()` now** - `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))` - - [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) `Customers | summarize t = percentile(Age, 50) by FirstName` diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp index aaa980ddcce..1a532f27ac0 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -1,71 +1,7 @@ 
#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserAggregateFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserAggregateFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ From f39b18cd19413555f971b7a9d47a290a33841d70 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:47:39 -0700 Subject: [PATCH 077/279] updated release notes to resolve conflicts --- src/Parsers/Kusto/KQL_ReleaseNote.md | 46 +++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 
1 deletion(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 0a9a6d87df1..c3c038eb90d 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,50 @@ ## KQL implemented features -# August XX, 2022 +# August 29, 2022 + +## Dynamic functions +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 
10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` ## Aggregate Functions - [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) From e4454d250495b60d633f42fe49673ff417da62c1 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 06:57:54 -0700 Subject: [PATCH 078/279] updated release notes to resolve conflicts --- src/Parsers/Kusto/KQL_ReleaseNote.md | 96 ++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index c3c038eb90d..7e89108e78e 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,7 +1,7 @@ + ## KQL implemented features # August 29, 2022 - ## Dynamic functions - [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` @@ -46,7 +46,27 @@ `print endofyear(datetime(2017-01-01 10:10:17), 1)` `print endofyear(datetime(2017-01-01 10:10:17))` -## Aggregate 
Functions +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + +## Aggregate Functions + - [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) `Customers | summarize t = stdev(Age) by FirstName` @@ -75,8 +95,59 @@ `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` # August 15, 2022 + **double quote support** + ``print res = strcat("double ","quote")`` +## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` -## DateTpye + - 
[array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) + *Supports only basic lookup. Do not support start_index, length and occurrence* + `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` + `print output = array_index_of(dynamic([1, 2, 3]), 2)` + - [array_sum](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-sum-function) + `print output = array_sum(dynamic([2, 5, 3]))` + `print output = array_sum(dynamic([2.5, 5.5, 3]))` + - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) + `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` + `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + +## Data Types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + *Supports only 1D array* + `print output = dynamic(['a', 'b', 'c'])` + `print output = dynamic([1, 2, 3])` + - 
[bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) `print bool(1)` `print boolean(0)` @@ -130,8 +201,6 @@ - [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) `print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` - - ## DateTimeFunctions - [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) `print startofyear(datetime(2017-01-01 10:10:17), -1)` @@ -230,12 +299,26 @@ `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) + `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` + `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) + `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` + `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` - [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 
3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` - +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) + `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` + `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` # August 1, 2022 @@ -262,6 +345,7 @@ OR pass dialect setting with '--'. For example : ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + - **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) `print strcmp('abc','ABC')` From e544bc395af6a45732867661712f48fe9e676ccd Mon Sep 17 00:00:00 2001 From: root Date: Wed, 24 Aug 2022 07:24:06 -0700 Subject: [PATCH 079/279] retry resolve merge conflict --- src/Parsers/Kusto/KQL_ReleaseNote.md | 59 ++++++++++++++-------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 7e89108e78e..ec1ed8c2c55 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -2,6 +2,34 @@ ## KQL implemented features # August 29, 2022 +## Aggregate Functions +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- 
[binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + ## Dynamic functions - [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` @@ -63,36 +91,7 @@ `print unixtime_milliseconds_todatetime(1546300800000)` - [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print unixtime_nanoseconds_todatetime(1546300800000000000)` - -## Aggregate Functions - -- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) - `Customers | summarize t = stdev(Age) by FirstName` - -- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) - `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` - -- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) - `Customers | summarize t = binary_all_and(Age) by FirstName` - -- 
[binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) - `Customers | summarize t = binary_all_or(Age) by FirstName` - -- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) - `Customers | summarize t = binary_all_xor(Age) by FirstName` - -- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` - -- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` - -- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize t = percentile(Age, 50) by FirstName` - -- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` + `print unixtime_nanoseconds_todatetime(1546300800000000000)` # August 15, 2022 **double quote support** From f2442a84213ae9d8babe0444aac1eaacbe09ef8e Mon Sep 17 00:00:00 2001 From: root Date: Thu, 11 Aug 2022 17:11:22 -0700 Subject: [PATCH 080/279] Resubmit Aggregate functions - array_index_of, length, sum and dynamic data type --- src/Parsers/Kusto/KQL_ReleaseNote.md | 130 +----------------- .../KustoFunctions/KQLDynamicFunctions.cpp | 23 ++-- src/Parsers/tests/gtest_Parser.cpp | 36 +++++ 3 files changed, 51 insertions(+), 138 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index ec1ed8c2c55..31f09cc5071 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,108 +1,8 @@ ## KQL implemented features -# August 29, 2022 -## Aggregate Functions -- 
[stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) - `Customers | summarize t = stdev(Age) by FirstName` - -- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) - `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` - -- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) - `Customers | summarize t = binary_all_and(Age) by FirstName` - -- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) - `Customers | summarize t = binary_all_or(Age) by FirstName` - -- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) - `Customers | summarize t = binary_all_xor(Age) by FirstName` - -- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` - -- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` - -- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `Customers | summarize t = percentile(Age, 50) by FirstName` - -- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) - `DataTable | summarize t = percentilew(Bucket, Frequency, 50)` - -## Dynamic functions -- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) - `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` - -- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) - `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), 
dynamic([4, 5, 6])) == dynamic([1, 5, 3])` - `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` - `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` - `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` - -- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) - `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` - `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` - `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` - -- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) - `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` - `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` - -## DateTimeFunctions - -- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) - `print ago(2h)` - -- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) - `print endofday(datetime(2017-01-01 10:10:17), -1)` - `print endofday(datetime(2017-01-01 10:10:17), 1)` - `print endofday(datetime(2017-01-01 10:10:17))` - -- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) - `print endofmonth(datetime(2017-01-01 10:10:17), -1)` - `print endofmonth(datetime(2017-01-01 10:10:17), 1)` - `print endofmonth(datetime(2017-01-01 10:10:17))` - -- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) - `print endofweek(datetime(2017-01-01 10:10:17), 1)` - `print endofweek(datetime(2017-01-01 10:10:17), -1)` - `print endofweek(datetime(2017-01-01 10:10:17))` - -- 
[endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) - `print endofyear(datetime(2017-01-01 10:10:17), -1)` - `print endofyear(datetime(2017-01-01 10:10:17), 1)` - `print endofyear(datetime(2017-01-01 10:10:17))` - -- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) - `print make_datetime(2017,10,01)` - `print make_datetime(2017,10,01,12,10)` - `print make_datetime(2017,10,01,12,11,0.1234567)` - -- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) - `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` - `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` - `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` - -- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) - `print unixtime_microseconds_todatetime(1546300800000000)` - -- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) - `print unixtime_milliseconds_todatetime(1546300800000)` - -- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print unixtime_nanoseconds_todatetime(1546300800000000000)` - # August 15, 2022 - **double quote support** - ``print res = strcat("double ","quote")`` ## Aggregate functions - - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) - `print res = bin_at(6.5, 2.5, 7)` - `print res = bin_at(1h, 1d, 12h)` - `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` - `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` - - 
[array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) *Supports only basic lookup. Do not support start_index, length and occurrence* `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` @@ -113,35 +13,7 @@ - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` `print output = array_length(dynamic([1, 2, 3]))` - -## Conversion -- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) - `print tobool(true) == true` - `print toboolean(false) == false` - `print tobool(0) == false` - `print toboolean(19819823) == true` - `print tobool(-2) == true` - `print isnull(toboolean('a'))` - `print tobool('true') == true` - `print toboolean('false') == false` - -- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) - `print todouble(4) == 4` - `print toreal(4.2) == 4.2` - `print isnull(todouble('a'))` - `print toreal('-0.3') == -0.3` - -- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) - `print isnull(toint('a'))` - `print toint(4) == 4` - `print toint('4') == 4` - `print isnull(toint(4.2))` - -- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) - `print tostring(123) == '123'` - `print tostring('asd') == 'asd'` - -## Data Types +## DateType - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* `print output = dynamic(['a', 'b', 'c'])` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index a6ff0a374eb..3f534679c58 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ 
b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -34,16 +34,23 @@ bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String array = getConvertedArgument(fn_name, pos); + ++pos; + const auto needle = getConvertedArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + ") , 1)"; + + return true; } bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "length"); } bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) @@ -111,9 +118,7 @@ bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) bool ArraySum::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "arraySum"); } bool BagKeys::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index c97080482ed..bbd2313cd48 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -595,6 +595,42 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "print output = dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS output" + }, + { + "print output = dynamic(['a', 'b', 'c'])", + "SELECT ['a', 'b', 'c'] AS output" + }, + { + "print output = array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') - 
1 AS output" + }, + { + "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" + }, + { + "print output = array_length(dynamic([1, 2, 3]))", + "SELECT length([1, 2, 3]) AS output" + }, + { + "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" + }, + { + "print output = array_sum(dynamic([2, 5, 3]))", + "SELECT arraySum([2, 5, 3]) AS output" + }, + { + "print output = array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT arraySum([2.5, 5.5, 3]) AS output" } }))); From a0b6a324ba29a2382eb793a436e2b6102f00f67f Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 07:32:42 -0700 Subject: [PATCH 081/279] Implement KQL IPv6 functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 16 ++--- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 63 +++++++++++++++---- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 31f09cc5071..04397646612 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -154,40 +154,40 @@ `print bitset_count_ones(42) == 3` ## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` `print format_ipv4_mask(3232236031, 24) == 
'192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` -- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 
'fe80::85d:e82c:9446:7995/127') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 765912ff936..a0e7cb30f79 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -82,7 +82,7 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos); const auto rhs = getArgument(function_name, pos); const auto mask = getOptionalArgument(function_name, pos); - out = std::format("{} = 0", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? 
*mask : "32"}, pos.max_depth)); return true; } @@ -172,16 +172,43 @@ bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask ? *mask : "128"; + out = std::format( + "if(length(splitByChar('/', {1}) as lhs_tokens_{0}) > 2 or length(splitByChar('/', {2}) as rhs_tokens_{0}) > 2 " + "or isNull(IPv6StringToNumOrNull(lhs_tokens_{0}[1]) as lhs_ipv6_{0}) or length(lhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(lhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(lhs_tokens_{0}[-1])) as lhs_suffix_{0}) " + "or isNull(IPv6StringToNumOrNull(rhs_tokens_{0}[1]) as rhs_ipv6_{0}) or length(rhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(rhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(rhs_tokens_{0}[-1])) as rhs_suffix_{0}) " + "or isNull(toUInt8(min2({3}, min2(ifNull(lhs_suffix_{0}, 128), ifNull(rhs_suffix_{0}, 128)))) as suffix_{0}) " + "or isNull(bitShiftLeft(bitShiftRight(bitNot(reinterpretAsFixedString(0::UInt128)), (128 - suffix_{0}) as zeroes_{0}), " + "zeroes_{0}) as mask_{0}) or isNull(bitAnd(lhs_ipv6_{0}, mask_{0}) as lhs_base_{0}) " + "or isNull(bitAnd(rhs_ipv6_{0}, mask_{0}) as rhs_base_{0}), null, " + "multiIf(lhs_base_{0} < rhs_base_{0}, -1, lhs_base_{0} > rhs_base_{0}, 1, 0))", + generateUniqueIdentifier(), + lhs, + rhs, + calculated_mask); + return true; } bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = 
getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth)); + return true; } bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) @@ -192,19 +219,29 @@ bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); out = std::format( - "if(isNull(ifNull(if(isNull({1} as ipv4_{2}), null, IPv4ToIPv6(ipv4_{2})), IPv6StringToNumOrNull({0})) as ipv6_{2}), null, " - "arrayStringConcat(flatten(extractAllGroups(lower(hex(assumeNotNull(ipv6_{2}))), '([\\da-f]{{4}})')), ':'))", + "if(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(IPv6StringToNumOrNull(tokens_{1}[1]) as ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(tupleElement(IPv6CIDRToRange(assumeNotNull(ip_{1}), toUInt8(ifNull(mask_{1} " + "+ if(isIPv4String(tokens_{1}[1]), 96, 0), 128))), 1))), '([\\da-f]{{4}})')), ':'))", ip_address, - kqlCallToExpression("parse_ipv4", {ip_address}, pos.max_depth), generateUniqueIdentifier()); return true; } bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull({0} as ipv4), {1}, {2})", + kqlCallToExpression("parse_ipv4_mask", {ip_address, mask}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4)"}, 
pos.max_depth)); + return true; } bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) From 6947de5b03af1e3da2632ac444c62cbba6a08791 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 07:54:38 -0700 Subject: [PATCH 082/279] Correct rebase error --- src/Parsers/Kusto/KQL_ReleaseNote.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 04397646612..a6073d8e00d 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -154,40 +154,40 @@ `print bitset_count_ones(42) == 3` ## IP functions -- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` `print format_ipv4(3232236031, 24) == '192.168.1.0'` -- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) +- [format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` -- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` -- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) +- 
[ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` -- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` -- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` -- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) +- [parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 
2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` From f731c677eed80940c2d5dcdf2c76d8f58a4dceb1 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 09:54:49 -0700 Subject: [PATCH 083/279] Add unit tests --- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 30 +++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index c2257d05500..731715b4542 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -51,6 +51,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "print ipv4_compare(A, B, C)", "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\)" }, + { + "print ipv6_compare(A, B)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(128, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, 
mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\)" + }, + { + "print ipv6_compare(A, B, C)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(C, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\)" + }, + { + "print ipv4_is_in_range(A, B)", + "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS 
NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + }, { "print ipv4_is_match(A, B)", "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), 
toUInt8\\(min2\\(32, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), mask_\\d+\\).1\\)\\)\\) = 0" @@ -60,12 +72,12 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "SELECT if\\(\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS lhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS lhs_mask_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS rhs_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS rhs_mask_\\d+\\) IS NULL\\), NULL, sign\\(toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(lhs_ip_\\d+\\), toUInt8\\(min2\\(C, min2\\(assumeNotNull\\(lhs_mask_\\d+\\), assumeNotNull\\(rhs_mask_\\d+\\)\\)\\)\\) AS mask_\\d+\\).1\\) - toInt64\\(IPv4CIDRToRange\\(assumeNotNull\\(rhs_ip_\\d+\\), 
mask_\\d+\\).1\\)\\)\\) = 0" }, { - "print parse_ipv4_mask(A, B)", - "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + "print ipv6_is_match(A, B)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(128, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\) = 0" }, { - "print ipv4_is_in_range(A, B)", - "SELECT if\\(\\(\\(IPv4StringToNumOrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) 
AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS range_start_ip_\\d+\\) IS NULL\\) OR \\(\\(multiIf\\(\\(length\\(splitByChar\\('/', B\\) AS tokens_\\d+\\) > 2\\) OR \\(NOT isIPv4String\\(tokens_\\d+\\[1\\]\\)\\), NULL, length\\(tokens_\\d+\\) = 1, 32, \\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL, NULL, toUInt8\\(min2\\(mask_\\d+, 32\\)\\)\\) AS range_mask_\\d+\\) IS NULL\\), NULL, bitXor\\(range_start_ip_\\d+, bitAnd\\(ip_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - range_mask_\\d+\\) - 1\\)\\)\\)\\) = 0\\)" + "print ipv6_is_match(A, B, C)", + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS lhs_tokens_\\d+\\) > 2\\) OR \\(length\\(splitByChar\\('/', B\\) AS rhs_tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(lhs_tokens_\\d+\\[1\\]\\) AS lhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(lhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(lhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(lhs_tokens_\\d+\\[-1\\]\\)\\) AS lhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(IPv6StringToNumOrNull\\(rhs_tokens_\\d+\\[1\\]\\) AS rhs_ipv6_\\d+\\) IS NULL\\) OR \\(\\(length\\(rhs_tokens_\\d+\\) = 2\\) AND \\(\\(\\(if\\(isIPv4String\\(rhs_tokens_\\d+\\[1\\]\\), 96, 0\\) \\+ toUInt8OrNull\\(rhs_tokens_\\d+\\[-1\\]\\)\\) AS rhs_suffix_\\d+\\) IS NULL\\)\\) OR \\(\\(toUInt8\\(min2\\(C, min2\\(ifNull\\(lhs_suffix_\\d+, 128\\), ifNull\\(rhs_suffix_\\d+, 128\\)\\)\\)\\) AS suffix_\\d+\\) IS NULL\\) OR \\(\\(bitShiftLeft\\(bitShiftRight\\(bitNot\\(reinterpretAsFixedString\\(CAST\\('0', 'UInt128'\\)\\)\\), 128 - suffix_\\d+ AS zeroes_\\d+\\), zeroes_\\d+\\) AS mask_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(lhs_ipv6_\\d+, mask_\\d+\\) AS lhs_base_\\d+\\) IS NULL\\) OR \\(\\(bitAnd\\(rhs_ipv6_\\d+, mask_\\d+\\) AS rhs_base_\\d+\\) IS NULL\\), NULL, multiIf\\(lhs_base_\\d+ < rhs_base_\\d+, -1, lhs_base_\\d+ > rhs_base_\\d+, 1, 0\\)\\) = 0" }, 
{ "print ipv4_is_private(A)", @@ -79,8 +91,16 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, "print parse_ipv4(A)", "SELECT multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)" }, + { + "print parse_ipv4_mask(A, B)", + "SELECT if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\)" + }, { "print parse_ipv6(A)", - "SELECT if\\(\\(ifNull\\(if\\(\\(multiIf\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\) AS ipv4_\\d+\\) IS NULL, NULL, IPv4ToIPv6\\(ipv4_\\d+\\)\\), IPv6StringToNumOrNull\\(A\\)\\) AS ipv6_\\d+\\) IS NULL, NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(assumeNotNull\\(ipv6_\\d+\\)\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + "SELECT if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 
128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)" + }, + { + "print parse_ipv6_mask(A, B)", + "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, 
\\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" } }))); From a56f2f754f099698ad2fa205bcc63bb7bf72c243 Mon Sep 17 00:00:00 2001 From: kashwy Date: Fri, 12 Aug 2022 11:47:25 -0700 Subject: [PATCH 084/279] Kusto-phase2: add bin_at function. 
fix trim error --- src/Parsers/Kusto/KQL_ReleaseNote.md | 8 +++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 24 ++++++++++--------- .../KustoFunctions/KQLStringFunctions.cpp | 8 +++---- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 18 +++++++++++--- 4 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index a6073d8e00d..91b3630f3f7 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -2,7 +2,15 @@ ## KQL implemented features # August 15, 2022 + **double quote support** + ``print res = strcat("double ","quote")`` ## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + - [array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) *Supports only basic lookup. 
Do not support start_index, length and occurrence* `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index 714265633d5..dd79cc06898 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -29,36 +29,38 @@ bool Bin::convertImpl(String &out,IParser::Pos &pos) bool BinAt::convertImpl(String & out,IParser::Pos & pos) { - ParserKQLDateTypeTimespan time_span; double bin_size; - const String fn_name = getKQLFunctionName(pos); - if (fn_name.empty()) return false; + ++pos; + String origal_expr(pos->begin, pos->end); String expression_str = getConvertedArgument(fn_name, pos); + ++pos; String bin_size_str = getConvertedArgument(fn_name, pos); + ++pos; String fixed_point_str = getConvertedArgument(fn_name, pos); - bin_size_str = bin_size_str.substr(0, bin_size_str.size()-1); - auto t1 = std::format("toFloat64({})", fixed_point_str); auto t2 = std::format("toFloat64({})", expression_str); int dir = t2 >= t1 ? 
0 : -1; + bin_size = std::stod(bin_size_str); - if (time_span.parseConstKQLTimespan(bin_size_str)) + if (origal_expr == "datetime" or origal_expr == "date") { - bin_size = time_span.toSeconds(); - - out = std::format("toDateTime64({} + toInt64(({} -{}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + } + else if (origal_expr == "timespan" or origal_expr =="time" or ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + { + String bin_value = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); } else { - bin_size = std::stod(bin_size_str); - out = std::format("{} + toInt64(({} -{}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); } return true; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 2a88a56b844..285ed5c4a17 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -557,8 +557,8 @@ bool Trim::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - String ltrim = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as srcl, concat('random_str', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); - out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as srcr, concat('random_str', reverse({1})),'') as dstr) = srcr, {0}, reverse(dstr))", ltrim, regex); + String ltrim = std::format("if 
((replaceRegexpOne(concat('start_random_str_', {0}) as srcl, concat('start_random_str_', {1}),'') as dstl) = srcl, {0}, dstl)", source, regex); + out = std::format("if ((replaceRegexpOne(concat({0}, '_end_random_str') as srcr, concat({1}, '_end_random_str'),'') as dstr) = srcr, {0}, dstr)", ltrim, regex); return true; } @@ -573,7 +573,7 @@ bool TrimEnd::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - out = std::format("if ((replaceRegexpOne(concat('random_str', reverse({0})) as src, concat('random_str', reverse({1})),'') as dst) = src, {0}, reverse(dst))", source, regex); + out = std::format("if ((replaceRegexpOne(concat({0}, '_end_random_str') as src, concat({1},'_end_random_str'),'') as dst) = src, {0}, dst)", source, regex); return true; } @@ -588,7 +588,7 @@ bool TrimStart::convertImpl(String & out,IParser::Pos & pos) String regex = getConvertedArgument(fn_name, pos); ++pos; String source = getConvertedArgument(fn_name, pos); - out = std::format("if ((replaceRegexpOne(concat('random_str', {0}) as src, concat('random_str', {1}),'') as dst) = src, {0}, dst)", source, regex); + out = std::format("if ((replaceRegexpOne(concat('start_random_str_', {0}) as src, concat('start_random_str_', {1}),'') as dst) = src, {0}, dst)", source, regex); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index f2994464e14..0ef816646a4 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -99,15 +99,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, }, { "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", - "SELECT if((replaceRegexpOne(concat('random_str', concat('- ', 'Te st1', '// $')) AS src, concat('random_str', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" + "SELECT 
if((replaceRegexpOne(concat('start_random_str_', concat('- ', 'Te st1', '// $')) AS src, concat('start_random_str_', '[^\\\\w]+'), '') AS dst) = src, concat('- ', 'Te st1', '// $'), dst)" }, { "print trim_end('.com', 'bing.com')", - "SELECT if((replaceRegexpOne(concat('random_str', reverse('bing.com')) AS src, concat('random_str', reverse('.com')), '') AS dst) = src, 'bing.com', reverse(dst))" + "SELECT if((replaceRegexpOne(concat('bing.com', '_end_random_str') AS src, concat('.com', '_end_random_str'), '') AS dst) = src, 'bing.com', dst)" }, { "print trim('--', '--https://bing.com--')", - "SELECT if((replaceRegexpOne(concat('random_str', reverse(if((replaceRegexpOne(concat('random_str', '--https://bing.com--') AS srcl, concat('random_str', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl))) AS srcr, concat('random_str', reverse('--')), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), reverse(dstr))" + "SELECT if((replaceRegexpOne(concat(if((replaceRegexpOne(concat('start_random_str_', '--https://bing.com--') AS srcl, concat('start_random_str_', '--'), '') AS dstl) = srcl, '--https://bing.com--', dstl), '_end_random_str') AS srcr, concat('--', '_end_random_str'), '') AS dstr) = srcr, if(dstl = srcl, '--https://bing.com--', dstl), dstr)" }, { "print bool(1)", @@ -164,6 +164,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, { "print timespan('1.5d')", "SELECT 129600." + }, + { + "print res = bin_at(6.5, 2.5, 7)", + "SELECT toFloat64(7) + (toInt64(((toFloat64(6.5) - toFloat64(7)) / 2.5) + -1) * 2.5) AS res" + }, + { + "print res = bin_at(1h, 1d, 12h)", + "SELECT concat(toString(toInt32(((toFloat64(43200.) + (toInt64(((toFloat64(3600.) 
- toFloat64(43200.)) / 86400) + -1) * 86400)) AS x) / 3600)), ':', toString(toInt32((x % 3600) / 60)), ':', toString(toInt32((x % 3600) % 60))) AS res" + }, + { + "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", + "SELECT toDateTime64(toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" } }))); From aacb33049ddabf71e25979bd25e130dbf5f7be99 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 12 Aug 2022 13:34:55 -0700 Subject: [PATCH 085/279] Implement some KQL conversion functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 28 +++++++++++++++ .../KustoFunctions/IParserKQLFunction.cpp | 8 +++++ .../Kusto/KustoFunctions/IParserKQLFunction.h | 1 + .../KustoFunctions/KQLCastingFunctions.cpp | 34 +++++++++++++----- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 11 ------ src/Parsers/tests/KQL/gtest_KQL_Binary.cpp | 2 -- .../tests/KQL/gtest_KQL_Conversion.cpp | 35 +++++++++++++++++++ 7 files changed, 97 insertions(+), 22 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 91b3630f3f7..b948dbf2443 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -21,6 +21,34 @@ - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print 
tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + ## DateType - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 243b67b7308..2310879862a 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -18,6 +18,8 @@ #include #include +#include + #include namespace DB @@ -80,6 +82,12 @@ bool IParserKQLFunction::directMapping(String & out, IParser::Pos & pos, const S return false; } +String IParserKQLFunction::generateUniqueIdentifier() +{ + static pcg32_unique unique_random_generator; + return std::to_string(unique_random_generator()); +} + String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos) { if (auto optionalArgument = getOptionalArgument(function_name, pos)) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index b7f8427043c..5758356b81e 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -44,6 +44,7 @@ protected: virtual bool 
convertImpl(String & out, IParser::Pos & pos) = 0; static bool directMapping(String & out, IParser::Pos & pos, const String & ch_fn); + static String generateUniqueIdentifier(); static String getArgument(const String & function_name, DB::IParser::Pos & pos); static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index acbb7468d20..b6082995ec1 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -9,9 +9,17 @@ namespace DB { bool ToBool::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format( + "multiIf(toString({0}) = 'true', true, " + "toString({0}) = 'false', false, toInt64OrNull(toString({0})) != 0)", + param, + generateUniqueIdentifier()); + return true; } bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -23,16 +31,24 @@ bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) bool ToDouble::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + out = std::format("toFloat64OrNull(toString({0}))", param); + return true; } bool ToInt::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + 
const auto param = getArgument(function_name, pos); + out = std::format("toInt32OrNull(toString({0}))", param); + return true; } bool ToString::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index a0e7cb30f79..40f34f766b5 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -15,19 +15,8 @@ #include #include -#include - #include -namespace -{ -String generateUniqueIdentifier() -{ - static pcg32_unique unique_random_generator; - return std::to_string(unique_random_generator()); -} -} - namespace DB { bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp index 600965dcef6..a1b26ee5614 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp @@ -1,7 +1,5 @@ #include -#include -#include #include INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest, diff --git a/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp new file mode 100644 index 00000000000..27e1167bde3 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp @@ -0,0 +1,35 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P( + ParserKQLQuery_Conversion, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print tobool(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print toboolean(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0)" + }, + { + "print todouble(A)", + "SELECT toFloat64OrNull(toString(A))" + }, + { + "print toint(A)", + "SELECT toInt32OrNull(toString(A))" + }, + { + "print 
toreal(A)", + "SELECT toFloat64OrNull(toString(A))" + }, + { + "print tostring(A)", + "SELECT ifNull(toString(A), '')" + } +}))); From 6fee015ccbd5e48e22503e2787290cad39320556 Mon Sep 17 00:00:00 2001 From: kashwy Date: Tue, 16 Aug 2022 06:48:49 -0700 Subject: [PATCH 086/279] Kusto-phase2 : finish make series --- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 300 +++++++++++++++------- src/Parsers/Kusto/ParserKQLMakeSeries.h | 20 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 34 ++- 3 files changed, 263 insertions(+), 91 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 0c658b0ba7f..03528b6af1e 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -116,7 +116,7 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste if (end_pos == begin) end_pos = pos; - if (step_pos == begin) + if (String(step_pos->begin, step_pos->end) != "step") return false; if (String(from_pos->begin, from_pos->end) == "from") @@ -124,42 +124,236 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste ++from_pos; auto end_from_pos = (to_pos != begin) ? 
to_pos : step_pos; --end_from_pos; - from_to_step.from = String(from_pos->begin, end_from_pos->end); + from_to_step.from_str = String(from_pos->begin, end_from_pos->end); } - if (to_pos != begin) + if (String(to_pos->begin, to_pos->end) == "to") { ++to_pos; --step_pos; - from_to_step.to = String(to_pos->begin, step_pos->end); - ++step_pos; + from_to_step.to_str = String(to_pos->begin, step_pos->end); ++step_pos; } --end_pos; - from_to_step.step = String(step_pos->begin, end_pos->end); + ++step_pos; + from_to_step.step_str = String(step_pos->begin, end_pos->end); + + if (String(step_pos->begin, step_pos->end) == "time" || String(step_pos->begin, step_pos->end) == "timespan" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(from_to_step.step_str)) + { + from_to_step.is_timespan = true; + from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth)); + } + else + from_to_step.step = std::stod(from_to_step.step_str); + return true; } +void ParserKQLMakeSeries :: makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +{ + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + String bin_str, start, end; + + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64({0}) + (toInt64(((toFloat64({1}) - toFloat64({0})) / {2}) ) * {2}) AS {1}_ali ", + start_str, axis_column, step); + start = std::format("toUInt64({})", 
start_str); + } + else + { + bin_str = std::format(" toFloat64(toInt64((toFloat64({0}) ) / {1}) * {1}) AS {0}_ali ", + axis_column, step); + } + + auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); + + if (!end_str.empty()) + end = std::format("toUInt64({})", end_str); + + String range, condition; + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({},{}, toUInt64({}))", start, end, step); + condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} , toUInt64({}))", end, step); + condition = std::format("{}_ali <= {}", axis_column, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("{}_ali >= {}", axis_column, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = "1"; //true + } + + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 
0 : {} - length(ga)),1) )) as {}", + agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf({0}_ali, {1}), arrayMap( x->(toFloat64(x)), {2})) ) as {0}", + axis_column, condition,range); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,step, axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); +} + +void ParserKQLMakeSeries :: makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +{ + const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) + + String start_str, end_str; + String sub_query, main_query; + + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if (!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + String bin_str, start, 
end; + + uint64_t diff = 0; + if (!start_str.empty()) // has from + { + bin_str = std::format(" toFloat64(toDateTime64({0}, 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64({1}, 9, 'UTC')) - toFloat64(toDateTime64({0}, 9, 'UTC'))) / {2}) ) * {2}) AS {1}_ali ", + start_str, axis_column, step); + start = std::format("toUInt64(toDateTime64({},9,'UTC'))", start_str); + } + else + { + bin_str = std::format(" toInt64((toFloat64(toDateTime64({0}, 9, 'UTC')) + {1}) / {2}) * {2} AS {0}_ali ", + axis_column, era_diff, step); + diff = era_diff; + } + + auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); + + if (!end_str.empty()) + end = std::format("toUInt64(toDateTime64({}, 9, 'UTC'))", end_str); + + String range, condition; + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({},{}, toUInt64({}))", start, end, step); + condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + } + else if (start_str.empty() && !end_str.empty()) + { + range = std::format("range(low, {} + {}, toUInt64({}))", end, era_diff, step); + condition = std::format("{0}_ali - {1} < {2}", axis_column, era_diff, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("{}_ali >= {}", axis_column, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = "1"; //true + } + + auto range_len = std::format("length({})", range); + main_query = std::format("{} ", group_expression); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = 
std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", + agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query +=", " + agg_group_column; + + axis_and_agg_alias_list +=", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); + } + auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({0}_ali - {1},9,'UTC'), {2}), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {3}) )) as {0}", + axis_column, diff, condition,range); + + main_query += ", " + axis_str; + auto sub_group_by = std::format("{}", group_expression); + + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + axis_column, axis_column,step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); + + main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); +} + bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (op_pos.empty()) return true; auto begin = pos; - pos = op_pos.back(); - String axis_column; - String group_expression; - ParserKeyword s_on("on"); ParserKeyword s_by("by"); ParserToken equals(TokenType::Equals); ParserToken comma(TokenType::Comma); - AggregationColumns aggregation_columns; - FromToStepClause from_to_step; + ASTPtr sub_qurery_table; + + KQLMakeSeries kql_make_series; + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; 
ParserKQLDateTypeTimespan time_span; @@ -177,8 +371,6 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseFromToStepClause(from_to_step, pos)) return false; - // 'on' statement parameter, expecting scalar value of type 'int', 'long', 'real', 'datetime' or 'timespan'. - if (s_by.ignore(pos, expected)) { group_expression = getExprFromToken(pos); @@ -186,8 +378,6 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - String subquery_columns; - for (auto agg_column : aggregation_columns) { String column_str = std::format("{}({}) AS {}_ali", agg_column.aggregation_fun, agg_column.column, agg_column.alias); @@ -197,90 +387,26 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec subquery_columns += ", "+ column_str; } - ASTPtr sub_qurery_table; - double step; - String sub_query ; - String main_query ; - String group_by; - - String start_str = getExprFromToken(from_to_step.from, pos.max_depth); - String end_str = getExprFromToken(from_to_step.to, pos.max_depth); - String step_str = from_to_step.step; - - if (time_span.parseConstKQLTimespan(step_str)) - { - step = time_span.toSeconds(); - - auto bin_str = std::format(" toUInt64(toFloat64(toDateTime64({},6,'UTC')) / {}) * {} AS {}_ali ", axis_column, step,step, axis_column); - auto sub_sub_query = std::format(" (Select {},{}, {} FROM {} GROUP BY {},{}_ali ORDER BY {}_ali) ", group_expression, subquery_columns, bin_str, table_name, group_expression, axis_column, axis_column); - - auto start = std::format("toUInt64(toDateTime64({},6,'UTC'))", start_str); - auto end = std::format("toUInt64(toDateTime64({},6,'UTC'))", end_str); - auto range = std::format("range({},{}, toUInt64({}))", start, end, step); - auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); - - auto axis_and_agg_alias_list = axis_column; - auto final_axis_agg_alias_list 
=std::format("tupleElement(zipped,1) AS {}",axis_column); - int idx = 2; - for (auto agg_column : aggregation_columns) - { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}_ali >= {} and {}_ali <= {}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, axis_column, start, axis_column, end, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; - - axis_and_agg_alias_list +=", " + agg_column.alias; - final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); - } - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({}_ali,6,'UTC'),{}_ali >= {} and {}_ali <= {}), arrayMap( x->(toDateTime64(x,6,'UTC')), {}) )) as {}", - axis_column, axis_column, start, axis_column, end, range, axis_column); - - main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); - - sub_query = std::format("( SELECT min({}_ali) AS low,max({}_ali) AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", - axis_column, axis_column,axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); - - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); - - } + if (from_to_step.is_timespan) + makeTimeSeries(kql_make_series, pos.max_depth); else - { - step = stod(step_str); + makeNumericSeries(kql_make_series, pos.max_depth); - sub_query = std::format("kql( {} | summarize {}, {} = toint({} / {}) * {} by {},{} )", - table_name, subquery_columns, axis_column, axis_column, step, subquery_columns, axis_column); - } - - Tokens token_subquery(sub_query.c_str(), sub_query.c_str()+sub_query.size()); + Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); if 
(!ParserTablesInSelectQuery().parse(pos_subquery, sub_qurery_table, expected)) return false; tables = std::move(sub_qurery_table); - String converted_columns = main_query; + Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + IParser::Pos pos_main_query(token_main_query, pos.max_depth); - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); - IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); - - if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, node, expected)) + if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, node, expected)) return false; - if (!group_by.empty()) - { - String converted_groupby = group_by; - - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); - IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); - - if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) - return false; - } - pos = begin; return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h index b30155b1bd8..a89ec97174e 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.h +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -26,11 +26,25 @@ protected: using AggregationColumns = std::vector; struct FromToStepClause { - String from; - String to; - String step; + String from_str; + String to_str; + String step_str; + bool is_timespan = false; + double step; }; + struct KQLMakeSeries { + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + String axis_column; + String group_expression; + String subquery_columns; + String sub_query; + String main_query; + }; + + void makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); + void makeTimeSeries(KQLMakeSeries & 
kql_make_series, const uint32_t & max_depth); bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); const char * getName() const override { return "KQL project"; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index bfa52368c16..3c8705bc9fd 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -12,6 +12,9 @@ #include #include #include +#include +#include + namespace DB { @@ -94,6 +97,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKQLSort kql_sort_p; ParserKQLSummarize kql_summarize_p; ParserKQLTable kql_table_p; + ParserKQLMakeSeries kql_make_series_p; ASTPtr select_expression_list; ASTPtr tables; @@ -111,7 +115,8 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { "sort",&kql_sort_p}, { "order",&kql_sort_p}, { "summarize",&kql_summarize_p}, - { "table",&kql_table_p} + { "table",&kql_table_p}, + { "make-series",&kql_make_series_p} }; std::vector> operation_pos; @@ -137,6 +142,20 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ++pos; String kql_operator(pos->begin,pos->end); + if (kql_operator == "make") + { + ++pos; + ParserKeyword s_series("series"); + ParserToken s_dash(TokenType::Minus); + if (s_dash.ignore(pos,expected)) + { + if (s_series.ignore(pos,expected)) + { + kql_operator = "make-series"; + --pos; + } + } + } if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) return false; ++pos; @@ -187,6 +206,19 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) where_expression = kql_summarize_p.where_expression; } + kql_make_series_p.setTableName(table_name); + if (!kql_make_series_p.parse(pos, select_expression_list, expected)) + return false; + else + { + if (kql_make_series_p.group_expression_list) + 
group_expression_list = kql_make_series_p.group_expression_list; + + if (kql_make_series_p.tables) + tables = kql_make_series_p.tables; + + } + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); select_query->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); From 5c712100e079576de1512a3f79e259566e77a73c Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 12:34:03 -0400 Subject: [PATCH 087/279] Added functional tests for IP, String, Binary and Datetime --- .../0_stateless/02366_kql_create_table.sql | 8 +-- .../0_stateless/02366_kql_func_ip.reference | 53 +++++++++++++++++-- .../queries/0_stateless/02366_kql_func_ip.sql | 50 +++++++++++++++-- .../02366_kql_func_string.reference | 25 +++++++++ .../0_stateless/02366_kql_func_string.sql | 26 ++++++++- 5 files changed, 147 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql index 67f099a2d70..8820d4c30f8 100644 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -8,13 +8,7 @@ CREATE TABLE Customers Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES ('Stephanie','Cox','Management abcd defg','Bachelors',33); -INSERT INTO Customers VALUES ('Peter','Nara','Skilled Manual','Graduate Degree',26); -INSERT INTO Customers VALUES ('Latoya','Shen','Professional','Graduate Degree',25); -INSERT INTO Customers VALUES ('Apple','','Skilled Manual','Bachelors',28); -INSERT INTO Customers VALUES (NULL,'why','Professional','Partial College',38); - +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd 
defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); Select '-- test create table --' ; Select * from kql(Customers|project FirstName) limit 1;; DROP TABLE IF EXISTS kql_table1; diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index b6074a33b55..732a5ad38b7 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -1,11 +1,11 @@ -- ipv4_is_private(\'127.0.0.1\') -false +0 -- ipv4_is_private(\'10.1.2.3\') -true +1 -- ipv4_is_private(\'192.168.1.1/24\') -true +1 ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) -true +1 -- ipv4_is_private(\'abc\') \N -- ipv4_netmask_suffix(\'192.168.1.1/24\') @@ -34,3 +34,48 @@ ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) fe80:0000:0000:0000:085d:e82c:9446:7994 -- parse_ipv4(\'127.0.0.1\') 2130706433 +-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') +1 +-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 +1 +-- parse_ipv4_mask(\'abc\', 31) +\N +\N +-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) +1 +-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') +0 +-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) +1 +-- ipv4_is_match(\'abc\', \'def\', 24) +\N +-- ipv4_compare() +0 +-1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +-- format_ipv4() +192.168.1.0 +192.168.1.1 +192.168.1.0 +192.168.1.0 +1 +1 +-- format_ipv4_mask() +192.168.1.0/24 +192.168.1.0/24 +192.168.1.0/24 +192.168.1.1/32 +1 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql index a625c0bf470..c70e01e2a5e 100644 --- 
a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -38,9 +38,53 @@ print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; print parse_ipv6('fe80::85d:e82c:9446:7994'); print '-- parse_ipv4(\'127.0.0.1\')'; print parse_ipv4('127.0.0.1'); --- TODO: --- print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -- == true - + +print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; +print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); +print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; +print parse_ipv4_mask('127.0.0.1', 24) == 2130706432; +print '-- parse_ipv4_mask(\'abc\', 31)'; +print parse_ipv4_mask('abc', 31) +print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; +print parse_ipv4_mask('192.1.168.2', 1000); +print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; +print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); +print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_match('127.0.0.1', '127.0.0.1'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; +print ipv4_is_match('192.168.1.1', '192.168.1.255'); +print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; +print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; +print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); +print '-- ipv4_is_match(\'abc\', \'def\', 24)'; +print ipv4_is_match('abc', 'dev', 24); +print '-- ipv4_compare()'; +print ipv4_compare('127.0.0.1', '127.0.0.1'); +print ipv4_compare('192.168.1.1', '192.168.1.255'); +print ipv4_compare('192.168.1.255', '192.168.1.1'); +print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', 
'192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference index 255acb486cd..78e130ad092 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -260,3 +260,28 @@ PINEAPPLE 2 2 -1 +-- base64_encode_fromguid() +YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi +-- base64_decode_toarray() +[75,117,115,116,111] +-- base64_decode_toguid() +1 +-- parse_url() +{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcmp() +0 1 -1 1 +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +-- trim_start() +www.ibm.com +Te st1// $ +-- trim_end() +https +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql index cdf9b1e4b17..b49a80a363f 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.sql +++ 
b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -182,4 +182,28 @@ print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/qu Customers | project indexof('abcdefg','cde') | take 1; Customers | project indexof('abcdefg','cde',2) | take 1; Customers | project indexof('abcdefg','cde',6) | take 1; - +print '-- base64_encode_fromguid()'; +print base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_decode_toarray()'; +print base64_decode_toarray('S3VzdG8='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid(base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')) == 'ae3133f2-6e22-49ae-b06a-16e6a9b212eb'; +print '-- parse_url()'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); From b6c112267334c9a3392d03a660b6e5159c8edca8 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 12:35:56 -0400 Subject: [PATCH 088/279] Add tests for binary and datetime --- 
.../02366_kql_func_binary.reference | 7 ++++ .../0_stateless/02366_kql_func_binary.sql | 8 ++++ .../02366_kql_func_datetime.reference | 28 +++++++++++++ .../0_stateless/02366_kql_func_datetime.sql | 41 +++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_func_binary.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_binary.sql create mode 100644 tests/queries/0_stateless/02366_kql_func_datetime.reference create mode 100644 tests/queries/0_stateless/02366_kql_func_datetime.sql diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 00000000000..6276cd6d867 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 00000000000..824022b564c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 00000000000..e87db62ebdf --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,28 @@ +-- dayofmonth() +31 +-- dayofweek() +4 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- startofday() +2017-01-02 00:00:00.000000000 +-- startofmonth() 
+2016-12-01 00:00:00.000000000 +2017-02-01 00:00:00.000000000 +-- startofweek() +2017-01-08 00:00:00.000000000 +-- startofyear() +2018-01-01 00:00:00.000000000 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +-- now() +1 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 100644 index 00000000000..fb2e0f68be1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,41 @@ +set dialect = 'kusto'; + + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print dayofweek(datetime(2015-12-31)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()' +-- print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- startofmonth()'; +-- print startofmonth(datetime(2017-01-01 10:10:17)); +print startofmonth(datetime(2017-01-01 10:10:17), -1); +print startofmonth(datetime(2017-01-01 10:10:17), 1); +print '-- startofweek()' +-- print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()' +-- print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()' +print monthofyear(datetime(2015-12-31)); +print '-- now()'; +print 
getyear(now(-2d))>1900; + + From 8b763be30a9763dbb4596c2d7b301c4614a893e5 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 14:22:47 -0400 Subject: [PATCH 089/279] Added ipv6 tests --- .../queries/0_stateless/02366_kql_func_ip.sql | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql index c70e01e2a5e..638aaf3a2b0 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -85,6 +85,40 @@ print format_ipv4_mask('192.168.1.1', 24); print format_ipv4_mask('192.168.1.1', 32); print format_ipv4_mask('192.168.1.1/24', -1) == ''; print format_ipv4_mask('abc', 24) == ''; - - - +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'; +-- print parse_ipv6_mask("192.168.255.255", 120) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print parse_ipv6_mask("192.168.255.255/24", 124) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print parse_ipv6_mask("255.255.255.255", 128) == '0000:0000:0000:0000:0000:ffff:ffff:ffff'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128) == 'fe80:0000:0000:0000:085d:e82c:9446:7994'; +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'; +-- print parse_ipv6_mask("::192.168.255.255", 128) == '0000:0000:0000:0000:0000:ffff:c0a8:ffff'; +-- print parse_ipv6_mask("::192.168.255.255/24", 128) == '0000:0000:0000:0000:0000:ffff:c0a8:ff00'; +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 
'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit IP6-prefix is used for comparison +print 
ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file From 53e369dab9ac040a7819e2b3ac29f318e06afe7e Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Fri, 12 Aug 2022 14:26:52 -0400 Subject: [PATCH 090/279] Added missing file for IPv6 tests --- .../0_stateless/02366_kql_func_ip.reference | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference index 732a5ad38b7..7c1d2907d5a 100644 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -79,3 +79,37 @@ fe80:0000:0000:0000:085d:e82c:9446:7994 192.168.1.1/32 1 1 +-- parse_ipv6_mask() +1 +1 +1 +1 +1 +1 +-- ipv6_is_match() +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 From 372f00db24085ba787806078855eb2351b69b874 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Mon, 15 Aug 2022 17:12:04 -0400 Subject: [PATCH 091/279] Added datatype tests --- .../0_stateless/02366_kql_datatype.reference | 68 ++++++++++++++ .../0_stateless/02366_kql_datatype.sql | 88 +++++++++++++++++++ .../02366_kql_func_string.reference | 2 + .../0_stateless/02366_kql_func_string.sql | 2 + 4 files changed, 160 insertions(+) create mode 100644 tests/queries/0_stateless/02366_kql_datatype.reference create mode 100644 tests/queries/0_stateless/02366_kql_datatype.sql diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 
index 00000000000..eb34b5761d8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,68 @@ +-- bool +true +\N +-- int +123 +\N +-- long +123 +255 +-1 +\N +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +1970-01-01 00:00:00.000000000 +2014-11-08 00:00:00.000000000 +-- guid +172800 +5400 +1800 +10 +0.1 +0.1 +0.00001 +0 +3 +-- null +1 +\N [NULL] \N \N \N \N +-- dynamic +[1,2,3] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 +1 diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 00000000000..9b1d0346360 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,88 @@ +set dialect = 'kusto'; + +print '-- bool' +print bool(true); +print bool(true); +print bool(null); +print '-- int'; +print int(123); +print int(null); +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print 456; +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +-- print datetime(null); +-- print datetime('2014-05-25T08:20:03.123456Z'); +-- print datetime('2014-11-08 15:55:55.123456Z'); +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642) +-- print guid(null) +print '-- timespan (time)'; +print timespan(2d); -- 2 days +print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print 
timespan(10s); -- 10 seconds +print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); +print timespan(1.5h) / timespan(30m); +print '-- null'; +print isnull(null); +print bool(null), dynamic(null), int(null), long(null), real(null), double(null); +print '-- dynamic'; -- only support 1D array at the moment +print dynamic([1,2,3]); +print dynamic(['a', 'b', 'c']); + +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true +print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null) == ''; + +-- TODO: +-- print '-- totimespan()'; +-- print totimespan('0.00:01:00'); +-- print totimespan('abc') == null; +-- print '-- todatetime()'; +-- print todatetime('2015-12-24'); +-- print todatetime('abc') == null; + diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference index 78e130ad092..25da15bc25d 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -276,11 +276,13 @@ YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi 
kusto xxx -- trim() https://www.ibm.com +Te st1 -- trim_start() www.ibm.com Te st1// $ -- trim_end() https +- Te st1 -- replace_regex Number was: 1 -- has_any_index() diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql index b49a80a363f..d367ec553c4 100644 --- a/tests/queries/0_stateless/02366_kql_func_string.sql +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -198,11 +198,13 @@ print '-- translate()'; print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); print '-- trim()'; print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- trim_start()'; print trim_start("https://", "https://www.ibm.com"); print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- trim_end()'; print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); print '-- replace_regex'; print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); print '-- has_any_index()'; From fc1b3174f6b8017f8a250b42f7a42ef60eeb547b Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 17 Aug 2022 23:17:29 -0700 Subject: [PATCH 092/279] Kusto-phase2 : fixed the double quote issue --- .../KustoFunctions/IParserKQLFunction.cpp | 9 +++-- .../KustoFunctions/KQLDataTypeFunctions.cpp | 34 ++++++++++--------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 2310879862a..d81ba571aec 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -102,7 +102,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: std::vector tokens; std::unique_ptr fun; - if (pos->type == TokenType::ClosingRoundBracket) + if (pos->type == 
TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) return converted_arg; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) @@ -117,7 +117,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: { tokens.push_back(IParserKQLFunction::getExpression(pos)); } - else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + else if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) { break; } @@ -133,7 +133,7 @@ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser: } } ++pos; - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) break; } for (auto token : tokens) @@ -213,6 +213,9 @@ String IParserKQLFunction::getExpression(IParser::Pos & pos) arg = std::to_string(time_span.toSeconds()); } } + else if (pos->type == TokenType::QuotedIdentifier) + arg = "'" + String(pos->begin + 1,pos->end - 1) + "'"; + return arg; } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 0f60bf6d326..4caf4188c8c 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -52,24 +52,26 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) bool DatatypeDynamic::convertImpl(String &out,IParser::Pos &pos) { - String res = String(pos->begin, pos->end); - String array; - ++pos; //go pass "dynamic" string - while (pos->type != TokenType::ClosingRoundBracket) - { - if (pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) - { - array += String(pos->begin, pos->end); - } - ++pos; - } - if (pos->type == 
TokenType::ClosingRoundBracket) - array += String(pos->begin, pos->end); - else + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) return false; - out = "array" + array; - return true; + String array; + ++pos; + if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + while (pos->type != TokenType::ClosingRoundBracket) + { + auto tmp_arg = getConvertedArgument(fn_name, pos); + array = array.empty() ? tmp_arg : array +", " + tmp_arg; + ++pos; + } + out = "array (" + array + ")"; + return true; + } + else + return false; // should throw exception , later } bool DatatypeGuid::convertImpl(String &out,IParser::Pos &pos) From 7adc203f1c0fc51a9fe7762c2d54c8a9d1b7de59 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Wed, 17 Aug 2022 05:53:02 -0700 Subject: [PATCH 093/279] Implement array_iff / array_iif --- src/Parsers/Kusto/KQL_ReleaseNote.md | 10 +++++++++- .../KustoFunctions/KQLDynamicFunctions.cpp | 20 ++++++++++++++++--- .../KustoFunctions/KQLFunctionFactory.cpp | 1 + src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp | 17 ++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b948dbf2443..1c68a6563f9 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,6 +1,14 @@ ## KQL implemented features +# August XX, 2022 +## Dynamic functions +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print 
array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + # August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` @@ -49,7 +57,7 @@ `print tostring(123) == '123'` `print tostring('asd') == 'asd'` -## DateType +## Data Types - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) *Supports only 1D array* `print output = dynamic(['a', 'b', 'c'])` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 3f534679c58..0dfa1907dc3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -15,6 +15,8 @@ #include #include +#include + namespace DB { @@ -25,11 +27,23 @@ bool ArrayConcat::convertImpl(String &out,IParser::Pos &pos) return false; } -bool ArrayIif::convertImpl(String &out,IParser::Pos &pos) +bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto conditions = getArgument(function_name, pos); + const auto if_true = getArgument(function_name, pos); + const auto if_false = getArgument(function_name, pos); + + out = std::format( + "arrayMap(x -> if(x.1 != 0, x.2, x.3), arrayZip({0}, arrayResize({1}, length({0}), null), arrayResize({2}, length({0}), null)))", + conditions, + if_true, + if_false); + + return true; } bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index 6cfd67514a2..d7619c8dc40 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -93,6 +93,7 @@ namespace DB {"url_encode", 
KQLFunctionValue::url_encode}, {"array_concat", KQLFunctionValue::array_concat}, + {"array_iff", KQLFunctionValue::array_iif}, {"array_iif", KQLFunctionValue::array_iif}, {"array_index_of", KQLFunctionValue::array_index_of}, {"array_length", KQLFunctionValue::array_length}, diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp new file mode 100644 index 00000000000..1e316593a5b --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -0,0 +1,17 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_iff(A, B, C)", + "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + }, + { + "print array_iif(A, B, C)", + "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + } +}))); From 067a5c6c00de9ec2ce92c8a658cf2b1af42e716d Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Fri, 19 Aug 2022 07:24:36 -0700 Subject: [PATCH 094/279] Set the release date --- src/Parsers/Kusto/KQL_ReleaseNote.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 1c68a6563f9..5905625bb82 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -1,7 +1,7 @@ ## KQL implemented features -# August XX, 2022 +# August 29, 2022 ## Dynamic functions - [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` From c33f67fc8655b7aefb8dd71a4059c43383c904f0 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 
2022 10:24:52 -0700 Subject: [PATCH 095/279] Part2 DateTime functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 41 ++++ .../KustoFunctions/KQLDateTimeFunctions.cpp | 207 +++++++++++++++--- .../KustoFunctions/KQLDateTimeFunctions.h | 8 + .../KustoFunctions/KQLFunctionFactory.cpp | 5 + .../Kusto/KustoFunctions/KQLFunctionFactory.h | 1 + .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 73 ++++++ 6 files changed, 301 insertions(+), 34 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 5905625bb82..178ba4fcc08 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -162,6 +162,15 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` + +- [unixtime_microseconds_todatetime] + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime] + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime] + `print unixtime_nanoseconds_todatetime(1546300800000000000)` - [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) `print dayofweek(datetime(2015-12-20))` @@ -176,6 +185,38 @@ `print now(-2d) ` `print now(time(1d))` +- [ago] + `print ago(2h)` + +- [endofday] + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth] + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek] + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear] + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + 
`print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime] + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff] + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` ## Binary functions - [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 58d8536fb49..094410cb023 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -35,16 +35,24 @@ bool DateTime::convertImpl(String & out, IParser::Pos & pos) bool Ago::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + { + const auto offset = getConvertedArgument(fn_name, pos); + out = std::format("now64(9,'UTC') - {}", offset); + } + else + out = "now64(9,'UTC')"; + return true; } bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + return directMapping(out, pos, "date_add"); }; bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) @@ -56,9 +64,21 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + 
if (fn_name.empty()) + return false; + ++pos; + String arguments; + + arguments = arguments + getConvertedArgument(fn_name, pos) + ","; + ++pos; + arguments = arguments + getConvertedArgument(fn_name, pos) + ","; + ++pos; + arguments = arguments + getConvertedArgument(fn_name, pos); + + out = std::format("ABS(DateDiff({}))",arguments); + return true; + } bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos) @@ -84,25 +104,93 @@ bool DayOfYear::convertImpl(String & out, IParser::Pos & pos) return directMapping(out, pos, "toDayOfYear"); } +bool EndOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 MONTH) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + +} + bool EndOfDay::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 DAY) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String 
offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 WEEK) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool EndOfYear::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String datetime_str = getConvertedArgument(fn_name, pos); + String offset = "0"; + + if (pos->type == TokenType::Comma) + { + ++pos; + offset = getConvertedArgument(fn_name, pos); + } + out = std::format("toDateTime(toStartOfDay({}),9,'UTC') + (INTERVAL {} +1 YEAR) - (INTERVAL 1 microsecond)", datetime_str, toString(offset)); + + return true; + } bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -143,9 +231,44 @@ bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String arguments; + int number_of_arguments=1; + String argument; + while (pos->type != TokenType::ClosingRoundBracket) + { + argument = String(pos->begin,pos->end); + auto dot_pos = argument.find('.'); + + if (dot_pos == String::npos) + arguments = arguments + String(pos->begin,pos->end); + else + { + arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); + number_of_arguments++; + } + + ++pos; + if(pos->type == TokenType::Comma) + number_of_arguments++; + } + + while(number_of_arguments < 7) + { + arguments = arguments+ ","; + arguments = arguments+ "0"; + number_of_arguments++; + } + arguments = arguments + ",7,'UTC'"; + + out = 
std::format("makeDateTime64({})",arguments); + + return true; } bool Now::convertImpl(String & out, IParser::Pos & pos) @@ -174,7 +297,7 @@ bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -194,7 +317,7 @@ bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -214,7 +337,7 @@ bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -234,7 +357,7 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) ++pos; const String datetime_str = getConvertedArgument(fn_name, pos); - String offset ; + String offset = "0"; if (pos->type == TokenType::Comma) { @@ -247,23 +370,39 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Micro({},'UTC')", value); + return true; } bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Milli({},'UTC')", value); + return true; + } bool UnixTimeNanosecondsToDateTime::convertImpl(String & 
out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + const String value = getConvertedArgument(fn_name, pos); + out = std::format("fromUnixTimestamp64Nano({},'UTC')", value); + return true; } bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index adf95a39a64..bde104e88b9 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -19,6 +19,7 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; };*/ + class Ago : public IParserKQLFunction { protected: @@ -75,6 +76,13 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; +class EndOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + class EndOfWeek : public IParserKQLFunction { protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp index d7619c8dc40..a25ca15b924 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp @@ -31,6 +31,8 @@ namespace DB {"endofday", KQLFunctionValue::endofday}, {"endofweek", KQLFunctionValue::endofweek}, {"endofyear", KQLFunctionValue::endofyear}, + {"endofmonth", KQLFunctionValue::endofmonth}, + {"format_datetime", KQLFunctionValue::format_datetime}, {"format_timespan", KQLFunctionValue::format_timespan}, {"getmonth", KQLFunctionValue::getmonth}, @@ -273,6 +275,9 @@ std::unique_ptr KQLFunctionFactory::get(String &kql_function case KQLFunctionValue::endofyear: return std::make_unique(); + case 
KQLFunctionValue::endofmonth: + return std::make_unique(); + case KQLFunctionValue::monthofyear: return std::make_unique(); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h index 38bac6d641a..1938d449fd3 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -19,6 +19,7 @@ namespace DB endofday, endofweek, endofyear, + endofmonth, monthofyear, format_datetime, format_timespan, diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 74d13c60d05..87af9836c6c 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -86,6 +86,10 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" }, + { + "print startofyear(datetime(2017-01-01 10:10:17), -1)", + "SELECT toDateTime64(toStartOfYear(toDateTime64('2017-01-01 10:10:17', 9, 'UTC'), 'UTC'), 9, 'UTC') + toIntervalYear(-1)" + }, { "print monthofyear(datetime(2015-12-14))", "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" @@ -125,6 +129,75 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, { "print now(1d)", "SELECT now64(9, 'UTC') + 86400." + }, + { + "print ago(2d)", + "SELECT now64(9, 'UTC') - 172800." 
+ }, + { + "print endofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(1 + 1)) - toIntervalMicrosecond(1)" + + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), 1)", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), -1) ", + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalYear(-1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), 1)" , + "SELECT (toDateTime(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalYear(1 + 1)) - toIntervalMicrosecond(1)" + }, + { + "print make_datetime(2017,10,01)", + "SELECT makeDateTime64(2017, 10, 1, 0, 0, 0, 0, 7, 'UTC')" + }, + { + "print make_datetime(2017,10,01,12,10)", + "SELECT makeDateTime64(2017, 10, 1, 12, 10, 0, 0, 7, 'UTC')" + }, + { + "print make_datetime(2017,10,01,12,11,0.1234567)", + "SELECT 
makeDateTime64(2017, 10, 1, 12, 11, 0, 1234567, 7, 'UTC')" + }, + { + "print unixtime_microseconds_todatetime(1546300800000000)", + "SELECT fromUnixTimestamp64Micro(1546300800000000, 'UTC')" + }, + { + "print unixtime_milliseconds_todatetime(1546300800000)", + "SELECT fromUnixTimestamp64Milli(1546300800000, 'UTC')" + }, + { + "print unixtime_nanoseconds_todatetime(1546300800000000000)", + "SELECT fromUnixTimestamp64Nano(1546300800000000000, 'UTC')" + }, + { + "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", + "SELECT ABS(dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')))" + }, + { + "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", + "SELECT ABS(dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')))" } }))); From 2b8e25e7641c4b4be334d0a60a38522d76883202 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 2022 12:58:25 -0700 Subject: [PATCH 096/279] Updated release notes --- src/Parsers/Kusto/KQL_ReleaseNote.md | 20 +++++++++---------- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 178ba4fcc08..b8e8b75b997 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -163,13 +163,13 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` -- [unixtime_microseconds_todatetime] +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) `print unixtime_microseconds_todatetime(1546300800000000)` -- [unixtime_milliseconds_todatetime] +- 
[unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) `print unixtime_milliseconds_todatetime(1546300800000)` -- [unixtime_nanoseconds_todatetime] +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) `print unixtime_nanoseconds_todatetime(1546300800000000000)` - [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) @@ -185,35 +185,35 @@ `print now(-2d) ` `print now(time(1d))` -- [ago] +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) `print ago(2h)` -- [endofday] +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) `print endofday(datetime(2017-01-01 10:10:17), -1)` `print endofday(datetime(2017-01-01 10:10:17), 1)` `print endofday(datetime(2017-01-01 10:10:17))` -- [endofmonth] +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) `print endofmonth(datetime(2017-01-01 10:10:17), -1)` `print endofmonth(datetime(2017-01-01 10:10:17), 1)` `print endofmonth(datetime(2017-01-01 10:10:17))` -- [endofweek] +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) `print endofweek(datetime(2017-01-01 10:10:17), 1)` `print endofweek(datetime(2017-01-01 10:10:17), -1)` `print endofweek(datetime(2017-01-01 10:10:17))` -- [endofyear] +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) `print endofyear(datetime(2017-01-01 10:10:17), -1)` `print endofyear(datetime(2017-01-01 10:10:17), 1)` `print endofyear(datetime(2017-01-01 10:10:17))` -- [make_datetime] +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) `print make_datetime(2017,10,01)` `print make_datetime(2017,10,01,12,10)` `print 
make_datetime(2017,10,01,12,11,0.1234567)` -- [datetime_diff] +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 87af9836c6c..1efbc97d2bb 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -127,6 +127,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, "SELECT now64(9, 'UTC')" }, { + "print now(1d)", "SELECT now64(9, 'UTC') + 86400." }, From 18bbe5a52361e894ae04861f5efdd3ee427b8b1c Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Mon, 15 Aug 2022 20:45:39 -0700 Subject: [PATCH 097/279] Incorporated review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 34 ++++++------------- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 6 ++-- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 094410cb023..6aa877c2112 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -76,7 +76,7 @@ bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) ++pos; arguments = arguments + getConvertedArgument(fn_name, pos); - out = std::format("ABS(DateDiff({}))",arguments); + out = std::format("DateDiff({}) * -1",arguments); return true; } @@ -238,33 +238,19 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - String argument; + String argument[7] = 
{"0","0","0","0","0","0","0"}; + + int i = 0; while (pos->type != TokenType::ClosingRoundBracket) { - argument = String(pos->begin,pos->end); - auto dot_pos = argument.find('.'); - - if (dot_pos == String::npos) - arguments = arguments + String(pos->begin,pos->end); - else - { - arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); - number_of_arguments++; - } - - ++pos; + argument[i] = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - number_of_arguments++; + ++pos; + + i++; } - - while(number_of_arguments < 7) - { - arguments = arguments+ ","; - arguments = arguments+ "0"; - number_of_arguments++; - } - arguments = arguments + ",7,'UTC'"; + + arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = std::format("makeDateTime64({})",arguments); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 1efbc97d2bb..55d88e2aa78 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -178,7 +178,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, }, { "print make_datetime(2017,10,01,12,11,0.1234567)", - "SELECT makeDateTime64(2017, 10, 1, 12, 11, 0, 1234567, 7, 'UTC')" + "SELECT makeDateTime64(2017, 10, 1, 12, 11, 0.1234567, 0, 7, 'UTC')" }, { "print unixtime_microseconds_todatetime(1546300800000000)", @@ -194,11 +194,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, }, { "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", - "SELECT ABS(dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')))" + "SELECT dateDiff('year', toDateTime64('2017-01-01', 9, 'UTC'), toDateTime64('2000-12-31', 9, 'UTC')) * -1" }, { "print 
datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", - "SELECT ABS(dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')))" + "SELECT dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')) * -1" } }))); From 758960794a405cd3ec8ca943176d536a27ca4a69 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Fri, 12 Aug 2022 10:24:52 -0700 Subject: [PATCH 098/279] Part2 DateTime functions --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 6aa877c2112..fe6b1529916 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -77,6 +77,7 @@ bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) arguments = arguments + getConvertedArgument(fn_name, pos); out = std::format("DateDiff({}) * -1",arguments); + return true; } @@ -238,19 +239,35 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - String argument[7] = {"0","0","0","0","0","0","0"}; - - int i = 0; + + int number_of_arguments=1; + String argument; while (pos->type != TokenType::ClosingRoundBracket) { - argument[i] = getConvertedArgument(fn_name, pos); + argument = String(pos->begin,pos->end); + auto dot_pos = argument.find('.'); + + if (dot_pos == String::npos) + arguments = arguments + String(pos->begin,pos->end); + else + { + arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); + number_of_arguments++; + } + + ++pos; if(pos->type == TokenType::Comma) - ++pos; - - i++; + number_of_arguments++; } + + while(number_of_arguments < 7) + { + arguments = arguments+ ","; + arguments = 
arguments+ "0"; + number_of_arguments++; + } + arguments = arguments + ",7,'UTC'"; - arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = std::format("makeDateTime64({})",arguments); From d542332bc0d9c110fa0621d85a991d8e4933b7bc Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Mon, 15 Aug 2022 20:45:39 -0700 Subject: [PATCH 099/279] Incorporated review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index fe6b1529916..e994028d6a4 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -239,35 +239,21 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - String argument; + + String argument[7] = {"0","0","0","0","0","0","0"}; + + int i = 0; while (pos->type != TokenType::ClosingRoundBracket) { - argument = String(pos->begin,pos->end); - auto dot_pos = argument.find('.'); - - if (dot_pos == String::npos) - arguments = arguments + String(pos->begin,pos->end); - else - { - arguments = arguments + argument.substr(dot_pos-1, dot_pos) + "," + argument.substr(dot_pos+1,argument.length()); - number_of_arguments++; - } - - ++pos; + argument[i] = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - number_of_arguments++; - } - - while(number_of_arguments < 7) - { - arguments = arguments+ ","; - arguments = arguments+ "0"; - number_of_arguments++; - } - arguments = arguments + ",7,'UTC'"; + ++pos; + i++; + } + + arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; out = 
std::format("makeDateTime64({})",arguments); From 004c273fee0f2aab88b97b06d2f603e38ac57764 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 16 Aug 2022 13:01:01 -0700 Subject: [PATCH 100/279] Updated as per review comments --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index e994028d6a4..96c5bca06de 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -16,6 +16,10 @@ #include #include +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} namespace DB { @@ -239,24 +243,29 @@ bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) ++pos; String arguments; - int number_of_arguments=1; - - String argument[7] = {"0","0","0","0","0","0","0"}; + int arg_count = 0; - int i = 0; - while (pos->type != TokenType::ClosingRoundBracket) + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) { - argument[i] = getConvertedArgument(fn_name, pos); + String arg = getConvertedArgument(fn_name, pos); if(pos->type == TokenType::Comma) - ++pos; - - i++; + ++pos; + arguments = arguments + arg + ","; + ++arg_count; } - - arguments = argument[0] + "," + argument[1] + "," + argument[2] + "," + argument[3] + "," + argument[4] + "," + argument[5] + "," + argument[6] + ",7,'UTC'"; - - out = std::format("makeDateTime64({})",arguments); + if (arg_count < 1 || arg_count > 7) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + if(arg_count < 7) + { + for(int i = arg_count;i < 7 ; ++i) + arguments = arguments + "0 ,"; + } + + arguments = arguments + "7,'UTC'"; + out = std::format("makeDateTime64({})",arguments); + return true; } From d261b29a43d46052fce38a1f9c1963c8eb3b8566 Mon Sep 17 00:00:00 2001 From: 
HeenaBansal2009 Date: Fri, 19 Aug 2022 07:35:22 -0700 Subject: [PATCH 101/279] Updated Readme --- src/Parsers/Kusto/KQL_ReleaseNote.md | 86 +++++++++++++++------------- 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index b8e8b75b997..299e003f37f 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -9,6 +9,51 @@ `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print 
make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + # August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` @@ -162,15 +207,6 @@ - [unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) `print unixtime_seconds_todatetime(1546300800)` - -- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) - `print unixtime_microseconds_todatetime(1546300800000000)` - -- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) - `print unixtime_milliseconds_todatetime(1546300800000)` - -- [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) - `print 
unixtime_nanoseconds_todatetime(1546300800000000000)` - [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) `print dayofweek(datetime(2015-12-20))` @@ -185,38 +221,6 @@ `print now(-2d) ` `print now(time(1d))` -- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) - `print ago(2h)` - -- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) - `print endofday(datetime(2017-01-01 10:10:17), -1)` - `print endofday(datetime(2017-01-01 10:10:17), 1)` - `print endofday(datetime(2017-01-01 10:10:17))` - -- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) - `print endofmonth(datetime(2017-01-01 10:10:17), -1)` - `print endofmonth(datetime(2017-01-01 10:10:17), 1)` - `print endofmonth(datetime(2017-01-01 10:10:17))` - -- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) - `print endofweek(datetime(2017-01-01 10:10:17), 1)` - `print endofweek(datetime(2017-01-01 10:10:17), -1)` - `print endofweek(datetime(2017-01-01 10:10:17))` - -- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) - `print endofyear(datetime(2017-01-01 10:10:17), -1)` - `print endofyear(datetime(2017-01-01 10:10:17), 1)` - `print endofyear(datetime(2017-01-01 10:10:17))` - -- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) - `print make_datetime(2017,10,01)` - `print make_datetime(2017,10,01,12,10)` - `print make_datetime(2017,10,01,12,11,0.1234567)` - -- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) - `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` - `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` - `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` ## 
Binary functions - [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) From 322a0aa2d8a708553918864ffb5e3c8e92319279 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Sun, 21 Aug 2022 22:31:03 -0700 Subject: [PATCH 102/279] Kusto-phase2, updated make-series operator --- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 202 +++++++++------------- src/Parsers/Kusto/ParserKQLMakeSeries.h | 3 +- 2 files changed, 80 insertions(+), 125 deletions(-) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 03528b6af1e..65eeafe737b 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -13,7 +13,7 @@ namespace DB bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) { - std::unordered_set allowed_aggregation + std::unordered_set allowed_aggregation ({ "avg", "avgif", @@ -54,6 +54,7 @@ bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggrega { alias = std::move(first_token); aggregation_fun = String(pos->begin,pos->end); + ++pos; } else aggregation_fun = std::move(first_token); @@ -61,7 +62,6 @@ bool ParserKQLMakeSeries :: parseAggregationColumns(AggregationColumns & aggrega if (allowed_aggregation.find(aggregation_fun) == allowed_aggregation.end()) return false; - ++pos; if (open_bracket.ignore(pos, expected)) column = String(pos->begin,pos->end); else @@ -148,98 +148,7 @@ bool ParserKQLMakeSeries :: parseFromToStepClause(FromToStepClause & from_to_ste return true; } - -void ParserKQLMakeSeries :: makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) -{ - String start_str, end_str; - String sub_query, main_query; - - auto & aggregation_columns = kql_make_series.aggregation_columns; - auto & from_to_step = kql_make_series.from_to_step; - auto & subquery_columns = kql_make_series.subquery_columns; - auto & axis_column = 
kql_make_series.axis_column; - auto & group_expression = kql_make_series.group_expression; - auto step = from_to_step.step; - - if (!kql_make_series.from_to_step.from_str.empty()) - start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); - - if (!kql_make_series.from_to_step.to_str.empty()) - end_str = getExprFromToken(from_to_step.to_str, max_depth); - - String bin_str, start, end; - - if (!start_str.empty()) // has from - { - bin_str = std::format(" toFloat64({0}) + (toInt64(((toFloat64({1}) - toFloat64({0})) / {2}) ) * {2}) AS {1}_ali ", - start_str, axis_column, step); - start = std::format("toUInt64({})", start_str); - } - else - { - bin_str = std::format(" toFloat64(toInt64((toFloat64({0}) ) / {1}) * {1}) AS {0}_ali ", - axis_column, step); - } - - auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); - - if (!end_str.empty()) - end = std::format("toUInt64({})", end_str); - - String range, condition; - if (!start_str.empty() && !end_str.empty()) - { - range = std::format("range({},{}, toUInt64({}))", start, end, step); - condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); - } - else if (start_str.empty() && !end_str.empty()) - { - range = std::format("range(low, {} , toUInt64({}))", end, step); - condition = std::format("{}_ali <= {}", axis_column, end); - } - else if (!start_str.empty() && end_str.empty()) - { - range = std::format("range({}, high, toUInt64({}))", start, step); - condition = std::format("{}_ali >= {}", axis_column, start); - } - else - { - range = std::format("range(low, high, toUInt64({}))", step); - condition = "1"; //true - } - - auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); - - auto axis_and_agg_alias_list = axis_column; - auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS 
{}",axis_column); //tupleElement(pp,2) as PriceAvg ,tupleElement(pp,1) - int idx = 2; - for (auto agg_column : aggregation_columns) - { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; - - axis_and_agg_alias_list +=", " + agg_column.alias; - final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); - } - - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf({0}_ali, {1}), arrayMap( x->(toFloat64(x)), {2})) ) as {0}", - axis_column, condition,range); - - main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); - - sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", - axis_column, axis_column,step, axis_and_agg_alias_list,main_query,sub_sub_query, sub_group_by); - - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); - - kql_make_series.sub_query = std::move(sub_query); - kql_make_series.main_query = std::move(main_query); -} - -void ParserKQLMakeSeries :: makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) +void ParserKQLMakeSeries :: makeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth) { const uint64_t era_diff = 62135596800; // this magic number is the differicen is second form 0001-01-01 (Azure start time ) and 1970-01-01 (CH start time) @@ -262,71 +171,122 @@ void ParserKQLMakeSeries :: makeTimeSeries(KQLMakeSeries & kql_make_series, cons String bin_str, start, end; uint64_t diff = 0; + String axis_column_format; + String axis_str; + + auto get_group_expression_alias = [&] + { + std::vector group_expression_tokens; + Tokens 
tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + IParser::Pos pos(tokens, max_depth); + while (!pos->isEnd()) + { + if (String(pos->begin, pos->end) == "AS") + { + if (!group_expression_tokens.empty()) + group_expression_tokens.pop_back(); + ++pos; + group_expression_tokens.push_back(String(pos->begin, pos->end)); + } + else + group_expression_tokens.push_back(String(pos->begin, pos->end)); + ++pos; + } + String res; + for (auto token : group_expression_tokens) + res = res + token + " "; + return res; + }; + + auto group_expression_alias = get_group_expression_alias(); + + if (from_to_step.is_timespan) + { + axis_column_format = std::format("toFloat64(toDateTime64({}, 9, 'UTC'))", axis_column); + } + else + axis_column_format = std::format("toFloat64({})", axis_column); + if (!start_str.empty()) // has from { - bin_str = std::format(" toFloat64(toDateTime64({0}, 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64({1}, 9, 'UTC')) - toFloat64(toDateTime64({0}, 9, 'UTC'))) / {2}) ) * {2}) AS {1}_ali ", - start_str, axis_column, step); - start = std::format("toUInt64(toDateTime64({},9,'UTC'))", start_str); + bin_str = std::format(" toFloat64({0}) + (toInt64((({1} - toFloat64({0})) / {2}) ) * {2}) AS {3}_ali ", + start_str, axis_column_format, step, axis_column); + start = std::format("toUInt64({})", start_str); } else { - bin_str = std::format(" toInt64((toFloat64(toDateTime64({0}, 9, 'UTC')) + {1}) / {2}) * {2} AS {0}_ali ", - axis_column, era_diff, step); - diff = era_diff; + if (from_to_step.is_timespan) + diff = era_diff; + bin_str = std::format(" toFloat64(toInt64(({0} + {1}) / {2}) * {2}) AS {3}_ali ", axis_column_format, diff, step, axis_column); } - auto sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column); - if (!end_str.empty()) - end = std::format("toUInt64(toDateTime64({}, 9, 'UTC'))", end_str); + end = 
std::format("toUInt64({})", end_str); String range, condition; + if (!start_str.empty() && !end_str.empty()) { - range = std::format("range({},{}, toUInt64({}))", start, end, step); - condition = std::format("{0}_ali >= {1} and {0}_ali <= {2}", axis_column, start, end); + range = std::format("range({}, {}, toUInt64({}))", start, end, step); + condition = std::format("where toInt64({0}) >= {1} and toInt64({0}) < {2}", axis_column_format, start, end); } else if (start_str.empty() && !end_str.empty()) { - range = std::format("range(low, {} + {}, toUInt64({}))", end, era_diff, step); - condition = std::format("{0}_ali - {1} < {2}", axis_column, era_diff, end); + range = std::format("range(low, {} + {}, toUInt64({}))", end, diff, step); + condition = std::format("where toInt64({0}) - {1} < {2}", axis_column_format, diff, end); } else if (!start_str.empty() && end_str.empty()) { range = std::format("range({}, high, toUInt64({}))", start, step); - condition = std::format("{}_ali >= {}", axis_column, start); + condition = std::format("where toInt64({}) >= {}", axis_column_format, start); } else { range = std::format("range(low, high, toUInt64({}))", step); - condition = "1"; //true + condition = " "; } auto range_len = std::format("length({})", range); - main_query = std::format("{} ", group_expression); + + String sub_sub_query; + if (group_expression.empty()) + sub_sub_query = std::format(" (Select {0}, {1} FROM {2} {4} GROUP BY {3}_ali ORDER BY {3}_ali) ", subquery_columns, bin_str, table_name, axis_column, condition); + else + sub_sub_query = std::format(" (Select {0}, {1}, {2} FROM {3} {5} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", group_expression, subquery_columns, bin_str, table_name, axis_column, condition); + + if (!group_expression.empty()) + main_query = std::format("{} ", group_expression_alias); auto axis_and_agg_alias_list = axis_column; auto final_axis_agg_alias_list =std::format("tupleElement(zipped,1) AS {}",axis_column); //tupleElement(pp,2) as PriceAvg 
,tupleElement(pp,1) int idx = 2; for (auto agg_column : aggregation_columns) { - String agg_group_column = std::format("arrayConcat(groupArrayIf ({}_ali,{}) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", - agg_column.alias, condition, agg_column.default_value, range_len, range_len, agg_column.alias); - main_query +=", " + agg_group_column; + String agg_group_column = std::format("arrayConcat(groupArray ({}_ali) as ga, arrayMap(x -> ({}),range(0,toUInt32 ({} - length(ga) < 0 ? 0 : {} - length(ga)),1) )) as {}", + agg_column.alias, agg_column.default_value, range_len, range_len, agg_column.alias); + main_query = main_query.empty() ? agg_group_column : main_query + ", " + agg_group_column; axis_and_agg_alias_list +=", " + agg_column.alias; final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx, agg_column.alias); } - auto axis_str = std::format("arrayDistinct(arrayConcat(groupArrayIf(toDateTime64({0}_ali - {1},9,'UTC'), {2}), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {3}) )) as {0}", - axis_column, diff, condition,range); + + if (from_to_step.is_timespan) + axis_str = std::format("arrayDistinct(arrayConcat(groupArray(toDateTime64({0}_ali - {1},9,'UTC')), arrayMap( x->(toDateTime64(x - {1} ,9,'UTC')), {2}) )) as {0}", + axis_column, diff, range); + else + axis_str = std::format("arrayDistinct(arrayConcat(groupArray({0}_ali), arrayMap( x->(toFloat64(x)), {1}) )) as {0}", + axis_column, range); main_query += ", " + axis_str; - auto sub_group_by = std::format("{}", group_expression); + auto sub_group_by = group_expression.empty()? 
"" : std::format("GROUP BY {}", group_expression_alias); - sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} GROUP BY {} )", + sub_query = std::format("( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} {} )", axis_column, axis_column,step, axis_and_agg_alias_list, main_query, sub_sub_query, sub_group_by); - main_query = std::format("{},{}", group_expression, final_axis_agg_alias_list); + if (group_expression.empty()) + main_query = std::format("{}", final_axis_agg_alias_list); + else + main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); kql_make_series.sub_query = std::move(sub_query); kql_make_series.main_query = std::move(main_query); @@ -387,10 +347,7 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec subquery_columns += ", "+ column_str; } - if (from_to_step.is_timespan) - makeTimeSeries(kql_make_series, pos.max_depth); - else - makeNumericSeries(kql_make_series, pos.max_depth); + makeSeries(kql_make_series, pos.max_depth); Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); @@ -407,6 +364,5 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec pos = begin; return true; - } } diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h index a89ec97174e..f00eaa1dc99 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.h +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -43,8 +43,7 @@ protected: String main_query; }; - void makeNumericSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); - void makeTimeSeries(KQLMakeSeries & kql_make_series, const uint32_t & max_depth); + void makeSeries(KQLMakeSeries & kql_make_series, const 
uint32_t & max_depth); bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); const char * getName() const override { return "KQL project"; } From a803b4833322c37397beaeafa5f905cf45310d86 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Mon, 22 Aug 2022 13:33:01 -0700 Subject: [PATCH 103/279] Kusto-phase2: add bin function, unit test for make-series --- .../KustoFunctions/KQLGeneralFunctions.cpp | 39 ++++++++++++++++--- .../tests/KQL/gtest_KQL_MakeSeries.cpp | 25 ++++++++++++ .../tests/KQL/gtest_KQL_StringFunctions.cpp | 12 ++++++ 3 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index dd79cc06898..e1d932e9ce1 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -20,11 +20,38 @@ namespace DB { -bool Bin::convertImpl(String &out,IParser::Pos &pos) +bool Bin::convertImpl(String & out,IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + double bin_size; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String origal_expr(pos->begin, pos->end); + String value = getConvertedArgument(fn_name, pos); + + ++pos; + String round_to = getConvertedArgument(fn_name, pos); + + auto t = std::format("toFloat64({})", value); + + bin_size = std::stod(round_to); + + if (origal_expr == "datetime" || origal_expr == "date") + { + out = std::format("toDateTime64(toInt64({0} / {1} ) * {1}, 9, 'UTC')", t, bin_size); + } + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + { + String bin_value = std::format(" toInt64({0} / {1} ) * {1}", t, bin_size); + 
out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); + } + else + { + out = std::format("toInt64({0} / {1} ) * {1}", t, bin_size); + } + return true; } bool BinAt::convertImpl(String & out,IParser::Pos & pos) @@ -49,11 +76,11 @@ bool BinAt::convertImpl(String & out,IParser::Pos & pos) int dir = t2 >= t1 ? 0 : -1; bin_size = std::stod(bin_size_str); - if (origal_expr == "datetime" or origal_expr == "date") + if (origal_expr == "datetime" || origal_expr == "date") { out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); } - else if (origal_expr == "timespan" or origal_expr =="time" or ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) + else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) { String bin_value = std::format("{} + toInt64(({} - {}) / {} + {}) * {}", t1, t2, t1, bin_size, dir, bin_size); out = std::format("concat(toString( toInt32((({}) as x) / 3600)),':', toString( toInt32(x % 3600 / 60)),':',toString( toInt32(x % 3600 % 60)))", bin_value); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp new file mode 100644 index 00000000000..5c94ab4665e --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp @@ -0,0 +1,25 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n 
arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n 
arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n 
arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 0ef816646a4..6fa03ef946d 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -176,6 +176,18 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, { "print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", "SELECT toDateTime64(toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(toDateTime64('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(toDateTime64('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" + }, + { + "print bin(4.5, 1)", + "SELECT toInt64(toFloat64(4.5) / 1) * 1" + }, + { + "print bin(time(16d), 7d)", + "SELECT concat(toString(toInt32(((toInt64(toFloat64(1382400.) 
/ 604800) * 604800) AS x) / 3600)), ':', toString(toInt32((x % 3600) / 60)), ':', toString(toInt32((x % 3600) % 60)))" + }, + { + "print bin(datetime(1970-05-11 13:45:07), 1d)", + "SELECT toDateTime64(toInt64(toFloat64(toDateTime64('1970-05-11 13:45:07', 9, 'UTC')) / 86400) * 86400, 9, 'UTC')" } }))); From c41f1cff656bdc01410f964cb4f58795c4d282ad Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 22 Aug 2022 15:08:43 -0700 Subject: [PATCH 104/279] Implement some KQL array functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 13 +- .../KustoFunctions/KQLDynamicFunctions.cpp | 173 ++++++++++-------- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 6 +- src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp | 53 +++++- src/Parsers/tests/KQL/gtest_KQL_IP.cpp | 23 +-- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 67 +------ .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 67 +------ src/Parsers/tests/gtest_Parser.cpp | 29 --- src/Parsers/tests/gtest_common.cpp | 64 +++++++ src/Parsers/tests/gtest_common.h | 3 + 10 files changed, 235 insertions(+), 263 deletions(-) create mode 100644 src/Parsers/tests/gtest_common.cpp diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index 299e003f37f..d9be1f8e3f4 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -3,12 +3,23 @@ # August 29, 2022 ## Dynamic functions +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + - [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` `print 
array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` ## DateTimeFunctions @@ -276,7 +287,7 @@ `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` -- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 0dfa1907dc3..2c79ae45601 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -1,18 +1,12 @@ -#include -#include #include #include -#include -#include +#include #include -#include -#include -#include -#include #include -#include -#include #include +#include +#include +#include #include #include @@ -20,18 +14,16 @@ namespace DB { -bool 
ArrayConcat::convertImpl(String &out,IParser::Pos &pos) +bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - out = res; - return false; + return directMapping(out, pos, "arrayConcat"); } bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) { const auto function_name = getKQLFunctionName(pos); if (function_name.empty()) - return false; + return false; const auto conditions = getArgument(function_name, pos); const auto if_true = getArgument(function_name, pos); @@ -46,189 +38,214 @@ bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) return true; } -bool ArrayIndexOf::convertImpl(String &out,IParser::Pos &pos) +bool ArrayIndexOf::convertImpl(String & out, IParser::Pos & pos) { - String fn_name = getKQLFunctionName(pos); - + const auto fn_name = getKQLFunctionName(pos); if (fn_name.empty()) return false; - ++pos; - String array = getConvertedArgument(fn_name, pos); - ++pos; - const auto needle = getConvertedArgument(fn_name, pos); - out = "minus(indexOf(" + array + ", " + needle + ") , 1)"; - + const auto array = getArgument(fn_name, pos); + const auto needle = getArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + "), 1)"; + return true; } -bool ArrayLength::convertImpl(String &out,IParser::Pos &pos) +bool ArrayLength::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "length"); } -bool ArrayReverse::convertImpl(String &out,IParser::Pos &pos) +bool ArrayReverse::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayRotateLeft::convertImpl(String &out,IParser::Pos &pos) +bool ArrayRotateLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayRotateRight::convertImpl(String &out,IParser::Pos &pos) 
+bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayShiftLeft::convertImpl(String &out,IParser::Pos &pos) +bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArrayShiftRight::convertImpl(String &out,IParser::Pos &pos) +bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySlice::convertImpl(String &out,IParser::Pos &pos) +bool ArraySlice::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto start = getArgument(function_name, pos); + const auto end = getArgument(function_name, pos); + + out = std::format( + "arraySlice({0}, plus(1, if({1} >= 0, {1}, toInt64(max2(-length({0}), {1})) + length({0}))) as offset_{3}, " + " plus(1, if({2} >= 0, {2}, toInt64(max2(-length({0}), {2})) + length({0}))) - offset_{3} + 1)", + array, + start, + end, + generateUniqueIdentifier()); + + return true; +} + +bool ArraySortAsc::convertImpl(String & out, IParser::Pos & pos) +{ + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySortAsc::convertImpl(String &out,IParser::Pos &pos) +bool ArraySortDesc::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool ArraySortDesc::convertImpl(String &out,IParser::Pos &pos) +bool ArraySplit::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); - 
out = res; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) return false; + + const auto array = getArgument(function_name, pos); + const auto indices = getArgument(function_name, pos); + + out = std::format( + "if(empty(arrayMap(x -> if(x >= 0, x, toInt64(max2(0, x + length({0})))), flatten([{1}])) as indices_{2}), [{0}], " + "arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, " + "if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), " + "range(1, length(indices_{2}) + 1))))", + array, + indices, + generateUniqueIdentifier()); + + return true; } -bool ArraySplit::convertImpl(String &out,IParser::Pos &pos) -{ - String res = String(pos->begin,pos->end); - out = res; - return false; -} - -bool ArraySum::convertImpl(String &out,IParser::Pos &pos) +bool ArraySum::convertImpl(String & out, IParser::Pos & pos) { return directMapping(out, pos, "arraySum"); } -bool BagKeys::convertImpl(String &out,IParser::Pos &pos) +bool BagKeys::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool BagMerge::convertImpl(String &out,IParser::Pos &pos) +bool BagMerge::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool BagRemoveKeys::convertImpl(String &out,IParser::Pos &pos) +bool BagRemoveKeys::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool JaccardIndex::convertImpl(String &out,IParser::Pos &pos) +bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Pack::convertImpl(String 
&out,IParser::Pos &pos) +bool Pack::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool PackAll::convertImpl(String &out,IParser::Pos &pos) +bool PackAll::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool PackArray::convertImpl(String &out,IParser::Pos &pos) +bool PackArray::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Repeat::convertImpl(String &out,IParser::Pos &pos) +bool Repeat::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetDifference::convertImpl(String &out,IParser::Pos &pos) +bool SetDifference::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetHasElement::convertImpl(String &out,IParser::Pos &pos) +bool SetHasElement::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetIntersect::convertImpl(String &out,IParser::Pos &pos) +bool SetIntersect::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool SetUnion::convertImpl(String &out,IParser::Pos &pos) +bool SetUnion::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool TreePath::convertImpl(String &out,IParser::Pos &pos) +bool TreePath::convertImpl(String & out, 
IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } -bool Zip::convertImpl(String &out,IParser::Pos &pos) +bool Zip::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin,pos->end); + String res = String(pos->begin, pos->end); out = res; return false; } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 40f34f766b5..53c81a66208 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -225,11 +225,13 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) const auto ip_address = getArgument(function_name, pos); const auto mask = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); out = std::format( - "if(isNull({0} as ipv4), {1}, {2})", + "if(isNull({0} as ipv4_{3}), {1}, {2})", kqlCallToExpression("parse_ipv4_mask", {ip_address, mask}, pos.max_depth), kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4)"}, pos.max_depth)); + kqlCallToExpression("parse_ipv6", {"format_ipv4(ipv4_" + unique_identifier + ")"}, pos.max_depth), + unique_identifier); return true; } diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp index 1e316593a5b..566cc5791dc 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -2,10 +2,18 @@ #include -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ + { + "print array_concat(A, B)", + "SELECT arrayConcat(A, B)" + }, + { + 
"print array_concat(A, B, C, D)", + "SELECT arrayConcat(A, B, C, D)" + }, { "print array_iff(A, B, C)", "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" @@ -13,5 +21,48 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, { "print array_iif(A, B, C)", "SELECT arrayMap(x -> if((x.1) != 0, x.2, x.3), arrayZip(A, arrayResize(B, length(A), NULL), arrayResize(C, length(A), NULL)))" + }, + { + "print output = array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output" + }, + { + "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" + }, + + { + "print output = array_length(dynamic([1, 2, 3]))", + "SELECT length([1, 2, 3]) AS output" + }, + { + "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" + }, + { + "print output = array_sum(dynamic([2, 5, 3]))", + "SELECT arraySum([2, 5, 3]) AS output" + }, + { + "print output = array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT arraySum([2.5, 5.5, 3]) AS output" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_slice(A, B, C)", + "SELECT arraySlice\\(A, 1 \\+ if\\(B >= 0, B, toInt64\\(max2\\(-length\\(A\\), B\\)\\) \\+ length\\(A\\)\\) AS offset_\\d+, \\(\\(1 \\+ if\\(C >= 0, C, toInt64\\(max2\\(-length\\(A\\), C\\)\\) \\+ length\\(A\\)\\)\\) - offset_\\d+\\) \\+ 1\\)" + }, + { + "print array_split(A, B)", + "SELECT if\\(empty\\(arrayMap\\(x -> if\\(x >= 0, x, toInt64\\(max2\\(0, x \\+ length\\(A\\)\\)\\)\\), flatten\\(\\[B\\]\\)\\) 
AS indices_\\d+\\), \\[A\\], arrayConcat\\(\\[arraySlice\\(A, 1, indices_\\d+\\[1\\]\\)\\], arrayMap\\(i -> arraySlice\\(A, \\(indices_\\d+\\[i\\]\\) \\+ 1, if\\(i = length\\(indices_\\d+\\), CAST\\(length\\(A\\), 'Int64'\\), CAST\\(indices_\\d+\\[i \\+ 1\\], 'Int64'\\)\\) - \\(indices_\\d+\\[i\\]\\)\\), range\\(1, length\\(indices_\\d+\\) \\+ 1\\)\\)\\)\\)" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp index 731715b4542..7f1d93625be 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -1,28 +1,7 @@ #include -#include -#include #include -#include -#include - -class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserRegexTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_TRUE(parser); - ASSERT_TRUE(expected_ast); - - DB::ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); -} - INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -101,6 +80,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, }, { "print parse_ipv6_mask(A, B)", - "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND 
\\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, 
arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" + "SELECT if\\(\\(if\\(\\(\\(toIPv4OrNull\\(A\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(toUInt8OrNull\\(toString\\(B\\)\\) AS mask_\\d+\\) IS NULL\\), NULL, toUInt32\\(IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(max2\\(0, min2\\(32, assumeNotNull\\(mask_\\d+\\)\\)\\)\\)\\).1\\)\\) AS ipv4_\\d+\\) IS NULL, if\\(\\(length\\(splitByChar\\('/', concat\\(ifNull\\(toString\\(if\\(\\(length\\(splitByChar\\('/', A\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\), ''\\), '/', ifNull\\(toString\\(B\\), ''\\)\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\), if\\(\\(length\\(splitByChar\\('/', ifNull\\(if\\(\\(\\(\\(toUInt32OrNull\\(toString\\(ipv4_\\d+\\)\\) AS param_as_uint32_\\d+\\) IS NOT NULL\\) AND \\(toTypeName\\(ipv4_\\d+\\) = 'String'\\)\\) OR \\(32 < 0\\) OR \\(\\(ifNull\\(param_as_uint32_\\d+, 
multiIf\\(length\\(splitByChar\\('/', ifNull\\(toString\\(ipv4_\\d+\\), ''\\)\\) AS tokens_\\d+\\) = 1, IPv4StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+, \\(length\\(tokens_\\d+\\) = 2\\) AND \\(ip_\\d+ IS NOT NULL\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NOT NULL\\), IPv4CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), assumeNotNull\\(mask_\\d+\\)\\).1, NULL\\)\\) AS ip_as_number_\\d+\\) IS NULL\\), NULL, IPv4NumToString\\(bitAnd\\(ip_as_number_\\d+, bitNot\\(toUInt32\\(intExp2\\(32 - 32\\) - 1\\)\\)\\)\\)\\), ''\\)\\) AS tokens_\\d+\\) > 2\\) OR \\(\\(IPv6StringToNumOrNull\\(tokens_\\d+\\[1\\]\\) AS ip_\\d+\\) IS NULL\\) OR \\(\\(length\\(tokens_\\d+\\) = 2\\) AND \\(\\(toUInt8OrNull\\(tokens_\\d+\\[-1\\]\\) AS mask_\\d+\\) IS NULL\\)\\), NULL, arrayStringConcat\\(flatten\\(extractAllGroups\\(lower\\(hex\\(IPv6CIDRToRange\\(assumeNotNull\\(ip_\\d+\\), toUInt8\\(ifNull\\(mask_\\d+ \\+ if\\(isIPv4String\\(tokens_\\d+\\[1\\]\\), 96, 0\\), 128\\)\\)\\).1\\)\\), '\\(\\[\\\\\\\\da-f\\]\\{4\\}\\)'\\)\\), ':'\\)\\)\\)" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 6fa03ef946d..3dd5447b7db 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -1,71 +1,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include -#include -#include -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserStringFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserStringFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - 
EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserStringFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index 55d88e2aa78..e5d2ee5e063 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -1,71 +1,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include -#include -#include -namespace -{ -using namespace DB; -using namespace std::literals; -} -class ParserDateTimeFuncTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserDateTimeFuncTest, ParseQuery) -{ const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - ASSERT_NE(nullptr, parser); - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - 
EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserDateTimeFuncTest, +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index bbd2313cd48..25cdb51df23 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -603,34 +602,6 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print output = dynamic(['a', 'b', 'c'])", "SELECT ['a', 'b', 'c'] AS output" - }, - { - "print output = array_index_of(dynamic([1, 2, 3]), 2)", - "SELECT indexOf([1, 2, 3], 2) - 1 AS output" - }, - { - "print output = array_index_of(dynamic(['a', 'b', 'c']), 'b')", - "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS output" - }, - { - "print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", - "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS output" - }, - { - "print output = array_length(dynamic([1, 2, 3]))", 
- "SELECT length([1, 2, 3]) AS output" - }, - { - "print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", - "SELECT length(['John', 'Denver', 'Bob', 'Marley']) AS output" - }, - { - "print output = array_sum(dynamic([2, 5, 3]))", - "SELECT arraySum([2, 5, 3]) AS output" - }, - { - "print output = array_sum(dynamic([2.5, 5.5, 3]))", - "SELECT arraySum([2.5, 5.5, 3]) AS output" } }))); diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp new file mode 100644 index 00000000000..c9efdbe105c --- /dev/null +++ b/src/Parsers/tests/gtest_common.cpp @@ -0,0 +1,64 @@ +#include "gtest_common.h" + +#include +#include +#include + +#include + +#include + +TEST_P(ParserTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_NE(nullptr, parser); + + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt(); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + 
ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + EXPECT_THAT(serializeAST(*ast->clone(), false), ::testing::MatchesRegex(expected_ast)); +} diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h index aac3dddb117..4eca9390d92 100644 --- a/src/Parsers/tests/gtest_common.h +++ b/src/Parsers/tests/gtest_common.h @@ -12,3 +12,6 @@ struct ParserTestCase class ParserTest : public ::testing::TestWithParam, ParserTestCase>> {}; + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; From 6973ce0814dd41b813e882512f4fcf5a6856ec82 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 16 Aug 2022 07:31:15 -0700 Subject: [PATCH 105/279] date_add and date_part --- .../KustoFunctions/KQLDateTimeFunctions.cpp | 71 +++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index 96c5bca06de..c1a5000c6fc 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -56,14 +56,77 @@ bool Ago::convertImpl(String & out, IParser::Pos & pos) bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { - return directMapping(out, pos, "date_add"); + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String period = getConvertedArgument(fn_name, pos); + //remove quotes from period. 
+ if ( period.front() == '\"' || period.front() == '\'' ) + { + //period.remove + period.erase( 0, 1 ); // erase the first quote + period.erase( period.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + ++pos; + const String offset = getConvertedArgument(fn_name, pos); + ++pos; + const String datetime = getConvertedArgument(fn_name, pos); + + out = std::format("date_add({}, {}, {} )",period,offset,datetime); + + return true; + }; bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + String part = Poco::toUpper(getConvertedArgument(fn_name, pos)); + if ( part.front() == '\"' || part.front() == '\'' ) + { + //period.remove + part.erase( 0, 1 ); // erase the first quote + part.erase( part.size() - 2 ); // erase the last quuote + } + String date; + if (pos->type == TokenType::Comma) + { + ++pos; + date = getConvertedArgument(fn_name, pos); + } + + String format; + + if(part == "YEAR" ) + format = "%G"; + else if (part == "QUARTER" ) + format = "%Q"; + else if (part == "MONTH") + format = "%m"; + else if (part == "WEEK_OF_YEAR") + format = "%V"; + else if (part == "DAY") + format = "%e"; + else if (part == "DAYOFYEAR") + format = "%j"; + else if (part == "HOUR") + format = "%I"; + else if (part == "MINUTE") + format = "%M"; + else if (part == "SECOND") + format = "%S"; + else + return false; + + out = std::format("formatDateTime(toDateTime64({}, 9, 'UTC'), '{}' )", date, format); + + return true; } bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos) From 3771e81ccd90bcaf68beff9fb01025c16e90a874 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Tue, 23 Aug 2022 09:43:43 -0700 Subject: [PATCH 106/279] DateTime part3 functions --- src/Parsers/Kusto/KQL_ReleaseNote.md | 18 ++ 
.../KustoFunctions/KQLCastingFunctions.cpp | 21 +- .../KustoFunctions/KQLDataTypeFunctions.cpp | 2 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 256 ++++++++++++++++-- .../KustoFunctions/KQLDateTimeFunctions.h | 31 +++ .../KustoFunctions/KQLGeneralFunctions.cpp | 2 +- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 8 +- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 67 ++++- 9 files changed, 360 insertions(+), 47 deletions(-) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index d9be1f8e3f4..9e69a91b88d 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -65,6 +65,24 @@ - [unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) `print unixtime_nanoseconds_todatetime(1546300800000000000)` +- [datetime_part] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-partfunction) + `print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))` + +- [datetime_add] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-addfunction) + `print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))` + +-[format_timespan] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-timespanfunction) + `print format_timespan(time(1d), 'd-[hh:mm:ss]')` + `print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')` + +-[format_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-datetimefunction) + `print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')` + `print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')` + +-[todatetime] (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todatetimefunction) + `print todatetime('2014-05-25T08:20:03.123456Z')` + `print todatetime('2014-05-25 20:03.123')` + 
# August 15, 2022 **double quote support** ``print res = strcat("double ","quote")`` diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index b6082995ec1..3fde2ea8364 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -24,9 +24,14 @@ bool ToBool::convertImpl(String & out, IParser::Pos & pos) bool ToDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto param = getArgument(function_name, pos); + + out = std::format("parseDateTime64BestEffortOrNull(toString({0}),9,'UTC')", param); + return true; } bool ToDouble::convertImpl(String & out, IParser::Pos & pos) @@ -60,13 +65,13 @@ bool ToString::convertImpl(String & out, IParser::Pos & pos) const auto param = getArgument(function_name, pos); out = std::format("ifNull(toString({0}), '')", param); return true; -} - +} bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + String res = String(pos->begin, pos->end); + out = res; + return false; } + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp index 4caf4188c8c..cc6834744b0 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp @@ -45,7 +45,7 @@ bool DatatypeDatetime::convertImpl(String &out,IParser::Pos &pos) --pos; datetime_str = std::format("'{}'",String(start->begin,pos->end)); } - out = std::format("toDateTime64({},9,'UTC')", datetime_str); + out = std::format("parseDateTime64BestEffortOrNull({},9,'UTC')", datetime_str); ++pos; return true; } diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index c1a5000c6fc..1f238fc222b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -15,11 +15,13 @@ #include #include #include +#include namespace DB::ErrorCodes { extern const int SYNTAX_ERROR; } + namespace DB { @@ -67,7 +69,7 @@ bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos) { //period.remove period.erase( 0, 1 ); // erase the first quote - period.erase( period.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + period.erase( period.size() - 2 ); // erase the last quote(Since token includes trailing space alwayas as per implememtation) } ++pos; const String offset = getConvertedArgument(fn_name, pos); @@ -88,7 +90,8 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) ++pos; String part = Poco::toUpper(getConvertedArgument(fn_name, pos)); - if ( part.front() == '\"' || part.front() == '\'' ) + + if (part.front() == '\"' || part.front() == '\'' ) { //period.remove part.erase( 0, 1 ); // erase the first quote @@ -100,13 +103,12 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) ++pos; date = getConvertedArgument(fn_name, pos); } - String format; if(part == "YEAR" ) format = "%G"; else if (part == "QUARTER" ) - format = "%Q"; + format = "%Q"; else if (part == "MONTH") format = "%m"; else if (part == "WEEK_OF_YEAR") @@ -122,9 +124,9 @@ bool DatetimePart::convertImpl(String & out, IParser::Pos & pos) else if (part == "SECOND") format = "%S"; else - return false; - - out = std::format("formatDateTime(toDateTime64({}, 9, 'UTC'), '{}' )", date, format); + throw Exception("Unexpected argument " + part + " for " + fn_name, ErrorCodes::SYNTAX_ERROR); + + out = std::format("formatDateTime({}, '{}' )", date, format); return true; } @@ -263,16 +265,162 @@ bool 
EndOfYear::convertImpl(String & out, IParser::Pos & pos) bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String formatspecifier; + ++pos; + const auto datetime = getConvertedArgument(fn_name, pos); + ++pos; + auto format = getConvertedArgument(fn_name, pos); + + //remove quotes and end space from format argument. + if (format.front() == '\"' || format.front() == '\'' ) + { + format.erase( 0, 1 ); // erase the first quote + format.erase( format.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + + std::vector res; + getTokens(format, res); + std::string::size_type i = 0; + size_t decimal =0; + while (i < format.size()) + { + char c = format[i]; + if(!isalpha(c)) + { + //delimeter + if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' 
|| c == ':') + formatspecifier = formatspecifier + c; + else + throw Exception("Invalid format delimeter in function:" + fn_name, ErrorCodes::SYNTAX_ERROR); + ++i; + } + else + { + //format specifier + String arg = res.back(); + + if(arg == "y" || arg == "yy" ) + formatspecifier = formatspecifier + "%y"; + else if (arg == "yyyy") + formatspecifier = formatspecifier + "%Y"; + else if (arg == "M" || arg == "MM") + formatspecifier = formatspecifier + "%m"; + else if (arg == "s" || arg == "ss") + formatspecifier = formatspecifier + "%S"; + else if (arg == "m" || arg == "mm") + formatspecifier = formatspecifier + "%M"; + else if (arg == "h" || arg == "hh") + formatspecifier = formatspecifier + "%I"; + else if (arg == "H" || arg == "HH") + formatspecifier = formatspecifier + "%H"; + else if (arg == "d") + formatspecifier = formatspecifier + "%e"; + else if (arg == "dd") + formatspecifier = formatspecifier + "%d"; + else if (arg == "tt") + formatspecifier = formatspecifier + "%p"; + else if (arg.starts_with('f')) + decimal = arg.size(); + else if (arg.starts_with('F')) + decimal = arg.size(); + else + throw Exception("Format specifier " + arg + " in function:" + fn_name + "is not supported", ErrorCodes::SYNTAX_ERROR); + res.pop_back(); + i = i + arg.size(); + } + } + if(decimal > 0 && formatspecifier.find('.')!=String::npos) + { + + out = std::format("concat(" + "substring(toString(formatDateTime( {0} , '{1}' )),1, position(toString(formatDateTime({0},'{1}')),'.')) ," + "substring(substring(toString({0}), position(toString({0}),'.')+1),1,{2})," + "substring(toString(formatDateTime( {0},'{1}')), position(toString(formatDateTime({0},'{1}')),'.')+1 ,length (toString(formatDateTime({0},'{1}'))))) " ,datetime, formatspecifier,decimal); + } + else + out = std::format("formatDateTime( {0},'{1}')" ,datetime, formatspecifier); + + return true; } -bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) +bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos) { 
- String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + String formatspecifier; + ++pos; + const auto datetime = getConvertedArgument(fn_name, pos); + ++pos; + auto format = getConvertedArgument(fn_name, pos); + size_t decimal=0; + //remove quotes and end space from format argument. + if (format.front() == '\"' || format.front() == '\'' ) + { + format.erase( 0, 1 ); // erase the first quote + format.erase( format.size() - 2 ); // erase the last quuote(Since token includes trailing space alwayas as per implememtation) + } + std::vector res; + getTokens(format, res); + size_t pad = 0; + std::string::size_type i = 0; + + while (i < format.size()) + { + char c = format[i]; + if(!isalpha(c)) + { + //delimeter + if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' || c == ':') + formatspecifier = formatspecifier + c; + else + throw Exception("Invalid format delimeter in function:" + fn_name, ErrorCodes::SYNTAX_ERROR); + ++i; + } + else + { + //format specifier + String arg = res.back(); + + if (arg == "s" || arg == "ss") + formatspecifier = formatspecifier + "%S"; + else if (arg == "m" || arg == "mm") + formatspecifier = formatspecifier + "%M"; + else if (arg == "h" || arg == "hh") + formatspecifier = formatspecifier + "%I"; + else if (arg == "H" || arg == "HH") + formatspecifier = formatspecifier + "%H"; + else if (arg == "d") + formatspecifier = formatspecifier + "%e"; + else if (arg == "dd") + formatspecifier = formatspecifier + "%d"; + else if (arg.starts_with('d') && arg.size() >2) + { formatspecifier = formatspecifier + "%d"; + pad = arg.size() - 2 ; + } + else if (arg.starts_with('f')) + decimal = arg.size(); + else if (arg.starts_with('F')) + decimal = arg.size(); + else + throw Exception("Format specifier " + arg + " in function:" + fn_name + "is not supported", ErrorCodes::SYNTAX_ERROR); + res.pop_back(); + i = 
i + arg.size(); + } + } + if(decimal > 0 && formatspecifier.find('.')!=String::npos ) + { + out = std::format("leftPad(concat(substring(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),1, position( toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),'.')),substring(SUBSTRING(toString(toDateTime64({0},9,'UTC')),position(toString(toDateTime64({0},9,'UTC')),'.')+1),1,{2}),substring(toString(formatDateTime(toDateTime64({0},9,'UTC'),'{1}')),position( toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')),'.')+1,length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}'))))),length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')))+{3}+{2},'0')", datetime,formatspecifier,decimal,pad); + } + else if (decimal == 0 && formatspecifier.find('.')==String::npos) + out = std::format("leftPad(toString(formatDateTime(toDateTime64({0},9,'UTC'),'{1}')),length(toString(formatDateTime( toDateTime64({0},9,'UTC'),'{1}')))+{2},'0')", datetime,formatspecifier,pad); + else + out = std::format("formatDateTime(toDateTime64({0},9,'UTC'),'{1}')", datetime,formatspecifier); + + return true; } bool GetMonth::convertImpl(String & out, IParser::Pos & pos) @@ -292,9 +440,74 @@ bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos) bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos) { - String res = String(pos->begin, pos->end); - out = res; - return false; + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String datetime_str; + String hour ; + String day ; + String minute ; + String second ; + int arg_count = 0; + std::vector args; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + String arg = getConvertedArgument(fn_name, pos); + args.insert(args.begin(),arg); + if(pos->type == TokenType::Comma) + ++pos; + ++arg_count; + } + + if (arg_count < 2 || arg_count > 4) + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + 
+ if(arg_count == 2) + { + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) ; + } + else if (arg_count == 3) + { + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + second = args.back(); + args.pop_back(); + + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) + ":" + second.erase(second.size() - 1); + } + else if (arg_count == 4) + { + day = args.back(); + args.pop_back(); + hour = args.back(); + args.pop_back(); + minute = args.back(); + args.pop_back(); + second = args.back(); + args.pop_back(); + + datetime_str = hour.erase(hour.size() - 1) + ":" + minute.erase(minute.size() - 1) + ":" + second.erase(second.size() - 1); + day = day.erase(day.size() - 1) + "."; + + } + else + throw Exception("argument count out of bound in function: " + fn_name, ErrorCodes::SYNTAX_ERROR); + + //Add dummy yyyy-mm-dd to parse datetime in CH + datetime_str = "0000-00-00 " + datetime_str; + + out = std::format("CONCAT('{}',toString(SUBSTRING(toString(toTime(parseDateTime64BestEffortOrNull('{}', 9 ,'UTC' ))),12)))" ,day ,datetime_str ); + + return true; } bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos) @@ -366,7 +579,7 @@ bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(DAY,{}, toDateTime64((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(DAY,{}, parseDateTime64BestEffortOrNull((toStartOfDay({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -386,7 +599,7 @@ bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(MONTH,{}, toDateTime64((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(MONTH,{}, 
parseDateTime64BestEffortOrNull((toStartOfMonth({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -406,7 +619,7 @@ bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(Week,{}, toDateTime64((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); + out = std::format("date_add(Week,{}, parseDateTime64BestEffortOrNull((toStartOfWeek({})) , 9 , 'UTC')) ", offset, datetime_str); return true; } @@ -425,7 +638,7 @@ bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) ++pos; offset = getConvertedArgument(fn_name, pos); } - out = std::format("date_add(YEAR,{}, toDateTime64((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); + out = std::format("date_add(YEAR,{}, parseDateTime64BestEffortOrNull((toStartOfYear({}, 'UTC')) , 9 , 'UTC'))", offset, datetime_str); return true; } @@ -496,3 +709,4 @@ bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) } } + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h index bde104e88b9..a40c8125063 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -223,5 +223,36 @@ protected: bool convertImpl(String &out,IParser::Pos &pos) override; }; +void inline getTokens(String format , std::vector & res ) +{ + String str = format; + String token; + auto pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + while (pos != String::npos ) + { + if ( pos != 0 ) + { + // Found a token + token = str.substr(0, pos); + res.insert(res.begin(),token); + } + /* else + { + // Found another delimiter + // Just move on to next one + + } +*/ + str.erase(0, pos+1); // Always remove pos+1 to get rid of delimiter + pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + } + // Cover the last (or only) token + if ( 
str.length() > 0 ) + { + token = str; + res.insert(res.begin(),token); + } +} + } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp index e1d932e9ce1..038e801216b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -78,7 +78,7 @@ bool BinAt::convertImpl(String & out,IParser::Pos & pos) if (origal_expr == "datetime" || origal_expr == "date") { - out = std::format("toDateTime64({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); + out = std::format("parseDateTime64BestEffortOrNull({} + toInt64(({} - {}) / {} + {}) * {}, 9, 'UTC')", t1, t2, t1, bin_size, dir, bin_size); } else if (origal_expr == "timespan" || origal_expr =="time" || ParserKQLDateTypeTimespan().parseConstKQLTimespan(origal_expr)) { diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index 65eeafe737b..ca2d2421181 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -348,7 +348,7 @@ bool ParserKQLMakeSeries :: parseImpl(Pos & pos, ASTPtr & node, Expected & expec } makeSeries(kql_make_series, pos.max_depth); - + Tokens token_subquery(kql_make_series.sub_query.c_str(), kql_make_series.sub_query.c_str() + kql_make_series.sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp index 3dd5447b7db..b5c7971e644 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -52,15 +52,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print datetime(2015-12-31 23:59:59.9)", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 
'UTC')" }, { "print datetime(\"2015-12-31 23:59:59.9\")", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 'UTC')" }, { "print datetime('2015-12-31 23:59:59.9')", - "SELECT toDateTime64('2015-12-31 23:59:59.9', 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull('2015-12-31 23:59:59.9', 9, 'UTC')" }, { "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", @@ -124,7 +124,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print bin(datetime(1970-05-11 13:45:07), 1d)", - "SELECT toDateTime64(toInt64(toFloat64(toDateTime64('1970-05-11 13:45:07', 9, 'UTC')) / 86400) * 86400, 9, 'UTC')" + "SELECT parseDateTime64BestEffortOrNull(toFloat64(parseDateTime64BestEffortOrNull('1970-01-01 12:00:00.0', 9, 'UTC')) + (toInt64(((toFloat64(parseDateTime64BestEffortOrNull('2017-05-15 10:20:00.0', 9, 'UTC')) - toFloat64(parseDateTime64BestEffortOrNull('1970-01-01 12:00:00.0', 9, 'UTC'))) / 86400) + 0) * 86400), 9, 'UTC') AS res" } }))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp index e5d2ee5e063..09fbff6625f 100644 --- a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -8,19 +8,19 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, ::testing::ValuesIn(std::initializer_list{ { "print week_of_year(datetime(2020-12-31))", - "SELECT toWeek(toDateTime64('2020-12-31', 9, 'UTC'), 3, 'UTC')" + "SELECT toWeek(parseDateTime64BestEffortOrNull('2020-12-31', 9, 'UTC'), 3, 'UTC')" }, { "print startofweek(datetime(2017-01-01 10:10:17), -1)", - "SELECT toDateTime64(toStartOfWeek(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfWeek(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalWeek(-1)" }, { "print startofmonth(datetime(2017-01-01 
10:10:17), -1)", - "SELECT toDateTime64(toStartOfMonth(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfMonth(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalMonth(-1)" }, { "print startofday(datetime(2017-01-01 10:10:17), -1)", - "SELECT toDateTime64(toStartOfDay(toDateTime64('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" + "SELECT parseDateTime64BestEffortOrNull(toStartOfDay(parseDateTime64BestEffortOrNull('2017-01-01 10:10:17', 9, 'UTC')), 9, 'UTC') + toIntervalDay(-1)" }, { @@ -29,27 +29,27 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print monthofyear(datetime(2015-12-14))", - "SELECT toMonth(toDateTime64('2015-12-14', 9, 'UTC'))" + "SELECT toMonth(parseDateTime64BestEffortOrNull('2015-12-14', 9, 'UTC'))" }, { "print hourofday(datetime(2015-12-14 10:54:00))", - "SELECT toHour(toDateTime64('2015-12-14 10:54:00', 9, 'UTC'))" + "SELECT toHour(parseDateTime64BestEffortOrNull('2015-12-14 10:54:00', 9, 'UTC'))" }, { "print getyear(datetime(2015-10-12))", - "SELECT toYear(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toYear(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print getmonth(datetime(2015-10-12))", - "SELECT toMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toMonth(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print dayofyear(datetime(2015-10-12))", - "SELECT toDayOfYear(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toDayOfYear(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print dayofmonth(datetime(2015-10-12))", - "SELECT toDayOfMonth(toDateTime64('2015-10-12', 9, 'UTC'))" + "SELECT toDayOfMonth(parseDateTime64BestEffortOrNull('2015-10-12', 9, 'UTC'))" }, { "print unixtime_seconds_todatetime(1546300899)", @@ -57,7 +57,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, }, { "print dayofweek(datetime(2015-12-20))", - "SELECT 
toDayOfWeek(toDateTime64('2015-12-20', 9, 'UTC')) % 7" + "SELECT toDayOfWeek(parseDateTime64BestEffortOrNull('2015-12-20', 9, 'UTC')) % 7" }, { "print now()", @@ -136,6 +136,51 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, { "print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", "SELECT dateDiff('minute', toDateTime64('2017-10-30 23:05:01', 9, 'UTC'), toDateTime64('2017-10-30 23:00:59', 9, 'UTC')) * -1" + }, + { + "print datetime(null)", + "SELECT parseDateTime64BestEffortOrNull('null', 9, 'UTC')" + }, + { + "print datetime('2014-05-25T08:20:03.123456Z')", + "SELECT parseDateTime64BestEffortOrNull('2014-05-25T08:20:03.123456Z', 9, 'UTC')" + }, + { + "print datetime(2015-12-14 18:54)", + "SELECT parseDateTime64BestEffortOrNull('2015-12-14 18:54', 9, 'UTC')" + }, + { + "print make_timespan(67,12,30,59.9799)", + "SELECT CONCAT('67.', toString(substring(toString(toTime(parseDateTime64BestEffortOrNull('0000-00-00 12:30:59.9799', 9, 'UTC'))), 12)))" + }, + { + "print todatetime('2014-05-25T08:20:03.123456Z')", + "SELECT parseDateTime64BestEffortOrNull(toString('2014-05-25T08:20:03.123456Z'), 9, 'UTC')" + }, + { + "print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')", + "SELECT concat(substring(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), 1, position(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), '.')), substring(substring(toString(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC')), position(toString(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC')), '.') + 1), 1, 3), substring(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), 
position(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')), '.') + 1, length(toString(formatDateTime(parseDateTime64BestEffortOrNull(toString('2009-06-15T13:45:30.6175425'), 9, 'UTC'), '%y-%m-%d [%H:%M:%S.]')))))" + }, + { + "print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')", + "SELECT formatDateTime(parseDateTime64BestEffortOrNull('2015-12-14 02:03:04.12345', 9, 'UTC'), '%y-%m-%e %I:%M:%S %p')" + }, + { + "print format_timespan(time(1d), 'd-[hh:mm:ss]')", + "SELECT leftPad(toString(formatDateTime(toDateTime64(86400., 9, 'UTC'), '%e-[%I:%M:%S]')), length(toString(formatDateTime(toDateTime64(86400., 9, 'UTC'), '%e-[%I:%M:%S]'))) + 0, '0')" + }, + { + "print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')", + "SELECT leftPad(concat(substring(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), 1, position(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), '.')), substring(substring(toString(toDateTime64(1038655., 9, 'UTC')), position(toString(toDateTime64(1038655., 9, 'UTC')), '.') + 1), 1, 4), substring(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), position(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]')), '.') + 1, length(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]'))))), (length(toString(formatDateTime(toDateTime64(1038655., 9, 'UTC'), '%d-[%I:%M:%S.]'))) + 3) + 4, '0')" + }, + { + "print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))", + "SELECT formatDateTime(parseDateTime64BestEffortOrNull('2017-10-30 01:02:03.7654321', 9, 'UTC'), '%e')" + }, + { + "print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))", + "SELECT parseDateTime64BestEffortOrNull('2017-10-30 01:02:03.7654321', 9, 'UTC') + toIntervalDay(1)" } + }))); From f6a5731a1239a544e02a2062c4b503eb9fc8598f Mon Sep 17 
00:00:00 2001 From: HeenaBansal2009 Date: Tue, 23 Aug 2022 12:56:55 -0700 Subject: [PATCH 107/279] Fixed tests --- q | 674432 +++++++++++++++ .../KustoFunctions/KQLGeneralFunctions.cpp | 2 +- .../tests/KQL/gtest_KQL_MakeSeries.cpp | 2 +- .../tests/KQL/gtest_KQL_StringFunctions.cpp | 2 +- .../tests/KQL/gtest_KQL_dateTimeFunctions.cpp | 25 +- 5 files changed, 674446 insertions(+), 17 deletions(-) create mode 100644 q diff --git a/q b/q new file mode 100644 index 00000000000..1f9ab46542e --- /dev/null +++ b/q @@ -0,0 +1,674432 @@ +commit 9f567b4e62e8f65bc7d2a976dcd771b6636156f6 (HEAD -> DateTime-Part3) +Author: HeenaBansal2009 +Date: Tue Aug 23 09:43:43 2022 -0700 + + DateTime part3 functions + +commit a6a8340d00ba90509177b84626038cdc2dd2695a +Author: HeenaBansal2009 +Date: Tue Aug 16 07:31:15 2022 -0700 + + date_add and date_part + +commit 1f74eddb5fab974f683cfdcbef917a6da994eccc (origin/Kusto-phase2, Kusto-phase2) +Merge: ea036cc0cd 55932cfd9c +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 23 09:17:43 2022 -0400 + + Merge pull request #48 from ClibMouse/feature/kql-array-functions-1096 + + Implement some KQL array functions + +commit 55932cfd9c8023a450719d5fdb53f614fccc424c (origin/feature/kql-array-functions-1096) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 22 15:08:43 2022 -0700 + + Implement some KQL array functions + +commit ea036cc0cd039fa4e88967d343f757acd355264f +Author: Yong Wang +Date: Mon Aug 22 13:33:01 2022 -0700 + + Kusto-phase2: add bin function, unit test for make-series + +commit 019eeb3c5b2bd64c22c3974aacf7646288c91d0d (origin/larry-kql-functional-tests) +Author: Yong Wang +Date: Sun Aug 21 22:31:03 2022 -0700 + + Kusto-phase2, updated make-series operator + +commit 640d8f7f0bf435bd754e65b34b89e413e5f25335 +Merge: 0a4310fef1 e362bc7a6e +Author: Heena Bansal +Date: Fri Aug 19 10:41:40 2022 -0400 + + Merge pull request #41 from ClibMouse/Datetime_Part2 + + Part2 DateTime functions + +commit 
e362bc7a6ed3ceae6ea18b3dfcad180cfd2b6133 (origin/Datetime_Part2, Datetime_Part2) +Author: HeenaBansal2009 +Date: Fri Aug 19 07:35:22 2022 -0700 + + Updated Readme + +commit bbd245e091ebd26ad741d674b100c91ec41f0378 +Author: HeenaBansal2009 +Date: Tue Aug 16 13:01:01 2022 -0700 + + Updated as per review comments + +commit 37b1be801c9a25b501e862009fedb73da3737949 +Author: HeenaBansal2009 +Date: Mon Aug 15 20:45:39 2022 -0700 + + Incorporated review comments + +commit 238b45f99195dec983e1ad06aa35819f6c3379f4 +Author: HeenaBansal2009 +Date: Fri Aug 12 10:24:52 2022 -0700 + + Part2 DateTime functions + +commit f76a277039f5101d552da0f0357937d80ee3b1db +Author: HeenaBansal2009 +Date: Mon Aug 15 20:45:39 2022 -0700 + + Incorporated review comments + +commit 5332da3b617d0050131999d31c052422bc1bf3ab +Author: HeenaBansal2009 +Date: Fri Aug 12 12:58:25 2022 -0700 + + Updated release notes + +commit e687abf259da928960fd54db1b30d7bd12beb8f8 +Author: HeenaBansal2009 +Date: Fri Aug 12 10:24:52 2022 -0700 + + Part2 DateTime functions + +commit 0a4310fef1bc4afcef4799432eee3525fc79bc91 +Merge: 5cb0aa4d0c 09aac74f50 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 19 10:25:24 2022 -0400 + + Merge pull request #46 from ClibMouse/feature/kql-array-iif + + Implement array_iff / array_iif + +commit 09aac74f508d38d34c5579ec2b8eb787dc23acfd (origin/feature/kql-array-iif) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 19 07:24:36 2022 -0700 + + Set the release date + +commit 5cb0aa4d0c06e40b33d2ad4d59ea10e8426595ea +Author: Yong Wang +Date: Wed Aug 17 23:17:29 2022 -0700 + + Kusto-phase2 : fixed the double quote issue + +commit 66b02d0551d4d4129830bedd3105f11e89277dd9 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 17 05:53:02 2022 -0700 + + Implement array_iff / array_iif + +commit b83ab47bf7849db7c10b6533a8c81bfbc6965d46 +Merge: f7cbc8647f 6614c412ef +Author: larryluogit +Date: Tue Aug 16 16:02:45 2022 -0400 + + 
Merge pull request #40 from ClibMouse/larry-kql-functional-tests + + Adding functional tests for IP Binary DateTime and more String functions + +commit 6614c412ef1d8c6c33ebbf7f68ad0dfbbd2f0e3d +Author: Larry Luo +Date: Mon Aug 15 17:12:04 2022 -0400 + + Added datatype tests + +commit 2e95aed33372b16b0a04dc4b66030a956c12bae7 +Author: Larry Luo +Date: Fri Aug 12 14:26:52 2022 -0400 + + Added missing file for IPv6 tests + +commit 52b95e8921d91204f4ab568147ca383043935243 +Author: Larry Luo +Date: Fri Aug 12 14:22:47 2022 -0400 + + Added ipv6 tests + +commit 707cba3e4b5e81487eaef0e139557a91c9983d96 +Author: Larry Luo +Date: Fri Aug 12 12:35:56 2022 -0400 + + Add tests for binary and datetime + +commit 1d4adaa168347ea1a0cf4751c53a81c487c044c7 +Author: Larry Luo +Date: Fri Aug 12 12:34:03 2022 -0400 + + Added functional tests for IP, String, Binary and Datetime + +commit f7cbc8647f08302a299bb90fc03aa49309986ad2 +Author: kashwy +Date: Tue Aug 16 06:48:49 2022 -0700 + + Kusto-phase2 : finish make series + +commit 1fc52b579f60d3df9f1873e7211d702a8e03d355 +Merge: e0c4c5afff a516aaf84f +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 17:09:11 2022 -0400 + + Merge pull request #42 from ClibMouse/feature/kql-conversion-functions + + Implement some KQL conversion functions + +commit a516aaf84fb1293160ed73b2d4a5ada9d9bcbe03 (origin/feature/kql-conversion-functions) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 13:34:55 2022 -0700 + + Implement some KQL conversion functions + +commit e0c4c5afffda69ebe549c0d0da83f96a6a79c435 +Author: kashwy +Date: Fri Aug 12 11:47:25 2022 -0700 + + Kusto-phase2: add bin_at function. 
fix trim error + +commit 214ef8aa9fda30e69d6712a691d427532be2f1b8 +Merge: fcc0d8340b f94045366a +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 13:03:40 2022 -0400 + + Merge pull request #39 from ClibMouse/feature/ip-functions-part-3 + + Implement KQL IPv6 functions + +commit f94045366a74d534132879f823763cdd3c2bf73f (origin/feature/ip-functions-part-3) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 09:54:49 2022 -0700 + + Add unit tests + +commit 140a343e006071012a7a97fb541fce2d2f83bc35 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 07:54:38 2022 -0700 + + Correct rebase error + +commit 6336c666c10c2bcfe657a4948e3538b5e3eacde7 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 12 07:32:42 2022 -0700 + + Implement KQL IPv6 functions + +commit fcc0d8340bc0d5b22a7af87b39fe8e8af31dcb14 +Merge: 80c29bcdeb 8d03117091 +Author: Mallik Hassan +Date: Fri Aug 12 11:35:34 2022 -0300 + + Merge pull request #37 from ClibMouse/Kusto-phase2-Dynamic-Array-Functions + + Resubmit Dynamic Array functions - array_index_of, length, sum and dynamic keyword + +commit 8d031170917b16f5371b52d1ca58aab4dd6b5685 (origin/Kusto-phase2-Dynamic-Array-Functions) +Author: root +Date: Thu Aug 11 17:11:22 2022 -0700 + + Resubmit Aggregate functions - array_index_of, length, sum and dynamic data type + +commit 80c29bcdebc1e3ea6693a7d06d1be5cc29169003 +Author: kashwy +Date: Thu Aug 11 12:38:49 2022 -0700 + + Kusto-pahse2: fixed toimspan issue and other functions + +commit d8c1f41b66dbc1ef686a9f9c0e74abde3bdb10bd +Merge: 8e71c21508 4a1866b320 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Aug 11 09:04:37 2022 -0400 + + Merge pull request #36 from ClibMouse/feature/kql-binary-functions + + Implement KQL binary functions + +commit 8e71c21508662af555a2fb417d3d0a5a81a712d2 +Merge: 803dfd2063 7296dd583e +Author: Heena Bansal +Date: Wed Aug 10 16:57:40 2022 -0400 + + Merge pull 
request #34 from ClibMouse/Kusto-DateTime_part1 + + KQL DateTime functions PART 1 + +commit 7296dd583e465a174c3eb5d0b7ff1bcdd914c470 (origin/Kusto-DateTime_part1, Kusto-DateTime_part1) +Author: HeenaBansal2009 +Date: Wed Aug 10 13:04:07 2022 -0700 + + Added test and review comments + +commit 4a1866b320f6f3938cd683c264c31b74457b70fb (origin/feature/kql-binary-functions) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 10 07:58:07 2022 -0700 + + Update release notes + +commit f5b87531a46ec899a412b22f06984560d8a98f94 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 10 07:46:29 2022 -0700 + + Implement KQL binary functions + +commit 803dfd2063e7a058b397eaa09732963a8f4473f6 +Merge: cd2838cc42 f7c47a79c8 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 9 13:06:39 2022 -0400 + + Merge pull request #35 from ClibMouse/bugfix/fix-unit-tests + + Update unit tests for IP functions + +commit f7c47a79c80a25c911b62fd929e6bfca80c84958 (origin/bugfix/fix-unit-tests) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 9 10:02:09 2022 -0700 + + Update unit tests for IP functions + +commit e4dc7889dfdec654871198e70fccc22c4fb47960 +Author: HeenaBansal2009 +Date: Tue Aug 9 09:40:35 2022 -0700 + + Date_Time functions PART 1 + +commit cd2838cc424e139be3e98ecad03eba7e2b1bdba6 +Author: kashwy +Date: Tue Aug 9 06:11:39 2022 -0700 + + Kusto-phase2: Add kusto data types + +commit 26472e65bcd806201207c09dcf0da55bcf8df63e +Merge: e08e0f6b9b 5d67138a81 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 10:49:56 2022 -0400 + + Merge pull request #31 from ClibMouse/feature/improve-ipv4-performance + + Improve performance of IPv4 functions + +commit 5d67138a81bff4f5c52c47ddd4c6e81a2822eb85 (origin/feature/improve-ipv4-performance) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 07:38:51 2022 -0700 + + Improve performance of IPv4 functions + +commit 
e08e0f6b9b18ed20ce19272f0559dde104cb0e24 +Merge: 5a7dac91dd 89c9491ecc +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 10:28:08 2022 -0400 + + Merge pull request #25 from ClibMouse/feature/ip-functions-part-2-783 + + KQL IPv4 functions + +commit 89c9491eccc576d81bb800707595c9849f5a6c1d (origin/feature/ip-functions-part-2-783) +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Mon Aug 8 07:27:00 2022 -0700 + + Move KQL tests into their own folder + +commit c98a49d358a5516007c7c12864e0c2cf161bb195 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Aug 5 13:58:32 2022 -0700 + + Implement unit tests for IP functions + +commit 5a7dac91dd491ad160e6577d23f96d7413154150 +Merge: 6be179ae7e 3e6578796f +Author: larryluogit +Date: Fri Aug 5 13:05:17 2022 -0400 + + Merge pull request #30 from ClibMouse/larry-kql-functional-tests + + Add make list set and more ip tests + +commit 45a7fba5fcf8d0c3e07f7a4ad7600fc0558a06b0 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Aug 4 10:16:23 2022 -0700 + + Make aliases unique + +commit 01aa7ddbdfa401bfe80fe856308473e65e435b25 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 2 16:28:50 2022 -0700 + + Add unit tests and release notes + +commit 432aaa085f5bf971f3edcc6571ef5cd6a844f88f +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Aug 2 07:35:21 2022 -0700 + + Implement KQL functions handling IPv4 + +commit 3e6578796f90856c41078e766e9f3e12e2944a19 +Author: Larry Luo +Date: Thu Aug 4 22:33:08 2022 -0400 + + Add make list set and more ip tests + +commit 6be179ae7ee3b1614cf6231f3d9f6b055d37f489 +Author: Larry Luo +Date: Thu Aug 4 20:55:33 2022 -0400 + + Fix rebase conflicts. 
+ +commit 561570f622a42f7d79936750768366762a75cca5 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Aug 3 16:49:36 2022 -0700 + + Fix some IP function unit tests + +commit 441c348647260c48929b7a633ca01bae09cfaa6f +Author: HeenaBansal2009 +Date: Wed Aug 3 14:06:02 2022 -0700 + + Fix bug in clickhouse-client for non-interactive mode + +commit 8e3cc459c94aafc48a09b12bfd159b74e784f695 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Jul 28 07:24:45 2022 -0700 + + Extract common functions + +commit 79e645cf885cdb5464642040029121c159d15c82 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Wed Jul 27 12:44:08 2022 -0700 + + Improve conformance to the specifications + +commit f6365f148598e23a3a8f22709f6a95c220877bc8 +Author: Larry Luo +Date: Tue Jul 26 20:24:29 2022 -0400 + + Added func tests for string and ip + +commit e7af515ba28174b09e9aaa770b4bcc4bf047faf5 +Author: Yong Wang +Date: Mon Jul 25 00:01:19 2022 -0700 + + Kusto-phase2: Added check end of function, and neww string functions + +commit b80da6b076d7202e34e53b3eba6f0f1167204c6b +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Jul 26 08:32:28 2022 -0700 + + Provide conformance to the specification + +commit 4564e97e4ce0bdad2b94e3bde5bc17fd22d0eae1 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Tue Jul 19 09:52:54 2022 -0700 + + Support expressions as IP function arguments + +commit afa8390df7821cd21b5e8a9b17f8613596ac3179 +Author: root +Date: Wed Aug 3 08:22:17 2022 -0700 + + update release notes and test script + +commit 0150da0edc286efc6906547b2a6530462f9a4979 +Author: root +Date: Tue Aug 2 19:03:22 2022 -0700 + + Kusto Aggregate functions as of July 29 + +commit 4c627fdddbb56eda0dfbc7443101d9864d2ffaba +Author: Yong Wang +Date: Fri Jul 22 05:52:26 2022 -0700 + + Kusto-phase2: Add print operator + +commit 5681abb810e78dce035b9ba34d7702dddfd2dc27 +Author: Larry Luo +Date: Wed Jul 20 14:18:03 2022 -0400 + + Added sorting 
test cases + +commit 1e6d472555c4f87ebf1c1050eb9d2125182fdadf +Author: Larry Luo +Date: Mon Jul 18 15:56:57 2022 -0400 + + Add functional tests for tabular table summarize + +commit d807446849922616c03b9a0980eeb91d291ef6c1 +Author: Yong Wang +Date: Wed Jul 20 06:39:32 2022 -0700 + + Kusto-phase: Add function to validate end of kql function + +commit c890a9dbc8be90868ec576b133dd3ab0d88c650e +Author: Yong Wang +Date: Tue Jul 19 21:25:52 2022 -0700 + + Kusto-phase2 Fixed bug of Syntax error when Order By is followed by another statement + +commit 94a739094b765291368296d168d6b2608b67eb48 +Author: HeenaBansal2009 +Date: Thu Jul 28 08:59:08 2022 -0700 + + Updated Release notes with examples + +commit b6484dbdf05a40d1e281f01303221e9f024c755e +Author: HeenaBansal2009 +Date: Tue Jul 26 22:13:34 2022 -0700 + + Add config entry to overwrite default dialect to kusto auto + +commit 6cc15190c9d9ecda8e895b9a996749089262e2a5 +Author: Yong Wang +Date: Sat Jul 16 07:49:24 2022 -0700 + + Kusto-phase2: Fixed the issue of conflict + +commit a4a947b33260f824d481d764a39bbd0054acea01 +Author: Yong Wang +Date: Thu Jun 23 14:26:37 2022 -0700 + + Kusto-phase2: add kusto_auto dialect + +commit 6754d71d34d7613c510488b7ee2f90dc88e0ebcd +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit 45d804a1d917d6fcb7addd45148f64bf19a538ec +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. 
+ changed the summarize class for new aggregation functions + +commit 3a619fb39fc5ef9de12501d7c9a76aabc0da2441 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit e5f3ab433311c5d4aa53ff0a74c7fdaa8433aa80 +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit caee54a5f77324d686b37207d9a66e1db6dc9592 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 12:13:50 2022 -0700 + + Updated release notes to indicate deficiencies + +commit bd2b0296bad51880c8941923d85d05336fec3ccd +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:32:49 2022 -0700 + + Update release notes + +commit 41396c3770f489c37bd182b72eb120f46669519a +Author: Yong Wang +Date: Fri Jul 15 06:54:23 2022 -0700 + + Kusto-phase2: Added some string functions and release note + +commit 808c2883ede5a9176a3fac7df65d7bf85634db6d +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:14:01 2022 -0700 + + Implement ipv4_is_private + +commit 7b65761f50a7ba10b4b665e35fdf7eefb02ec235 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Fri Jul 15 10:13:44 2022 -0700 + + Implement review comments + +commit 3667092e968d97798c81c89f824230a7d38d50a8 +Author: ltrk2 <107155950+ltrk2@users.noreply.github.com> +Date: Thu Jul 14 09:00:51 2022 -0700 + + Implement some IP-handling functions + +commit f3df9c7734a3277894b44fb56db204be5188aae6 +Author: Yong Wang +Date: Tue Jul 12 08:49:42 2022 -0700 + + Kusto-phase2: Changed dialect to use enumerate, Added subquery for in operator, fixed the multi query issue + +commit 8d20a97ec66bfb6661a2baf170fce112537ae6ce +Author: Yong Wang +Date: Wed Jun 29 23:01:17 2022 -0700 + + Kusto-phase2 : Fix the function base64_decode_tostring() + +commit 69e12692b371ec3b2f71f3cae1c8ce2c8af57b84 +Author: Yong Wang +Date: Wed Jun 29 13:02:14 2022 -0700 + + Kusto-phase 2: Add more string operators + +commit 
7fb659ea5b58574ef1be93bee8969d24716db3bd +Author: Yong Wang +Date: Tue Jun 28 22:03:36 2022 -0700 + + Kusto-phase2: Add table function kql() + +commit 9d902a53ceecab662a9e7b52c442d15d76003e52 +Author: root +Date: Wed Jun 29 21:17:17 2022 -0700 + + Aggregate function working with two pipes + +commit 55a1e836b1559128ea12de872b0485c2989363d0 +Author: root +Date: Wed Jun 29 11:23:13 2022 -0700 + + Aggregate functions initial code - Priority:HIGHT(Easy and Medium) + +commit 3e9f23f7ceaa1d41f0af15b87a1b681e564a3771 +Author: Yong Wang +Date: Fri Jun 24 13:05:52 2022 -0700 + + Kusto-phase2: Add alias support + +commit 311a59191e058e707195b6ad65bcd0f62808be5d +Author: Yong Wang +Date: Thu Jun 23 14:26:37 2022 -0700 + + Kusto-phase2: add kusto_auto dialect + +commit 6bb8f1a06e8ebada3f337e6bc8bfaa5f19beccec +Author: Yong Wang +Date: Wed Jun 22 12:00:47 2022 -0700 + + Kusto-phase2: Add common function to get argument for function convertion + +commit 8d5a925fcac00c34f8bb99c1551ee02136f9bde0 +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit 4a86da3992bf3cae8c55a6bc2edc9e7043367031 +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. + changed the summarize class for new aggregation functions + +commit 257abcdb6942fa86ddb1ff1c19e985c9b370d811 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit a99e3adb50988270f841ccd859716148eae8698d +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit 8fe315e4b6c9c6c1bbb6d84289021b893976b2c8 +Author: Yong Wang +Date: Wed Jun 8 10:14:03 2022 -0700 + + Kusto-phase1: Add Support to Kusto Query Language + + This is the initial implement of Kusto Query Language. 
+ + in this commit, we support the following features as MVP : + + Tabular expression statements + Limit returned results + Select Column (basic project) + sort, order + Perform string equality operations + Filter using a list of elements + Filter using common string operations + Some string operators + Aggregate by columns + Base aggregate functions + only support avg, count ,min, max, sum + Aggregate by time intervals + +commit b8ca6a7a832bfb691b4453f2bca9af62e6203fa2 +Author: Yong Wang +Date: Tue Jun 21 09:33:07 2022 -0700 + + Kusto-phase2: Add KQL functions parser + +commit d72c403f816509685e06eef09a957438725e8d02 +Author: Yong Wang +Date: Fri Jun 17 08:47:08 2022 -0700 + + Kusto-phase2 : Added KQL functions interface. + changed the summarize class for new aggregation functions + +commit ff202f1d11a71a35bb381339a35d5e589478f8c5 +Author: Yong Wang +Date: Tue Jun 14 07:40:06 2022 -0700 + + Kusto-pahse2: Add support for multiple summarize + +commit e17d586b56ffd0a4ab8b443e55be07cf2a5fb87d +Author: Yong Wang +Date: Mon Jun 13 06:26:02 2022 -0700 + + Kusto-phase1: Fixed misleading indentation + +commit c84d164c91792f2504cddf3f2851b06a5433eb15 +Author: Yong Wang +Date: Sun Jun 12 20:05:51 2022 -0700 + + Kusto-pahse1: Fixed moy style issues. 
+ +commit bedfc2ed619235b842db7bd5f030e844d0a0c816 +Author: Yong Wang +Date: Sat Jun 11 10:33:38 2022 -0700 + + Kusto-phase1: Fixed style + +commit 4b47d3299a7b6da7ac15c06cf535407379c2b502 +Author: Yong Wang +Date: Thu Jun 9 22:17:58 2022 -0700 + + Kusto-phase1: Fixed the bug for KQL filer with multiple operations + +commit 77806601ac2316bd450022aff265373ac49e45b9 +Author: Yong Wang +Date: Thu Jun 9 18:49:22 2022 -0700 + + Kusto-phase1 : + Add new test cases + +commit 20f8edb08d67e98825aad1568f0fe6539e032354 +Author: root +Date: Thu Jun 9 12:06:15 2022 -0700 + + corrected unit test + +commit f7b84af6aaf5b6e7a5e295f7a7b6635a5071e75f +Author: root +Date: Thu Jun 9 11:29:51 2022 -0700 + + removed unwanted comments + +commit a1f2f8f8ca73453cbbb08eef53ea62bef662e2af +Author: root +Date: Thu Jun 9 11:18:49 2022 -0700 + + added single unit test case for summarize bin() + +commit d594afa8efdf148b2db0f1eccd3488a6b6eed23b +Author: root +Date: Thu Jun 9 11:04:20 2022 -0700 + + Kusto summarize init + +commit 5763e77e7651d4847d8e87e227f43e35168cdd0a +Author: Yong Wang +Date: Wed Jun 8 10:14:03 2022 -0700 + + Kusto-phase1: Add Support to Kusto Query Language + + This is the initial implement of Kusto Query Language. + + in this commit, we support the following features as MVP : + + Tabular expression statements + Limit returned results + Select Column (basic project) + sort, order + Perform string equality operations + Filter using a list of elements + Filter using common string operations + Some string operators + Aggregate by columns + Base aggregate functions + only support avg, count ,min, max, sum + Aggregate by time intervals + +commit 6c5a2a1214c0166b2b0fe999cf8850055872315f +Merge: 29273d2bc1 0d1c31a869 +Author: Mikhail f. 
Shiryaev +Date: Thu Aug 4 21:34:00 2022 +0200 + + Merge pull request #39853 from ClickHouse/release-tweak-generate + + Update tweak on version part update + +commit 0d1c31a869d9720c6357f6d97cf6aba07a88f871 +Merge: 8919fd6e58 29273d2bc1 +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 21:33:54 2022 +0200 + + Merge branch 'master' into release-tweak-generate + +commit 29273d2bc1c47debb8b2098a938b7574ad7863ab +Merge: 1eb28f5f5f 0b82fb7816 +Author: alesapin +Date: Thu Aug 4 14:48:10 2022 +0200 + + Merge pull request #39847 from nityanandagohain/patch-1 + + Extra semicolon removed from the TTL example + +commit 1eb28f5f5fc27970f3a74f86d8525eab72f98d66 +Merge: 235649cb98 f608e62c27 +Author: alesapin +Date: Thu Aug 4 14:21:31 2022 +0200 + + Merge pull request #39860 from ClickHouse/avoid_additional_disk_touch + + Better total part size calculation on mutation + +commit 235649cb984371cb141f349f07959611062a5e12 +Merge: 8010479394 c5eab9c760 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Thu Aug 4 13:02:08 2022 +0200 + + Merge pull request #39458 from Avogar/fix-cancel-insert-into-function + + Fix WriteBuffer finalize when cancel insert into function + +commit 80104793948a82ceaefe710700096d2d31089dd2 +Merge: a952a5dfe5 1d67344ac8 +Author: Alexander Tokmakov +Date: Thu Aug 4 12:38:15 2022 +0300 + + Merge pull request #39893 from ClickHouse/tavplubix-patch-2 + + Update 02354_distributed_with_external_aggregation_memory_usage.sql + +commit 1d67344ac8106688cced06ccf18b0b3868445bfd +Author: Alexander Tokmakov +Date: Thu Aug 4 12:37:25 2022 +0300 + + Update 02354_distributed_with_external_aggregation_memory_usage.sql + +commit a952a5dfe5baa3cf5b8bba9521809bf4e4825d35 +Merge: 9e46abc560 dc25f18f13 +Author: Vitaly Baranov +Date: Thu Aug 4 09:46:25 2022 +0200 + + Merge pull request #39859 from vitlibar/fix-flaky-test_async_backups_to_same_destination + + Fix flaky integration test test_async_backups_to_same_destination. 
+ +commit 9e46abc56003a5f8854f03571577c8a36c8a4d3e +Merge: 71cb055ecc b98e645ff7 +Author: Alexey Milovidov +Date: Thu Aug 4 03:06:55 2022 +0300 + + Merge pull request #39420 from amosbird/better-projection1-fix1 + + Normalize AggregateFunction types and state representations + +commit 71cb055eccc9534b704ddaa6dfcc63c97cd2528b +Merge: fe95703a49 91e3e2f18b +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Thu Aug 4 01:36:47 2022 +0200 + + Merge pull request #39812 from guowangy/applyFunction-multi-thread + + KeyCondition: optimize applyFunction in multi-thread scenario + +commit fe95703a49d612547e9c5ac24d01734fc900f45b +Merge: f474eb957d ce3411b0ff +Author: Alexey Milovidov +Date: Thu Aug 4 02:33:58 2022 +0300 + + Merge pull request #39586 from guowangy/bytes-to-bits-mask + + Improve bytes to bits mask transform for SSE/AVX/AVX512 + +commit f474eb957dcab93c89015f9db1ace36e94f191f2 +Merge: 5297592f38 e292d830f5 +Author: Alexey Milovidov +Date: Thu Aug 4 01:56:52 2022 +0300 + + Merge pull request #39758 from ClickHouse/tsan_clang_15 + + Try clang-15 for build with tsan + +commit 5297592f38765293ed592726439c5bec438d6896 +Merge: 9987a9e740 58fc49df66 +Author: Alexey Milovidov +Date: Thu Aug 4 01:51:34 2022 +0300 + + Merge pull request #39868 from ClickHouse/auto/v22.3.10.22-lts + + Update version_date.tsv after v22.3.10.22-lts + +commit 9987a9e7400e125c51a86e7ac9d3f429a81225f6 +Merge: a5d5dc2c00 149581e319 +Author: Alexey Milovidov +Date: Thu Aug 4 01:48:44 2022 +0300 + + Merge pull request #39862 from ClickHouse/follow-up-do-not-optimize-functions-shadowing-args + + Remove prefer_localhost_replica from test + +commit a5d5dc2c00047f5b4f2b2e58f1e456c50a7e3522 +Merge: 1842a3fc7a 517f821e94 +Author: Alexey Milovidov +Date: Thu Aug 4 01:46:08 2022 +0300 + + Merge pull request #39323 from ClickHouse/clickhouse-server-service + + Clean out our clickhouse-server.service from /etc + +commit 8919fd6e58aa18fd0f783b51458bced9a643ce2b +Author: Mikhail 
f. Shiryaev +Date: Thu Aug 4 00:40:32 2022 +0200 + + Add handful notes to a post-release logging + +commit ff26492830551a46ea6c3903baaf3d221a31ed6a +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 00:24:43 2022 +0200 + + Prevent spoiling rollback_stack + +commit 1842a3fc7a8fdb50491aa97a5b531088ac63fdb9 +Merge: 4354e3db96 8533769132 +Author: Mikhail f. Shiryaev +Date: Thu Aug 4 00:21:44 2022 +0200 + + Merge pull request #39709 from ClickHouse/update-ccache + + Update ccache to the latest available version + +commit 4354e3db9680a442f14ed30e4e235ab472783fba +Merge: b84e65bb3b 0e6a0d589f +Author: Dan Roscigno +Date: Wed Aug 3 16:41:29 2022 -0400 + + Merge pull request #39873 from DanRoscigno/translate-guides-to-zh + + moving PR from clickhouse-docs + +commit 0e6a0d589f7a5fbe05472f8e1f1c50bf074b20db +Author: DanRoscigno +Date: Wed Aug 3 16:27:17 2022 -0400 + + moved image dir + +commit 8b1b059ae96520f6b84ebe6e743199bbdfa07a1f +Author: DanRoscigno +Date: Wed Aug 3 15:56:32 2022 -0400 + + wrong directory + +commit d0c3de9da9e5df877d9034fbe9fd873d9ead4d74 +Author: DanRoscigno +Date: Wed Aug 3 15:55:12 2022 -0400 + + wrong dir + +commit ab455f3767a55d55d22dfbfe0419509277b707f9 +Author: DanRoscigno +Date: Wed Aug 3 14:47:31 2022 -0400 + + moving PR from clickhouse-docs + +commit b84e65bb3b7d4162e9caf0fadd296a895db38b3e +Merge: e6efb47aa3 b386db02e1 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Wed Aug 3 18:53:37 2022 +0200 + + Merge pull request #39716 from arthurpassos/fix_scalar_cte_with_lc_result + + Unwrap LC column in IExecutablefunction::executeWithoutSparseColumns + +commit e6efb47aa362d1ce0731e4f1f7e4070cd6eaa367 +Merge: 4943202921 3e6b663020 +Author: Maksim Kita +Date: Wed Aug 3 18:50:25 2022 +0200 + + Merge pull request #39850 from kitaisreal/select-query-has-join-method + + Add hasJoin method into ASTSelectQuery + +commit 4943202921001add4b363fd1cd770db898079877 +Author: Nikita Taranov +Date: Wed Aug 3 17:56:59 2022 +0200 + + Improve memory 
usage during memory efficient merging of aggregation results (#39429) + +commit 58fc49df6665cc939032a59d58798e45e2d06780 +Author: robot-clickhouse +Date: Wed Aug 3 14:53:22 2022 +0000 + + Update version_date.tsv after v22.3.10.22-lts + +commit 1c0d2677673a82336641605d08b743964eb3dadc +Merge: f144eae388 b4c3ff0cef +Author: Alexey Milovidov +Date: Wed Aug 3 16:38:25 2022 +0300 + + Merge pull request #39861 from nathanbegbie/fix-docs-typo-postgres + + typo: PostgerSQL -> PostgreSQL + +commit f144eae388b93c65aedf9237cf3a3f2dd8856c31 +Author: Nikita Mikhaylov +Date: Wed Aug 3 15:23:07 2022 +0200 + + Fix typo and extra dots in exception messages from OverCommitTracker (#39858) + +commit 149581e319ac2b78bdd65cfd78b27519ca16eb8e +Author: Igor Nikonov +Date: Wed Aug 3 13:15:16 2022 +0000 + + Remove prefer_localhost_replica + + Test queries failed before fix #39103 regardless the setting value + The setting is randomized + +commit 9eef299e110a7d86ce0baa7d2b6b453a7fb424f8 +Merge: 6b15ee2bd9 ce70f3dacb +Author: Igor Nikonov <954088+devcrafter@users.noreply.github.com> +Date: Wed Aug 3 15:06:42 2022 +0200 + + Merge pull request #39103 from tonickkozlov/tonickkozlov/37032/do-not-optimize-functions-shadowing-args + + Do not optimize GROUP BY functions that shadow their arguments + +commit b4c3ff0cef0a95063cef4f9c6ea93517bff6c002 +Author: nathanbegbie +Date: Wed Aug 3 16:01:24 2022 +0300 + + typo: PostgerSQL -> PostgreSQL + +commit de91875b5e8e0ad425d7110229d3861509a24beb +Author: nathanbegbie +Date: Wed Aug 3 15:59:36 2022 +0300 + + Revert "typo: PostgerSQL -> PostgreSQL" + + This reverts commit fda8b113dc88100ff80dfd778ed1e0bcd740d4d2. + +commit fda8b113dc88100ff80dfd778ed1e0bcd740d4d2 +Author: nathanbegbie +Date: Wed Aug 3 15:54:58 2022 +0300 + + typo: PostgerSQL -> PostgreSQL + +commit 8533769132027c4bd8fee5386dc1a3837704e470 +Author: Mikhail f. 
Shiryaev +Date: Sat Jul 30 00:47:12 2022 +0200 + + Use compression and cleanup with the recent version ccache + +commit eeaf08525fd167b9241ced37840fd007d1157579 +Author: Mikhail f. Shiryaev +Date: Fri Jul 29 14:31:53 2022 +0200 + + Use test-util as source for base-test, fasttest and package builder + +commit dc25f18f132006b6b194788bf9769b412c4f8e59 +Author: Vitaly Baranov +Date: Wed Aug 3 14:04:18 2022 +0200 + + Fix flaky integration test test_async_backups_to_same_destination. + +commit f608e62c27bd5c830b87d646758081def912224f +Author: alesapin +Date: Wed Aug 3 14:17:31 2022 +0200 + + Fix call + +commit 56a4d26e87b5100f59b54a4cf2bccbcb15fdcebd +Author: alesapin +Date: Wed Aug 3 14:15:45 2022 +0200 + + Better total part size calculation on mutation + +commit 6b15ee2bd96f6f101d2b17b8a6e5c0ab9c48c34e +Merge: 05467e315f f94d4d4877 +Author: Anton Popov +Date: Wed Aug 3 13:51:16 2022 +0200 + + Merge pull request #39685 from CurtizJ/hash-functions-map + + Allow to execute hash functions with arguments of type `Map` + +commit 62a05dc10df37725b58e9e222c633addc801bacb +Author: Mikhail f. 
Shiryaev +Date: Wed Aug 3 13:40:31 2022 +0200 + + Add instructions for github-cli installation + +commit 05467e315f56599ed3bc2124d0c60ec13cd1a26c +Merge: 86561509c5 cbff608d0b +Author: Antonio Andelic +Date: Wed Aug 3 13:13:00 2022 +0200 + + Merge pull request #39698 from ClickHouse/update-digest-version + + Update Keeper version for digest + +commit 86561509c510e97709e9a1151667a45d31f28c85 +Merge: cdee1d94d0 3b96ff0fe4 +Author: alesapin +Date: Wed Aug 3 13:03:31 2022 +0200 + + Merge pull request #39673 from ClickHouse/fix-rollback-inconsistency-keeper + + Rollback request in Keeper if storing log fails + +commit cdee1d94d0331f12b0164f614f58689ed96a66b2 +Merge: 14135927fb 7c8ceead63 +Author: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> +Date: Wed Aug 3 12:31:40 2022 +0200 + + Merge pull request #39404 from HeenaBansal2009/Issue_39395 + + Clickhouse-local fixes + +commit 14135927fb790963047753787cbea730f7ac5c66 +Merge: 2ca9df9b22 e78a176b0a +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:10:08 2022 +0200 + + Merge pull request #39854 from ClickHouse/auto/v22.7.2.15-stable + + Update version_date.tsv and changelogs after v22.7.2.15-stable + +commit e78a176b0a63bc9333e3481c06f42c5e905e0b53 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:09:29 2022 +0200 + + Regenerate changelog with the recent script + +commit 49b1f62abd91af876190b4ebb4b343500f541105 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:06:45 2022 +0200 + + Update SECURITY.md + +commit c05526beeff79f624394a5e07db3f180436736b1 +Merge: da655fbfcf 2ca9df9b22 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 12:04:34 2022 +0200 + + Merge remote-tracking branch 'origin/master' into auto/v22.7.2.15-stable + +commit 2ca9df9b22f46f30e6dd7e7fd007d04430c74644 +Merge: 1815b8c00c 469b7e7668 +Author: Mikhail f. 
Shiryaev +Date: Wed Aug 3 12:04:03 2022 +0200 + + Merge pull request #39421 from ClickHouse/github-helper + + GitHub helper + +commit da655fbfcfeee1dc0c39e2e7ec80eb1057ecb0f2 +Author: robot-clickhouse +Date: Wed Aug 3 09:57:02 2022 +0000 + + Update version_date.tsv and changelogs after v22.7.2.15-stable + +commit 08474cf869078120e5e32e83b3f3eb8cca676de2 +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 11:43:47 2022 +0200 + + Update tweak on version part update + +commit 3e6b663020833e26dfa2a08af28e47ea35453280 +Author: Maksim Kita +Date: Wed Aug 3 11:25:45 2022 +0200 + + ASTSelectQuery added hasJoin method + +commit 1815b8c00c8397f9943d175153c6e946707d1009 +Merge: 00a7c8733b fd8ad12e6b +Author: Mikhail f. Shiryaev +Date: Wed Aug 3 10:34:40 2022 +0200 + + Merge pull request #39730 from ClickHouse/jepsen-label + + Jepsen label + +commit 00a7c8733b5e1600709e038faf431cd15fad7893 +Merge: 80d2685ab7 6a7213291b +Author: Robert Schulze +Date: Wed Aug 3 09:23:24 2022 +0200 + + Merge pull request #39633 from guowangy/filter-vbmi2 + + ColumnVector: optimize filter with AVX512VBMI2 compress store + +commit 0b82fb78164ea08d5ee4e8f89055bc7285a82b52 +Author: Nityananda Gohain +Date: Wed Aug 3 12:52:06 2022 +0530 + + Extra semicolon removed from the TTL example + + This PR removes an extra semicolon from the TTL example. 
+ +commit 80d2685ab714abd8c4b23ed93ad55ce39f64eeff +Merge: e2a5faede9 a4c4b1f54d +Author: Alexey Milovidov +Date: Wed Aug 3 09:10:08 2022 +0300 + + Merge pull request #39814 from qianmoQ/fix-cte + + Support cte statement for antlr4 syntax file #39810 + +commit cbff608d0b48dc81a976d09871d968dbbb3d0095 +Merge: b3b3c371f0 e2a5faede9 +Author: Antonio Andelic +Date: Wed Aug 3 07:53:38 2022 +0200 + + Merge branch 'master' into update-digest-version + +commit e2a5faede91980b07dc8ff193f008f17d5ba634f +Merge: 70d97e9393 504180d7d6 +Author: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> +Date: Tue Aug 2 22:55:40 2022 -0400 + + Merge pull request #39843 from ClickHouse/util-self-extracting-macos-script-fix + + Fix post-build script for building utils/self-extracting-executable/compressor + +commit a4c4b1f54da352cf522109ec3120bc773844fdfe +Merge: 094b28b869 70d97e9393 +Author: qianmoQ +Date: Wed Aug 3 09:50:08 2022 +0800 + + Merge branch 'master' into fix-cte + +commit 504180d7d6576467f25c61518ca4742dfd45335f +Author: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> +Date: Tue Aug 2 15:39:11 2022 -0400 + + stat is different for macos + +commit 469b7e7668cd976b058079c4c0e8a3e1c8769f53 +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 18:44:49 2022 +0200 + + Add notes about _is_cache_updated logic + +commit 70d97e9393885b8949115827438fde29d5f8a733 +Merge: ec8a11dfdd f0474f9e46 +Author: Mikhail f. 
Shiryaev +Date: Tue Aug 2 18:29:43 2022 +0200 + + Merge pull request #39630 from ClickHouse/workflow-rerun-lambda + + Attempt to fix wrong workflow_run data for rerun + +commit ec8a11dfdd2f3ce8fa223192263ae32ee6430ca7 +Merge: 2a5b023b0f ad55c2f55a +Author: Alexander Tokmakov +Date: Tue Aug 2 18:48:34 2022 +0300 + + Merge pull request #39817 from ClickHouse/revert-39788-revert-39124-fix-02232_dist_insert_send_logs_level_hung + + Revert "Revert "tests: enable back 02232_dist_insert_send_logs_level_hung"" + +commit 2a5b023b0f50aa610f95452cbe9fd2e9d4ace8ca +Merge: 6405439976 b1919d045f +Author: Anton Popov +Date: Tue Aug 2 16:06:13 2022 +0200 + + Merge pull request #39800 from CurtizJ/fix-cannot-read-all-data + + Fix `CANNOT_READ_ALL_DATA` with `pread_threadpool`. + +commit b386db02e14caf5cc4eb283c5d05b822dbb01e0e +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Tue Aug 2 15:51:57 2022 +0200 + + Fix test + +commit 6405439976e8e6e5321230ec3e47dd60e846293c +Merge: 914bd3654f 90b08d6fae +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Tue Aug 2 15:49:14 2022 +0200 + + Merge pull request #39558 from evillique/fix-logs + + Fix logs rotation issue + +commit 914bd3654f5b95b6b95e4a8eace1fe55eef3e3ee +Merge: 2312d8ceea e5c47cb26f +Author: Alexander Tokmakov +Date: Tue Aug 2 16:40:55 2022 +0300 + + Merge pull request #39798 from ClickHouse/tavplubix-patch-2 + + Minor fix for Stress Tests + +commit e292d830f57540e20b1f1f021f86d13eb38ca4a2 +Author: Alexander Tokmakov +Date: Tue Aug 2 15:37:02 2022 +0300 + + Update Dockerfile + +commit 5f7848ffd48f40a34e72d22182571fe31bfa666d +Author: Arthur Passos +Date: Tue Aug 2 09:30:54 2022 -0300 + + Replace LC CTE scalar integ tests by stateless tests + +commit 2312d8ceea244b7c53ba9baa94c1ec21086cbe7d +Merge: f150966f10 5d6804dd65 +Author: Anton Popov +Date: Tue Aug 2 14:27:01 2022 +0200 + + Merge pull request #39696 from BiteTheDDDDt/fix_0729 + + fix align of AggregateFunctionDistinct 
+ +commit e9b124b4bcced6ebc5a7a77aa54aca2ddb48db15 +Author: Arthur Passos +Date: Tue Aug 2 09:17:53 2022 -0300 + + Don't use default implementation for LC columns in Scalar functions + +commit fd8ad12e6b35d816b88c61d8df688dcb75be4c13 +Merge: 621da05145 f150966f10 +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 13:56:02 2022 +0200 + + Merge branch 'master' into jepsen-label + +commit f94d4d4877e61fef816dceaecc2c7fdf62c1e814 +Merge: 2a841d0860 f150966f10 +Author: Anton Popov +Date: Tue Aug 2 13:26:54 2022 +0200 + + Merge branch 'master' into hash-functions-map + +commit f150966f1028ffeef44f61bd8c5c2f170183cd36 +Merge: cd9fc3b5ab 6126bd60ed +Author: Mikhail f. Shiryaev +Date: Tue Aug 2 13:13:47 2022 +0200 + + Merge pull request #39723 from ClickHouse/cherry-pick-fix + + Fix cherry-pick for cases, when assignee is not set for PR + +commit ce70f3dacb4b197e7b96617b2d9de742fd477f20 +Author: Anton Kozlov +Date: Thu Jul 14 15:07:47 2022 +0000 + + fixed 02303_query_kind test; added logging in 02352_grouby_shadows_arg test + +commit 82b50e79cf60393e5ba8b2d07f7122706243dffd +Merge: 0d68b1c67f cd9fc3b5ab +Author: Alexander Tokmakov +Date: Tue Aug 2 13:00:55 2022 +0300 + + Merge branch 'master' into tsan_clang_15 + +commit ad55c2f55a1f85368061bdb9483b4b057c172859 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:49:29 2022 +0300 + + Revert "Revert "tests: enable back 02232_dist_insert_send_logs_level_hung"" + +commit cd9fc3b5ab014bb7d28d34c7ec3eb2dd1f1400ac +Merge: 5ae7f339c4 5050e0aca5 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:48:43 2022 +0300 + + Merge pull request #39816 from ClickHouse/revert-38185-analyze_stuck + + Revert "Limit number of analyze for one query" + +commit 5050e0aca52189cbe3bc07c10dfd2a40e0180107 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:48:31 2022 +0300 + + Revert "Limit number of analyze for one query" + +commit 094b28b869766ca43e2ac5b427e7d220b889a572 +Author: qianmoQ +Date: Tue Aug 2 17:17:08 2022 +0800 + + Support cte statement for antlr4 syntax 
file #39810 + +commit e5c47cb26f3d9cd15ebbdb1383865469aca81dc0 +Author: Alexander Tokmakov +Date: Tue Aug 2 12:10:53 2022 +0300 + + Update run.sh + +commit 5ae7f339c417d1ceba5dc67c1689e7be587d92c5 +Merge: a3bf9496d4 e832153e93 +Author: Robert Schulze +Date: Tue Aug 2 10:41:14 2022 +0200 + + Merge pull request #39813 from ClickHouse/typos + + Typos + +commit c5eab9c760c8f33752978f4f90bd33f65a60604c +Author: avogar +Date: Tue Aug 2 08:38:15 2022 +0000 + + Delete test for s3 + +commit e832153e93561fb7e075e1aa423ce2c933f77cec +Author: Robert Schulze +Date: Tue Aug 2 08:37:58 2022 +0000 + + Typos + +commit a3bf9496d4c1eaa231d40427bb3cc8c265667659 +Merge: 316528817b 77c143aa23 +Author: Nikolai Kochetov +Date: Tue Aug 2 10:35:35 2022 +0200 + + Merge pull request #39799 from ClickHouse/fix-extra-column-after-array-join-optimization + + Fix extra column after ARRAY JOIN optimization. + +commit 91e3e2f18bdf70d8d3cc66d16074605d9743a40c +Author: Wangyang Guo +Date: Tue Aug 2 15:38:27 2022 +0800 + + KeyCondition: optimize applyFunction in multi-thread scenario + + Construct and deconstruct args (ColumnsWithTypeAndName) will inc/dec + ref_count (actually this is a atomic lock inc/dec operation) to share_ptr, + which may share the same DataTypePtr among different threads. This will + have a lock contention issue in large parallel situation. + + The patch try to minimize `args` scope and reduce unnecessary + construct/destory of instances. It will improve the performance in + multi-thread cases. + +commit b3b3c371f068d390a7fed7623197e7122929f204 +Author: Antonio Andelic +Date: Tue Aug 2 09:20:02 2022 +0200 + + Update KeeperStorage.h + +commit b98e645ff7316af83457aa5df3dee8ca660c3cef +Author: Amos Bird +Date: Tue Aug 2 11:33:45 2022 +0800 + + Revert "test what will be wrong if state returns norm type" + + This reverts commit 55802099bcf42ccca359a1ddc462b20ab72123df. 
+ +commit 7c8ceead63b38eed7ae0f2fe538eda63d13ad826 +Merge: d8db482b2e 316528817b +Author: Heena Bansal +Date: Mon Aug 1 22:49:58 2022 -0400 + + Merge branch 'master' into Issue_39395 + +commit 81a15304ca9f6f2d8d0f425aef9b9a04cb79a840 +Merge: 3cc20f05ba 316528817b +Author: Alexey Milovidov +Date: Tue Aug 2 05:45:04 2022 +0300 + + Merge branch 'master' into tavplubix-patch-2 + +commit 316528817b2458fd37960b965f9eef10b4d13535 +Merge: b33fe26d8c 3e627e2861 +Author: Alexey Milovidov +Date: Tue Aug 2 05:44:35 2022 +0300 + + Merge pull request #39179 from azat/fsync-profile-events + + Add profile events for fsync + +commit c8aaa32f9ca75f89fe5848e1266fbca120a4ddc4 +Author: Amos Bird +Date: Tue Aug 2 10:43:48 2022 +0800 + + Revert "Another test" + + This reverts commit 69347028c54edcedc9a43e6795c52c15ad6972ec. + +commit 6a7213291b2c10105ea15081ec977e1e75789187 +Merge: 6a67147584 b33fe26d8c +Author: Wangyang Guo +Date: Tue Aug 2 10:40:40 2022 +0800 + + Merge master and resolve conflict + +commit b1919d045f45df9f63f527e3323cc66e303a2ff9 +Author: Alexey Milovidov +Date: Tue Aug 2 05:34:14 2022 +0300 + + Update ThreadPoolReader.cpp + +commit ce3411b0ff5ec37e7103b29b310e1225029b2f84 +Merge: 3fa1a775d9 b33fe26d8c +Author: Wangyang Guo +Date: Tue Aug 2 10:27:01 2022 +0800 + + Merge master and solve conflict + +commit 90b08d6faee4b5d886210784d0f23a0f7257dc7b +Merge: fbedb70f8b b33fe26d8c +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Tue Aug 2 02:55:29 2022 +0200 + + Merge branch 'master' into fix-logs + +commit 64cbecf0c8e32b86d2049f0b41069efdf36458a3 +Author: Anton Popov +Date: Tue Aug 2 00:13:20 2022 +0000 + + fix build on non linux systems + +commit b33fe26d8cd295f2e9fadb33fa447a7688c06788 +Merge: 82e78a03e5 0e154ed1df +Author: Robert Schulze +Date: Mon Aug 1 22:39:57 2022 +0200 + + Merge pull request #39759 from ClickHouse/splitted-to-shared-renaming + + Rename "splitted build" to "shared libraries build" in CI tools + +commit 
82e78a03e51c7fb8c2bd640eae020414fcddff0d +Author: Yuko Takagi <70714860+yukotakagi@users.noreply.github.com> +Date: Mon Aug 1 14:09:28 2022 -0600 + + Add URL for release webinar (#39796) + + Add URL for release webinar. + +commit 6792c3211d5fcd430d0f5715eedb93078c5e988d +Merge: 755a4c3ecf 55af8878a5 +Author: Alexander Tokmakov +Date: Mon Aug 1 22:18:43 2022 +0300 + + Merge pull request #39804 from ClickHouse/revert-39510-update-arrow + + Revert "Update arrow to fix possible data race" + +commit 55af8878a52079e969907532cc374380b33d8032 +Author: Alexander Tokmakov +Date: Mon Aug 1 22:18:34 2022 +0300 + + Revert "Update arrow to fix possible data race" + +commit 5d6804dd6544283a4f8354057b37b16787ca0e2f +Merge: b4842860e3 755a4c3ecf +Author: Anton Popov +Date: Mon Aug 1 21:14:46 2022 +0200 + + Merge branch 'master' into fix_0729 + +commit 69347028c54edcedc9a43e6795c52c15ad6972ec +Author: Amos Bird +Date: Tue Aug 2 03:08:25 2022 +0800 + + Another test + +commit 43e8ca5ba81cd989a5dc5601083e92d60ff8c92b +Author: Anton Popov +Date: Mon Aug 1 18:40:21 2022 +0000 + + fix CANNOT_READ_ALL_DATA with pread_threadpool + +commit 77c143aa235ce4959d01a0c94ff24eb8c7ff56ee +Author: Nikolai Kochetov +Date: Mon Aug 1 17:56:27 2022 +0000 + + Fix extra column after ARRAY JOIN optimization. 
+ +commit 3cc20f05babe5cec22490460b74e1d45a62345f4 +Author: Alexander Tokmakov +Date: Mon Aug 1 20:47:14 2022 +0300 + + Update run.sh + +commit 755a4c3ecfcabc23c4735a66dba33b62edde4d5f +Merge: 3a57634dbb a63fb07f54 +Author: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> +Date: Mon Aug 1 19:10:15 2022 +0200 + + Merge pull request #39794 from melvynator/patch-5 + + Update settings.md + +commit 3a57634dbbff898bb160ec64c955122f70d28485 +Merge: c083abd40b d3cfa0a0c0 +Author: Alexander Tokmakov +Date: Mon Aug 1 20:09:31 2022 +0300 + + Merge pull request #39772 from ClickHouse/fix_distinct_in_order_test + + Fix non-deterministic queries in distinct_in_order test + +commit c083abd40b52884793a9f55d7004bad9223f20fd +Merge: 3de747a6e2 65efc0ec98 +Author: Alexander Tokmakov +Date: Mon Aug 1 19:59:21 2022 +0300 + + Merge pull request #39775 from ClickHouse/fix_subnets_integration_tests + + Fix some flaky integration tests + +commit d8db482b2e11c255aff09921b7c8e72eea938b1f +Merge: 50c98789b8 3de747a6e2 +Author: Heena Bansal +Date: Mon Aug 1 12:22:16 2022 -0400 + + Merge branch 'master' into Issue_39395 + +commit 3de747a6e25a35d79e93c1ad4852bd9a3db2c0e7 +Merge: 31891322a5 567b57a627 +Author: Alexey Milovidov +Date: Mon Aug 1 19:21:30 2022 +0300 + + Merge pull request #39746 from ClickHouse/cleanup-projection-setting + + Cleanup usages of `allow_experimental_projection_optimization` setting, part 1 + +commit 0d68b1c67f4707fa97dab2fa9d36ebb7b5e044b9 +Author: Alexander Tokmakov +Date: Mon Aug 1 18:00:54 2022 +0200 + + fix build with clang-15 + +commit 31891322a51febe79ec3edba6278b5cecdd9e8df +Merge: bf574b9154 b9d7cd6a5d +Author: Maksim Kita +Date: Mon Aug 1 17:59:52 2022 +0200 + + Merge pull request #39681 from pkit/pkit/executable_settings + + add settings for executable table func + +commit d3cfa0a0c0f481c130c4b6842be27eae79602cd8 +Merge: 914cf8eb4d bf574b9154 +Author: Igor Nikonov <954088+devcrafter@users.noreply.github.com> +Date: Mon Aug 1 17:19:55 2022 
+0200 + + Merge branch 'master' into fix_distinct_in_order_test + +commit bf574b91547aec799364d032564606feb5a8bf03 +Merge: 2fd7530880 d39259a4c0 +Author: Robert Schulze +Date: Mon Aug 1 17:04:51 2022 +0200 + + Merge pull request #39760 from ClickHouse/bit-fiddling + + Use std::popcount, ::countl_zero, ::countr_zero functions + +commit 2fd75308807f0b43db5efc9612163cca697cac52 +Merge: 2150d0b9b0 af2f1b4cc3 +Author: Mikhail f. Shiryaev +Date: Mon Aug 1 16:34:23 2022 +0200 + + Merge pull request #39780 from ClickHouse/ch-play-retry + + Retry inserts with ClickHouseHelper + +commit 2a841d0860fc599694ebe639550798dd0547dfe1 +Author: Anton Popov +Date: Mon Aug 1 14:21:07 2022 +0000 + + update docs for hash functions + +commit a63fb07f54d1ce801acd5dd022459a1960360bd9 +Author: Peignon Melvyn +Date: Mon Aug 1 16:20:33 2022 +0200 + + Update settings.md + +commit 50c98789b8ffc2d2b886fb31f28f5a3ba9bfb85b +Merge: 800ed546be fa9c3dcc48 +Author: HeenaBansal2009 +Date: Mon Aug 1 07:05:50 2022 -0700 + + Updated as per comments + +commit 800ed546bef57d0ae6d7c7c2809c645aec32f487 +Author: HeenaBansal2009 +Date: Mon Aug 1 07:03:36 2022 -0700 + + Updated as per comments + +commit 2150d0b9b08497f035c10390b3267fec00ab4e6b +Merge: 8a3ec52b5e 63f9cf02cc +Author: Alexander Tokmakov +Date: Mon Aug 1 16:32:34 2022 +0300 + + Merge pull request #39788 from ClickHouse/revert-39124-fix-02232_dist_insert_send_logs_level_hung + + Revert "tests: enable back 02232_dist_insert_send_logs_level_hung" + +commit 63f9cf02cc103089c4ba2cb81bf59dcb400f5099 +Author: Alexander Tokmakov +Date: Mon Aug 1 16:32:24 2022 +0300 + + Revert "tests: enable back 02232_dist_insert_send_logs_level_hung" + +commit af2f1b4cc3610506b5cf996a4ef8a197c245d4a4 +Author: robot-clickhouse +Date: Mon Aug 1 13:22:53 2022 +0000 + + Automatic style fix + +commit 33b26dda05db2c3f2c0454e3a57c5d61b73460ad +Author: Mikhail f. 
Shiryaev +Date: Mon Aug 1 15:15:48 2022 +0200 + + Improve logging + + Co-authored-by: Antonio Andelic + +commit 55802099bcf42ccca359a1ddc462b20ab72123df +Author: Amos Bird +Date: Mon Aug 1 11:37:36 2022 +0800 + + test what will be wrong if state returns norm type + +commit 1ac716b7427b684711571374729bba88782087ae +Author: Amos Bird +Date: Sun Jul 31 02:45:33 2022 +0800 + + Remove no-s3-storage tag from tests + +commit 8ab475ccf379347cafde4183f53fc3d22c78ae41 +Author: Amos Bird +Date: Tue Jul 26 19:14:38 2022 +0800 + + Fix another case + +commit 09c99d8440fdd81c023f10e65cadcf8687ffd6ed +Author: Amos Bird +Date: Fri Jul 22 14:27:45 2022 +0800 + + Fix tests + +commit f84e5b68270dd6c4140319c09f92bbb599e5e74a +Author: Amos Bird +Date: Fri Jul 22 14:26:46 2022 +0800 + + Allow to format DataTypePtr + +commit 0e746c1afa01b520528d52e97078c41938779fc3 +Author: Amos Bird +Date: Fri Jul 22 12:52:54 2022 +0800 + + More format refactor + +commit 2b2ee8a2c3c38c7dea2560a095b8cbf06f3dc489 +Author: Amos Bird +Date: Fri Jul 22 10:57:54 2022 +0800 + + Format tests + +commit f23b3d64dcdb8dd044ad4c02a8fad4b2e77f889c +Author: Amos Bird +Date: Thu Jul 21 23:09:56 2022 +0800 + + Add tests + +commit f11d0484f3ad0ddbf829e695056fdd154d12a2cc +Author: Amos Bird +Date: Thu Jul 21 23:08:42 2022 +0800 + + Normalize everything else + +commit 2a73ccb3f681b73cbd5f5abcb4eaa9ad1a885ac4 +Author: Amos Bird +Date: Wed Jul 20 22:13:06 2022 +0800 + + Normalize AggregateFunctionCount type comparison + +commit 079db7f34b6207704148c9a948ea4d6d6a832445 +Author: Mikhail f. 
Shiryaev +Date: Mon Aug 1 14:59:13 2022 +0200 + + Retry inserts with ClickHouseHelper + +commit 8a3ec52b5e06f20ccd3472aed7fa440b625ebc0e +Merge: eeb9366010 095e400075 +Author: Anton Popov +Date: Mon Aug 1 14:41:46 2022 +0200 + + Merge pull request #39752 from CurtizJ/fix-index-analysis + + Fix index analysis with tuples and `IN` + +commit eeb9366010f3d336689dba3ccefc4bc6c0477b69 +Merge: c882bdc88e 942f056ce5 +Author: Anton Popov +Date: Mon Aug 1 14:22:49 2022 +0200 + + Merge pull request #39731 from CurtizJ/fix-send-logs-level-test + + Fix redirecting of logs to stdout in clickhouse-client + +commit c882bdc88e75b249ecc901ac68145f1a6cf93ed9 +Merge: 49a708ff29 6a2f7d0c8f +Author: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> +Date: Mon Aug 1 13:57:17 2022 +0200 + + Merge pull request #35968 from ClickHouse/interserver_listen_port + +commit 49a708ff2995917754dcab9076ddeaae68c088a1 +Merge: 80f9ba9186 c9e1364cd0 +Author: Robert Schulze +Date: Mon Aug 1 13:44:33 2022 +0200 + + Merge pull request #39596 from ClickHouse/move-woboq + + Merge Woboq code browser page into "Getting Started" document + +commit 621da0514510c40a169bfb0d8e8e8663ccf24e69 +Merge: d86f07d7ac 80f9ba9186 +Author: Antonio Andelic +Date: Mon Aug 1 13:35:27 2022 +0200 + + Merge branch 'master' into jepsen-label + +commit 65efc0ec98f9771ed98c9212a73eb0057cbb8be0 +Author: robot-clickhouse +Date: Mon Aug 1 11:24:03 2022 +0000 + + Automatic style fix + +commit 80f9ba9186dea2f9db26559ee527dae8c9dac6de +Merge: dfdfabec94 87d513f799 +Author: Alexander Tokmakov +Date: Mon Aug 1 14:20:37 2022 +0300 + + Merge pull request #39690 from ClickHouse/show-addresses-in-stack-traces + + Configuration parameter to hide addresses in stack traces + +commit 38e5e885c31687e9fe76fa5d3304626f574e5d18 +Author: Alexander Tokmakov +Date: Mon Aug 1 13:16:12 2022 +0200 + + fix some flaky integration tests + +commit dfdfabec947065dd4939d8ca92e984212eba0f31 +Merge: 91c0b94768 f79924f270 +Author: Kruglov Pavel 
<48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 13:04:19 2022 +0200 + + Merge pull request #39218 from evillique/file_default_value + + Add default argument to the function `file` + +commit 74f87a95c707f4c28c046511b6da08f781b325ab +Author: avogar +Date: Mon Aug 1 10:57:55 2022 +0000 + + Fis style + +commit 91c0b9476889c074ca1388de867febde5ce51dd5 +Merge: 42136b7630 075ff5005e +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:54:02 2022 +0200 + + Merge pull request #39510 from Avogar/update-arrow + + Update arrow to fix possible data race + +commit 42136b7630fe144a75a0e6caa233bc01f71e83ec +Merge: 6457c069a9 be656f9dfa +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:46:07 2022 +0200 + + Merge pull request #39647 from Avogar/fix-arrow-strings + + Fix strings in dictionary in Arrow format + +commit 6457c069a97453124245afd72682e1f65822af9a +Merge: 9ec27c0ab4 d66c108a04 +Author: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> +Date: Mon Aug 1 12:42:18 2022 +0200 + + Merge pull request #39293 from kssenii/fix-positional-args-case + + Fix positional arguments in case of unneeded columns pruning + +commit 914cf8eb4d78b9b5249da06d8f21686ee63c4468 +Author: Igor Nikonov +Date: Mon Aug 1 10:40:18 2022 +0000 + + Fix non-deterministic queries + +commit 9ec27c0ab45c78699f07c7175845358d604d713e +Merge: 379d8c5c6a d87aac2013 +Author: Antonio Andelic +Date: Mon Aug 1 12:17:10 2022 +0200 + + Merge pull request #39757 from ClickHouse/fix-rocksdb-filter-with-params + + Use params correctly for filtering by key in EmbeddedRocksDB + +commit 379d8c5c6a2732c4c9e6e87ae347e0e4690975a8 +Author: Nikita Mikhaylov +Date: Mon Aug 1 12:08:32 2022 +0200 + + Chown all directories for multidisk setup (#39121) + +commit 3bb060336dfad62e591889743f9959f984475916 +Merge: 76f2ba3e98 aff8c12a4a +Author: Alexander Tokmakov +Date: Mon Aug 1 12:42:51 2022 +0300 + + Merge pull request #39124 from 
azat/fix-02232_dist_insert_send_logs_level_hung + + tests: enable back 02232_dist_insert_send_logs_level_hung + +commit c9e1364cd00128fef9b8b7c691263ca94e443a8b +Author: Robert Schulze +Date: Mon Aug 1 08:34:28 2022 +0000 + + Temporarily restore Woboq pages + +commit 76f2ba3e98112688fbfb9974d3d8f2c82be3b1a9 +Merge: ab5a147065 55ff4956ed +Author: Antonio Andelic +Date: Mon Aug 1 09:09:32 2022 +0200 + + Merge pull request #39738 from ClickHouse/fix-jepsen-total-queue + + Use different root path for total-queue Jepsen test + +commit 0e154ed1df6f50aa681b61790c6849b83fa45130 +Author: Robert Schulze +Date: Sun Jul 31 12:43:50 2022 +0000 + + More renamings + +commit 6a67147584b1f57cc24e05f6bfc1eb69ee831c64 +Author: Wangyang Guo +Date: Mon Aug 1 13:17:11 2022 +0800 + + ColumnVector: refactor to use TargetSpecific::Default::doFilterAligned + +commit b05be56eefa2f7de09aaa9cbd2ccf3dd394489bf +Author: Wangyang Guo +Date: Mon Aug 1 10:15:49 2022 +0800 + + ColumnVector: naming style fix + +commit ab5a1470659557f9a3f19681279298ab9aa31cb2 +Merge: c5f7a3327b 22d8e532ed +Author: Dmitry Novik +Date: Mon Aug 1 00:32:59 2022 +0200 + + Merge pull request #38725 from azat/fix-order-by-projection + + Fix ORDER BY that matches projections ORDER BY + +commit d39259a4c0fa022db55ae23cc176a4a6e0576cf0 +Author: Robert Schulze +Date: Sun Jul 31 18:25:50 2022 +0000 + + More conversions + +commit 3e627e2861e08ac511435dc79e78681f97486bc3 +Author: Azat Khuzhin +Date: Wed Jul 13 16:29:22 2022 +0300 + + Add profile events for fsync + + The following new profile events had been added: + + - FileSync - Number of times the F_FULLFSYNC/fsync/fdatasync function was called for files. + - DirectorySync - Number of times the F_FULLFSYNC/fsync/fdatasync function was called for directories. + - FileSyncElapsedMicroseconds - Total time spent waiting for F_FULLFSYNC/fsync/fdatasync syscall for files.
+ - DirectorySyncElapsedMicroseconds - Total time spent waiting for F_FULLFSYNC/fsync/fdatasync syscall for directories. + + v2: rewrite test to sh with retries + Signed-off-by: Azat Khuzhin + +commit c5f7a3327b4f4d94b71b3c0a0e42585cf7fc6886 +Merge: ccef227494 7e4fb960cb +Author: Alexey Milovidov +Date: Sun Jul 31 22:57:26 2022 +0300 + + Merge pull request #39085 from quickhouse/patch-3 + + Fixed regexp in `test_match_process_uid_against_data_owner` + +commit ccef2274949bac5e6c0e3aa264f190505f9dfc0c +Merge: 52d08d9db4 1e974b55ea +Author: Alexey Milovidov +Date: Sun Jul 31 22:53:09 2022 +0300 + + Merge pull request #38185 from vdimir/analyze_stuck + + Limit number of analyze for one query + +commit 6a2f7d0c8f41d2c9048d24f7b779b311734771ff +Merge: f80a4c184e 52d08d9db4 +Author: Alexey Milovidov +Date: Sun Jul 31 22:51:38 2022 +0300 + + Merge branch 'master' into interserver_listen_port + +commit 567b57a627dbbb6e3c44a8875cdd55128ed2a7cf +Author: Alexey Milovidov +Date: Sun Jul 31 22:44:28 2022 +0300 + + Update a test. + +commit a7734672b90bfcf4c138c7c0f9085d402f2fac31 +Author: Robert Schulze +Date: Sun Jul 31 14:34:05 2022 +0000 + + Use std::popcount, ::countl_zero, ::countr_zero functions + + - Introduced with the C++20 <bit> header + + - The problem with __builtin_c(l|t)z() is that 0 as input has an + undefined result (*) and the code did not always check. The std:: + versions do not have this issue. + + - In some cases, we continue to use __builtin_c(l|t)z(), (e.g. in + src/Common/BitHelpers.h) because the std:: versions only accept + unsigned inputs (and they also check that) and the casting would be + ugly.
+ + (*) https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + +commit 63836749c674c5c1722d1fc6dd207ec13be2865e +Author: Robert Schulze +Date: Sun Jul 31 15:04:14 2022 +0000 + + Try to fix "Docs Check" error + +commit dd030c6b48d253384beac9b5d1efe0039d2f640f +Author: Robert Schulze +Date: Sun Jul 31 13:01:35 2022 +0000 + + Add anchor + +commit fb622e4c4b2a38aa5aeb13fa0993974a599cde15 +Author: Robert Schulze +Date: Sun Jul 31 12:52:14 2022 +0000 + + Remove IDE recommendation from Russian / Chinese translations of dev guide + + - IDEs are already recommended at length earlier in the documents + +commit 31550436e4b9a9e05017c09716214b10bec2043d +Author: Alexander Tokmakov +Date: Sun Jul 31 14:38:13 2022 +0200 + + try clang-15 for build with tsan + +commit 729d19fa4fcb74a17039292851aa1a4b1b7e5d6c +Author: Robert Schulze +Date: Fri Jul 29 12:30:40 2022 +0000 + + Rename "splitted build" to "shared libraries build" in CI tools + + - The old name made sense for (dev option) "-DUSE_STATIC_LIBRARIES=0 + -DSPLIT_SHARED_LIBRARIES=1 -DSPLIT_BINARY=1" but that was removed with + #39520. 
+ + - What still exists is "-DUSE_STATIC_LIBRARIES=0 + -DSPLIT_SHARED_LIBRARIES=1" which does a shared library build + +commit 52d08d9db4c46e9f0a23a1913d3adac222630689 +Merge: 0f2177127b dcc8751685 +Author: Robert Schulze +Date: Sun Jul 31 14:23:31 2022 +0200 + + Merge pull request #39520 from ClickHouse/no-split-binary + + Remove SPLIT_BINARY + +commit 0f2177127b7bb1517e0acab815ded905f5ba1390 +Merge: eaeb0446c7 4d7627e45e +Author: Robert Schulze +Date: Sun Jul 31 14:09:46 2022 +0200 + + Merge pull request #39751 from ClickHouse/enable-getoskernelversion + + Enable SQL function getOSKernelVersion() on all platforms + +commit d87aac2013f5d0211e63ec1c49bb6b06e88246a3 +Author: Antonio Andelic +Date: Sun Jul 31 11:31:46 2022 +0000 + + Disable fasttest for rocksdb + +commit aff8c12a4a6bfb851a546ff3e2632053857825f2 +Author: Azat Khuzhin +Date: Sun Jul 10 19:26:38 2022 +0300 + + tests: enable back 02232_dist_insert_send_logs_level_hung + + The original issue was that log_comment was wrong, and this test relies + on correct log_comment, but this had been fixed already in #37932. + + Also adjust the timeout to avoid possible query hung check failures. + + Signed-off-by: Azat Khuzhin + +commit eaeb0446c7c98cb43f21df42167fe7a255924ebf +Merge: d259c4fa6c 6099f66fd6 +Author: Nikolai Kochetov +Date: Sun Jul 31 12:59:20 2022 +0200 + + Merge pull request #39705 from ClickHouse/avoid-recursive-dtor-for-ast + + Avoid recursive destruction of AST. 
+ +commit eea1aaef2c7e84b023434dfa279dae1aa6e4965a +Author: Antonio Andelic +Date: Sun Jul 31 10:44:01 2022 +0000 + + Use params for filtering by key in EmbeddedRocksDB + +commit d259c4fa6c4aedc93f5021e4cf8091a458da98c6 +Merge: c9e6850306 4828be7fc4 +Author: Alexander Gololobov <440544+davenger@users.noreply.github.com> +Date: Sun Jul 31 11:28:25 2022 +0200 + + Merge pull request #39747 from ClickHouse/fix-double-escaping-json + + Fix double escaping in the metadata of FORMAT JSON + +commit dcc8751685874efdc559d65f467c3e998f7420f0 +Author: Robert Schulze +Date: Sun Jul 31 08:51:17 2022 +0000 + + Disable harmful env var check to workaround failure to start the server + +commit 55ff4956edbd4bb19933d0ae8812bcdcb8da92bb +Merge: 628503c92a c9e6850306 +Author: Antonio Andelic +Date: Sun Jul 31 08:28:10 2022 +0000 + + Merge branch 'master' into fix-jepsen-total-queue + +commit 7c23e48b5b2fc3a1907cf0bd4d8fd170f9db941b +Author: Robert Schulze +Date: Sun Jul 31 08:05:12 2022 +0000 + + Revert exclusion of libharmful (did not work anyways) + +commit 7fe106a0fbbe9976410959183169578709c54ea9 +Author: Robert Schulze +Date: Sat Jul 30 21:46:50 2022 +0000 + + Try to fix libharmful fail + +commit d834d9e1378aff4fc15a7cbc7191beec53a7afbf +Author: Robert Schulze +Date: Sat Jul 30 18:58:12 2022 +0000 + + Minor: Better variable name output + +commit 9de69a021c5e84780aae2abeb5fc014bc757bac3 +Author: Robert Schulze +Date: Fri Jul 29 12:19:04 2022 +0000 + + Fix documentation + +commit 4d7627e45e7a762c6205064881ee0393a8e5bfe8 +Author: Robert Schulze +Date: Sun Jul 31 07:36:40 2022 +0000 + + Fix include + +commit 095e400075196fd9ff3bf5356a37a82e89b15c02 +Author: Anton Popov +Date: Sun Jul 31 00:38:52 2022 +0000 + + fix index analysis with tuples and IN + +commit 075ff5005ee97b253056a3e5d3f157bc69c853b0 +Merge: c9ad914164 c9e6850306 +Author: Alexey Milovidov +Date: Sun Jul 31 03:05:53 2022 +0300 + + Merge branch 'master' into update-arrow + +commit fa9c3dcc4899227156d877cf9b676629103a537f 
+Author: Alexey Milovidov +Date: Sun Jul 31 03:02:27 2022 +0300 + + Update programs/local/LocalServer.cpp + + Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> + +commit a30dbed6b8dba4c08f9a27b786d2cad59f66becf +Author: Alexey Milovidov +Date: Sun Jul 31 03:02:20 2022 +0300 + + Update programs/local/LocalServer.cpp + + Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> + +commit c9e685030625f749c7564fc861d8aa1aab8f5e60 +Merge: 22bc89690d cf1a5baa23 +Author: Alexey Milovidov +Date: Sun Jul 31 02:51:38 2022 +0300 + + Merge pull request #39325 from azat/perf-parallel_mv-fix + + tests/performance: improve parallel_mv test + +commit 22bc89690d3b3681362c5c3615f285293a2bcc2e +Merge: 8fb70abe3e a068c397df +Author: Alexey Milovidov +Date: Sun Jul 31 02:24:05 2022 +0300 + + Merge pull request #39222 from azat/fix-http-session + + Do not report "Failed communicating with" on and on for parts exchange + +commit 8fb70abe3e54cbbfa935825fa88cf4e8caf99537 +Merge: 85773e0926 4f25a08b7c +Author: Alexey Milovidov +Date: Sun Jul 31 02:22:22 2022 +0300 + + Merge pull request #39178 from azat/dist-insert-log + + Add connection info for Distributed sends log message + +commit 7e4fb960cb53077ef83ce090ae07dc9813317b46 +Merge: 17176212a7 85773e0926 +Author: Alexey Milovidov +Date: Sun Jul 31 02:20:27 2022 +0300 + + Merge branch 'master' into patch-3 + +commit 17176212a77524b4086f9810d0bd529a7be51ad9 +Author: Alexey Milovidov +Date: Sun Jul 31 02:20:08 2022 +0300 + + Update test.py + +commit 8ca236de08375015aa09ec17e5de96a1f7f2de9f +Author: Robert Schulze +Date: Sat Jul 30 22:36:47 2022 +0000 + + Enable SQL function getOSKernelVersion() on all platforms + + Follow up to PR #38615 + +commit f80a4c184e67988a39b0aff34aa4503a957e5db4 +Merge: c026dbf51c 85773e0926 +Author: Alexey Milovidov +Date: Sun Jul 31 01:22:32 2022 +0300 + + Merge branch 'master' into interserver_listen_port + +commit 85773e0926b5e17152db7a35824ccf3a34b59061 +Merge: 
15a3ed2e3b 4088c0a7f3 +Author: Robert Schulze +Date: Sun Jul 31 00:18:37 2022 +0200 + + Merge pull request #38615 from liyinsg/simplified_function_registration_interface + + Simplified function registration interface + +commit 15a3ed2e3bd130d76c1e75acf448879e7b6d1a19 +Merge: ed5090a398 146756e2ea +Author: Alexey Milovidov +Date: Sun Jul 31 01:14:54 2022 +0300 + + Merge pull request #34662 from den-crane/test/insert_deduplication_token_materialized_views + + Test/insert deduplication token materialized views + +commit ed5090a398eaf3a3e39877bc2dbaedf36ecdb47f +Merge: 2bdc926572 9551a36bda +Author: Alexey Milovidov +Date: Sun Jul 31 01:08:42 2022 +0300 + + Merge pull request #39622 from ClickHouse/blinkov-patch-23 + + Update README.md + +commit 9551a36bda62f42fe57a429494e83c4b9c219118 +Merge: 3bc9e1bd16 2bdc926572 +Author: Alexey Milovidov +Date: Sun Jul 31 01:08:25 2022 +0300 + + Merge branch 'master' into blinkov-patch-23 + +commit 4828be7fc42e9ba01935edf878ff4abedf5eb0b0 +Author: Alexey Milovidov +Date: Sat Jul 30 23:56:41 2022 +0200 + + Fix double escaping in the metadata of FORMAT JSON + +commit acb148122d9aa0c411c03fb074ba1e0cebce56b0 +Author: robot-clickhouse +Date: Sat Jul 30 21:49:36 2022 +0000 + + Automatic style fix + +commit 441f2feb49c5d089b49ab6323b7f3c0dc85d61dd +Author: Alexey Milovidov +Date: Sat Jul 30 23:40:21 2022 +0200 + + Cleanup usages of `allow_experimental_projection_optimization` setting, part 1 + +commit 2bdc9265728c2b176498a691dc1d966172c42dfc +Merge: b52843d5fd 5eea7ce18d +Author: Alexey Milovidov +Date: Sat Jul 30 23:09:15 2022 +0300 + + Merge pull request #39687 from vitlibar/fix-reading-from-encrypted-disk + + Fix seeking while reading from encrypted disk + +commit b52843d5fd79f0d11ab379098b94fcb5dd805032 +Merge: acb0137dbb b390bcfe7c +Author: Robert Schulze +Date: Sat Jul 30 20:49:05 2022 +0200 + + Merge pull request #37951 from zvonand/dt64_timeslots + + Fix timeSlots for DateTime64 + +commit b9d7cd6a5d7c5b22be2cf2e5e6055313dfd25f14 
+Author: Constantine Peresypkin +Date: Thu Jul 28 19:54:46 2022 +0200 + + add settings for executable table func + + SELECT * FROM executable('