From 18f3c5c5c895824220606dfb7131c985ca5514e5 Mon Sep 17 00:00:00 2001
From: root
Date: Wed, 17 Aug 2022 18:42:46 -0700
Subject: [PATCH] Aggregate functions added

---
 src/Parsers/Kusto/KQL_ReleaseNote.md          |  33 +++
 .../KQLAggregationFunctions.cpp               | 197 +++++++++++++++---
 .../KQL/gtest_KQL_AggregateFunctions.cpp      | 113 ++++++++++
 3 files changed, 316 insertions(+), 27 deletions(-)
 create mode 100644 src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp

diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md
index fa0a4c1240b..7206c3cec89 100644
--- a/src/Parsers/Kusto/KQL_ReleaseNote.md
+++ b/src/Parsers/Kusto/KQL_ReleaseNote.md
@@ -1,5 +1,38 @@
 ## KQL implemented features
 
+# August XX, 2022
+
+## Aggregate Functions
+- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction)
+   `Customers | summarize t = stdev(Age) by FirstName`
+
+- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction)
+   `Customers | summarize t = stdevif(Age, Age < 10) by FirstName`
+
+- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction)
+   `Customers | summarize t = binary_all_and(Age) by FirstName`
+
+- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction)
+   `Customers | summarize t = binary_all_or(Age) by FirstName`
+
+- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction)
+   `Customers | summarize t = binary_all_xor(Age) by FirstName`
+
+- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
+   `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName`
+
+- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
+   **`range()` is not supported yet**
+   `Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName`
+   `Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName`
+
+- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
+   `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)`
+
+- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
+   **`range()` is not supported yet**
+   `DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))`
+
 # August 15, 2022
 
 ## DateTpye
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
index 30b33b5933a..a16c4f6ea22 100644
--- a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
+++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp
@@ -40,23 +40,17 @@ bool AvgIf::convertImpl(String &out,IParser::Pos &pos)
 
 bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    return directMapping(out,pos,"groupBitAnd");
 }
 
 bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    return directMapping(out,pos,"groupBitOr");
 }
 
 bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    return directMapping(out,pos,"groupBitXor");
 }
 
 bool BuildSchema::convertImpl(String &out,IParser::Pos &pos)
@@ -220,44 +214,193 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos)
 
+/// Strips the final character of a converted argument in place. The percentile converters below
+/// drop one character from every getConvertedArgument() result before using it in a column alias
+/// (presumably a trailing separator - TODO confirm against getConvertedArgument()).
+/// Returns false for an empty argument, where pop_back() would be undefined behaviour.
+static bool dropLastChar(String & argument)
+{
+    if (argument.empty())
+        return false;
+
+    argument.pop_back();
+    return true;
+}
+
+/// Converts the percentile list of percentiles()/percentilesw(). `pos` must point just past
+/// the column argument(s); on success it rests on the closing bracket of the KQL call and `out`
+/// holds one "<quantile_function>( <p>/100)(<arguments>) AS percentile_<alias_column>_<p>" item
+/// per percentile, joined with ", ". A '.' in <p> becomes '_' inside the alias (99.9 -> 99_9).
+/// Returns false and leaves `out` untouched when the list is empty or ')' is never reached.
+static bool convertPercentileList(
+    IParser::Pos & pos, const String & quantile_function, const String & arguments, const String & alias_column, String & out)
+{
+    String expr;
+    while (pos.isValid() && pos->type != TokenType::ClosingRoundBracket)
+    {
+        if (pos->type != TokenType::Comma)
+        {
+            const String value(pos->begin, pos->end);
+            String alias_suffix = value;
+            for (auto & symbol : alias_suffix)
+            {
+                if (symbol == '.')
+                    symbol = '_';
+            }
+
+            if (!expr.empty())
+                expr += ", ";
+            expr += quantile_function + "( " + value + "/100)(" + arguments + ") AS percentile_" + alias_column + "_" + alias_suffix;
+        }
+        ++pos;
+    }
+
+    if (!pos.isValid() || expr.empty())
+        return false;
+
+    out = expr;
+    return true;
+}
+
+/// Converts the percentile list of percentiles_array()/percentilesw_array(), written either as
+/// plain arguments or wrapped in dynamic([...]). On success `pos` rests on the closing bracket
+/// of the KQL call and `out` holds "<quantile_function>(<p1>/100, <p2>/100, ...)(<arguments>)".
+/// Returns false and leaves `out` untouched when no percentile is found or ')' is never reached.
+static bool convertPercentileArray(IParser::Pos & pos, const String & quantile_function, const String & arguments, String & out)
+{
+    String percentiles;
+    size_t open_round_brackets = 0;
+    while (pos.isValid())
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+        {
+            ++open_round_brackets;
+        }
+        else if (pos->type == TokenType::ClosingRoundBracket)
+        {
+            /// An unmatched ')' closes the KQL call itself; a matched one closes dynamic(...).
+            if (open_round_brackets == 0)
+                break;
+            --open_round_brackets;
+        }
+        else if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningSquareBracket
+                 && pos->type != TokenType::ClosingSquareBracket && String(pos->begin, pos->end) != "dynamic")
+        {
+            if (!percentiles.empty())
+                percentiles += ", ";
+            percentiles += String(pos->begin, pos->end) + "/100";
+        }
+        ++pos;
+    }
+
+    if (!pos.isValid() || percentiles.empty())
+        return false;
+
+    out = quantile_function + "(" + percentiles + ")(" + arguments + ")";
+    return true;
+}
+
+/// percentiles(column, p1, p2, ...) -> one "quantile( p/100)(column) AS percentile_column_p" per p.
 bool Percentiles::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String column_name = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(column_name))
+        return false;
+
+    return convertPercentileList(pos, "quantile", column_name, column_name, out);
 }
 
+/// percentiles_array(column, p1, ... | dynamic([p1, ...])) -> "quantiles(p1/100, ...)(column)".
 bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String column_name = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(column_name))
+        return false;
+
+    return convertPercentileArray(pos, "quantiles", column_name, out);
 }
 
+/// percentilesw(bucket, frequency, p1, ...) -> one "quantileExactWeighted( p/100)(bucket,frequency) AS ..." per p.
 bool Percentilesw::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String bucket_column = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(bucket_column) || pos->type != TokenType::Comma)
+        return false;
+
+    ++pos;
+    String frequency_column = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(frequency_column))
+        return false;
+
+    return convertPercentileList(pos, "quantileExactWeighted", bucket_column + "," + frequency_column, bucket_column, out);
 }
 
+/// percentilesw_array(bucket, frequency, ...) -> "quantilesExactWeighted(p1/100, ...)(bucket,frequency)".
 bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String bucket_column = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(bucket_column) || pos->type != TokenType::Comma)
+        return false;
+
+    ++pos;
+    String frequency_column = getConvertedArgument(fn_name,pos);
+    if (!dropLastChar(frequency_column))
+        return false;
+
+    return convertPercentileArray(pos, "quantilesExactWeighted", bucket_column + "," + frequency_column, out);
 }
 
+/// stdev(expr) -> "sqrt(varSamp(expr))".
 bool Stdev::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+    ++pos;
+    const auto expr = getConvertedArgument(fn_name,pos);
+    out = "sqrt(varSamp(" + expr + "))";
+    return true;
 }
 
+/// stdevif(expr, predicate) -> "sqrt(varSampIf(expr, predicate))"; fails if the comma is missing.
 bool StdevIf::convertImpl(String &out,IParser::Pos &pos)
 {
-    String res = String(pos->begin,pos->end);
-    out = res;
-    return false;
+    String fn_name = getKQLFunctionName(pos);
+
+    if (fn_name.empty())
+        return false;
+    ++pos;
+    const auto expr = getConvertedArgument(fn_name,pos);
+    if (pos->type != TokenType::Comma)
+        return false;
+
+    ++pos;
+    const auto predicate = getConvertedArgument(fn_name,pos);
+    out = "sqrt(varSampIf(" + expr + ", " + predicate + "))";
+    return true;
 }
 
 bool Sum::convertImpl(String &out,IParser::Pos &pos)
diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
new file mode 100644
index 00000000000..83bec1d5333
--- /dev/null
+++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
@@ -0,0 +1,113 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace
+{
+using namespace DB;
+using namespace std::literals;
+}
+class ParserAggregateFuncTest : public ::testing::TestWithParam, ParserTestCase>>
+{};
+
+TEST_P(ParserAggregateFuncTest, ParseQuery)
+{    const auto & parser = std::get<0>(GetParam());
+    const auto & [input_text, expected_ast] = std::get<1>(GetParam());
+    ASSERT_NE(nullptr, parser);
+    if (expected_ast)
+    {
+        if (std::string(expected_ast).starts_with("throws"))
+        {
+            EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
+        }
+        else
+        {
+            ASTPtr ast;
+            ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0));
+            if (std::string("CREATE USER or ALTER USER query") != parser->getName()
+                    && std::string("ATTACH access entity query") != parser->getName())
+            {
+                EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false));
+            }
+            else
+            {
+                if (input_text.starts_with("ATTACH"))
+                {
+                    auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt();
+                    EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast)));
+                }
+                else
+                {
+                    EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast)));
+                }
+            }
+        }
+    }
+    else
+    {
+        ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared()),
+        ::testing::ValuesIn(std::initializer_list{
+        {
+            "Customers | summarize t = stdev(Age) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = stdevif(Age, Age < 10) by FirstName",
+            "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_and(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = binary_all_or(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+
+        },
+        {
+            "Customers | summarize t = binary_all_xor(Age) by FirstName",
+            "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName",
+            "SELECT\n FirstName,\n quantile(30 / 100)(Age) AS percentile_Age_30,\n quantile(40 / 100)(Age) AS percentile_Age_40,\n quantile(50 / 100)(Age) AS percentile_Age_50,\n quantile(60 / 100)(Age) AS percentile_Age_60,\n quantile(70 / 100)(Age) AS percentile_Age_70\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName",
+            "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
+        },
+        {
+            "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)",
+            "SELECT\n quantileExactWeighted(50 / 100)(Bucket, Frequency) AS percentile_Bucket_50,\n quantileExactWeighted(75 / 100)(Bucket, Frequency) AS percentile_Bucket_75,\n quantileExactWeighted(99.9 / 100)(Bucket, Frequency) AS percentile_Bucket_99_9\nFROM DataTable"
+        },
+        {
+            "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))",
+            "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
+        }
+})));