Aggregate functions added

This commit is contained in:
root 2022-08-17 18:42:46 -07:00 committed by Yong Wang
parent e22413823e
commit 18f3c5c5c8
3 changed files with 328 additions and 27 deletions

View File

@ -1,5 +1,38 @@
## KQL implemented features
# August XX, 2022
## Aggregate Functions
- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction)
`Customers | summarize t = stdev(Age) by FirstName`
- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction)
`Customers | summarize t = stdevif(Age, Age < 10) by FirstName`
- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction)
`Customers | summarize t = binary_all_and(Age) by FirstName`
- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction)
`Customers | summarize t = binary_all_or(Age) by FirstName`
- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction)
`Customers | summarize t = binary_all_xor(Age) by FirstName`
- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
`Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName`
- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
**`range()` is not supported yet**
`Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName`
`Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName`
- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
`DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)`
- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
**`range()` is not supported yet**
`DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))`
# August 15, 2022
## DateType

View File

@ -40,23 +40,17 @@ bool AvgIf::convertImpl(String &out,IParser::Pos &pos)
/// Converts KQL `binary_all_and(...)` by delegating to directMapping() with the
/// target function name "groupBitAnd".
/// @param out receives the converted expression (written by directMapping).
/// @param pos token position of the KQL function call.
/// @return the result reported by directMapping().
bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos)
{
    // No placeholder code may precede this call: an early `return false`
    // would make the mapping unreachable.
    return directMapping(out,pos,"groupBitAnd");
}
/// Converts KQL `binary_all_or(...)` by delegating to directMapping() with the
/// target function name "groupBitOr".
/// @param out receives the converted expression (written by directMapping).
/// @param pos token position of the KQL function call.
/// @return the result reported by directMapping().
bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos)
{
    // No placeholder code may precede this call: an early `return false`
    // would make the mapping unreachable.
    return directMapping(out,pos,"groupBitOr");
}
/// Converts KQL `binary_all_xor(...)` by delegating to directMapping() with the
/// target function name "groupBitXor".
/// @param out receives the converted expression (written by directMapping).
/// @param pos token position of the KQL function call.
/// @return the result reported by directMapping().
bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos)
{
    // No placeholder code may precede this call: an early `return false`
    // would make the mapping unreachable.
    return directMapping(out,pos,"groupBitXor");
}
bool BuildSchema::convertImpl(String &out,IParser::Pos &pos)
@ -220,44 +214,205 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos)
/// Converts KQL `percentiles(column, p1, p2, ...)` into a comma-separated list of
/// `quantile( p/100)(column) AS percentile_<column>_<p>` expressions, one per
/// percentile token. A '.' inside a percentile token becomes '_' in the alias
/// (e.g. 99.9 -> percentile_<column>_99_9).
/// @param out receives the generated expression list; only written on success.
/// @param pos token position of the function call; on success it is left on the
///            closing round bracket.
/// @return false if the function name cannot be read or the argument list is not
///         terminated by ')'; true otherwise.
bool Percentiles::convertImpl(String &out,IParser::Pos &pos)
{
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String column_name = getConvertedArgument(fn_name,pos);
    // NOTE(review): getConvertedArgument() appears to return the argument followed by
    // one extra trailing character that must not leak into the alias - confirm.
    // The emptiness guard avoids calling pop_back() on an empty string.
    if (!column_name.empty())
        column_name.pop_back();

    String expr;
    // Stop at end-of-stream as well, so a missing ')' cannot make the loop run forever.
    while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
    {
        if (pos->type == TokenType::Comma)
        {
            ++pos;
            continue;
        }

        const String value(pos->begin, pos->end);

        // Build an identifier-friendly copy of the percentile for the alias.
        String value_in_column;
        for (const char ch : value)
            value_in_column += (ch == '.') ? '_' : ch;

        expr += "quantile( " + value + "/100)(" + column_name + ") AS percentile_" + column_name + "_" + value_in_column;

        ++pos;
        if (pos->type != TokenType::ClosingRoundBracket)
            expr += ", ";
    }

    if (pos->type != TokenType::ClosingRoundBracket)
        return false;

    out = expr;
    return true;
}
/// Converts KQL `percentiles_array(column, p1, p2, ...)` or
/// `percentiles_array(column, dynamic([p1, p2, ...]))` into
/// `quantiles(p1/100, p2/100, ...)(column)`.
/// @param out receives the generated expression; only written on success.
/// @param pos token position of the function call; on success it is left on the
///            last closing round bracket that belongs to the call.
/// @return false if the function name cannot be read, the argument list is not
///         terminated by ')', or no percentile was supplied; true otherwise.
bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos)
{
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String column_name = getConvertedArgument(fn_name,pos);
    // NOTE(review): getConvertedArgument() appears to return the argument followed by
    // one extra trailing character - confirm. Guarded so "" is never pop_back()'ed.
    if (!column_name.empty())
        column_name.pop_back();

    String levels;
    // Stop at end-of-stream as well, so a missing ')' cannot make the loop run forever.
    while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
    {
        // Commas, the `dynamic` keyword and the brackets of dynamic([...]) carry no value.
        const bool is_structural = pos->type == TokenType::Comma
            || String(pos->begin, pos->end) == "dynamic"
            || pos->type == TokenType::OpeningRoundBracket
            || pos->type == TokenType::OpeningSquareBracket
            || pos->type == TokenType::ClosingSquareBracket;

        if (!is_structural)
        {
            // Separator goes before every value except the first, so nothing has to be
            // trimmed afterwards (trimming an empty list used to eat into "quantiles(").
            if (!levels.empty())
                levels += ", ";
            levels += String(pos->begin, pos->end) + "/100";
        }
        ++pos;
    }

    if (pos->type != TokenType::ClosingRoundBracket)
        return false;

    // With dynamic([...]) the loop stops on the bracket closing `dynamic(`; if another
    // ')' follows, move onto it, otherwise stay where the loop stopped.
    ++pos;
    if (pos->type != TokenType::ClosingRoundBracket)
        --pos;

    if (levels.empty())
        return false;

    out = "quantiles(" + levels + ")(" + column_name + ")";
    return true;
}
/// Converts KQL `percentilesw(bucket, frequency, p1, p2, ...)` into a comma-separated
/// list of `quantileExactWeighted( p/100)(bucket,frequency) AS percentile_<bucket>_<p>`
/// expressions, one per percentile token. A '.' inside a percentile token becomes '_'
/// in the alias (e.g. 99.9 -> percentile_<bucket>_99_9).
/// @param out receives the generated expression list; only written on success.
/// @param pos token position of the function call; on success it is left on the
///            closing round bracket.
/// @return false if the function name cannot be read, the bucket argument is not
///         followed by a comma, or the argument list is not terminated by ')'.
bool Percentilesw::convertImpl(String &out,IParser::Pos &pos)
{
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String bucket_column = getConvertedArgument(fn_name,pos);
    // NOTE(review): getConvertedArgument() appears to return the argument followed by
    // one extra trailing character - confirm. Guarded so "" is never pop_back()'ed.
    if (!bucket_column.empty())
        bucket_column.pop_back();

    // A frequency argument is mandatory; verify the separator before skipping it
    // (same check StdevIf performs between its two arguments).
    if (pos->type != TokenType::Comma)
        return false;
    ++pos;

    String frequency_column = getConvertedArgument(fn_name,pos);
    if (!frequency_column.empty())
        frequency_column.pop_back();

    String expr;
    // Stop at end-of-stream as well, so a missing ')' cannot make the loop run forever.
    while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
    {
        if (pos->type == TokenType::Comma)
        {
            ++pos;
            continue;
        }

        const String value(pos->begin, pos->end);

        // Build an identifier-friendly copy of the percentile for the alias.
        String value_in_column;
        for (const char ch : value)
            value_in_column += (ch == '.') ? '_' : ch;

        expr += "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ") AS percentile_" + bucket_column + "_" + value_in_column;

        ++pos;
        if (pos->type != TokenType::ClosingRoundBracket)
            expr += ", ";
    }

    if (pos->type != TokenType::ClosingRoundBracket)
        return false;

    out = expr;
    return true;
}
/// Converts KQL `percentilesw_array(bucket, frequency, p1, p2, ...)` or
/// `percentilesw_array(bucket, frequency, dynamic([p1, p2, ...]))` into
/// `quantilesExactWeighted(p1/100, p2/100, ...)(bucket,frequency)`.
/// @param out receives the generated expression; only written on success.
/// @param pos token position of the function call; on success it is left on the
///            last closing round bracket that belongs to the call.
/// @return false if the function name cannot be read, the bucket argument is not
///         followed by a comma, the argument list is not terminated by ')', or no
///         percentile was supplied; true otherwise.
bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos)
{
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String bucket_column = getConvertedArgument(fn_name,pos);
    // NOTE(review): getConvertedArgument() appears to return the argument followed by
    // one extra trailing character - confirm. Guarded so "" is never pop_back()'ed.
    if (!bucket_column.empty())
        bucket_column.pop_back();

    // A frequency argument is mandatory; verify the separator before skipping it
    // (same check StdevIf performs between its two arguments).
    if (pos->type != TokenType::Comma)
        return false;
    ++pos;

    String frequency_column = getConvertedArgument(fn_name,pos);
    if (!frequency_column.empty())
        frequency_column.pop_back();

    String levels;
    // Stop at end-of-stream as well, so a missing ')' cannot make the loop run forever.
    while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
    {
        // Commas, the `dynamic` keyword and the brackets of dynamic([...]) carry no value.
        const bool is_structural = pos->type == TokenType::Comma
            || String(pos->begin, pos->end) == "dynamic"
            || pos->type == TokenType::OpeningRoundBracket
            || pos->type == TokenType::OpeningSquareBracket
            || pos->type == TokenType::ClosingSquareBracket;

        if (!is_structural)
        {
            // Separator goes before every value except the first, so nothing has to be
            // trimmed afterwards (trimming an empty list used to eat into the function name).
            if (!levels.empty())
                levels += ", ";
            levels += String(pos->begin, pos->end) + "/100";
        }
        ++pos;
    }

    if (pos->type != TokenType::ClosingRoundBracket)
        return false;

    // With dynamic([...]) the loop stops on the bracket closing `dynamic(`; if another
    // ')' follows, move onto it, otherwise stay where the loop stopped.
    ++pos;
    if (pos->type != TokenType::ClosingRoundBracket)
        --pos;

    if (levels.empty())
        return false;

    out = "quantilesExactWeighted(" + levels + ")(" + bucket_column + ","+frequency_column + ")";
    return true;
}
/// Converts KQL `stdev(expr)` into `sqrt(varSamp(<expr>))`.
/// @param out receives the generated expression; only written on success.
/// @param pos token position of the function call.
/// @return false if the function name cannot be read, true otherwise.
bool Stdev::convertImpl(String &out,IParser::Pos &pos)
{
    // `out` is deliberately left untouched until the conversion has succeeded;
    // the raw function token is never copied into it.
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    const auto expr = getConvertedArgument(fn_name,pos);
    out = "sqrt(varSamp(" + expr + "))";
    return true;
}
/// Converts KQL `stdevif(expr, predicate)` into `sqrt(varSampIf(<expr>, <predicate>))`.
/// @param out receives the generated expression; only written on success.
/// @param pos token position of the function call.
/// @return false if the function name cannot be read or the first argument is not
///         followed by a comma; true otherwise.
bool StdevIf::convertImpl(String &out,IParser::Pos &pos)
{
    // `out` is deliberately left untouched until the conversion has succeeded;
    // the raw function token is never copied into it.
    String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    const auto expr = getConvertedArgument(fn_name,pos);

    // The predicate is mandatory: without a separating comma the call is rejected.
    if (pos->type != TokenType::Comma)
        return false;
    ++pos;

    const auto predicate = getConvertedArgument(fn_name,pos);
    out = "sqrt(varSampIf(" + expr + ", " + predicate + "))";
    return true;
}
bool Sum::convertImpl(String &out,IParser::Pos &pos)

View File

@ -0,0 +1,113 @@
#include <Parsers/tests/gtest_common.h>
#include <IO/WriteBufferFromOStream.h>
#include <Interpreters/applyTableOverride.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/Access/ParserCreateUserQuery.h>
#include <Parsers/ParserAlterQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ParserOptimizeQuery.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/ParserAttachAccessEntity.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <string_view>
#include <regex>
#include <gtest/gtest.h>
// File-local using-directives: bring the std literal suffixes and the DB
// namespace into scope for the code in this translation unit.
namespace
{
using namespace std::literals;
using namespace DB;
}
/// Value-parameterised fixture. Each parameter is a tuple of the parser under test
/// (shared_ptr<DB::IParser>) and one ParserTestCase; the fixture adds no state of its own.
class ParserAggregateFuncTest : public ::testing::TestWithParam<std::tuple<std::shared_ptr<DB::IParser>, ParserTestCase>>
{};
/// Parses the test case's input with the parameterised parser and checks the outcome:
///  - no expected text            -> parsing must throw DB::Exception;
///  - expected text starts with "throws" -> parsing is expected to throw DB::Exception;
///  - otherwise                   -> parsing must succeed and the serialized AST is compared
///    with the expected text (literally, or as a regex for the two access-entity parsers).
TEST_P(ParserAggregateFuncTest, ParseQuery)
{
    const auto & parser = std::get<0>(GetParam());
    const auto & [input_text, expected_ast] = std::get<1>(GetParam());

    ASSERT_NE(nullptr, parser);

    // A case without an expectation describes a query that has to be rejected.
    if (!expected_ast)
    {
        ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
        return;
    }

    if (std::string(expected_ast).starts_with("throws"))
    {
        EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
        return;
    }

    ASTPtr ast;
    ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0));

    // Only these two parsers have their expectation treated as a regular expression.
    const bool is_user_query_parser = std::string("CREATE USER or ALTER USER query") == parser->getName();
    const bool is_attach_entity_parser = std::string("ATTACH access entity query") == parser->getName();

    if (!is_user_query_parser && !is_attach_entity_parser)
    {
        EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false));
        return;
    }

    if (input_text.starts_with("ATTACH"))
    {
        // For ATTACH statements the salt of the parsed user query is matched instead of the AST text.
        auto salt = (dynamic_cast<const ASTCreateUserQuery *>(ast.get())->auth_data)->getSalt();
        EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast)));
    }
    else
    {
        EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast)));
    }
}
/// Instantiates ParserAggregateFuncTest with a single DB::ParserKQLQuery instance combined
/// with the cases below. Each case is { KQL input, expected text }; the expected text is
/// compared against the serialized AST by the ParseQuery test.
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest,
::testing::Combine(
::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"Customers | summarize t = stdev(Age) by FirstName",
"SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = stdevif(Age, Age < 10) by FirstName",
"SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = binary_all_and(Age) by FirstName",
"SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = binary_all_or(Age) by FirstName",
"SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = binary_all_xor(Age) by FirstName",
"SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName",
"SELECT\n FirstName,\n quantile(30 / 100)(Age) AS percentile_Age_30,\n quantile(40 / 100)(Age) AS percentile_Age_40,\n quantile(50 / 100)(Age) AS percentile_Age_50,\n quantile(60 / 100)(Age) AS percentile_Age_60,\n quantile(70 / 100)(Age) AS percentile_Age_70\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName",
"SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName",
"SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
},
{
"DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)",
"SELECT\n quantileExactWeighted(50 / 100)(Bucket, Frequency) AS percentile_Bucket_50,\n quantileExactWeighted(75 / 100)(Bucket, Frequency) AS percentile_Bucket_75,\n quantileExactWeighted(99.9 / 100)(Bucket, Frequency) AS percentile_Bucket_99_9\nFROM DataTable"
},
{
"DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))",
"SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
}
})));