mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-02 12:32:04 +00:00
Aggregate functions added
This commit is contained in:
parent
e22413823e
commit
18f3c5c5c8
@ -1,5 +1,38 @@
|
||||
## KQL implemented features
|
||||
|
||||
# August XX, 2022
|
||||
|
||||
## Aggregate Functions
|
||||
- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction)
|
||||
`Customers | summarize t = stdev(Age) by FirstName`
|
||||
|
||||
- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction)
|
||||
`Customers | summarize t = stdevif(Age, Age < 10) by FirstName`
|
||||
|
||||
- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction)
|
||||
`Customers | summarize t = binary_all_and(Age) by FirstName`
|
||||
|
||||
- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction)
|
||||
`Customers | summarize t = binary_all_or(Age) by FirstName`
|
||||
|
||||
- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction)
|
||||
`Customers | summarize t = binary_all_xor(Age) by FirstName`
|
||||
|
||||
- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
|
||||
`Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName`
|
||||
|
||||
- [percentiles_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
|
||||
**`range()` is not supported yet**
|
||||
`Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName`
|
||||
`Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName`
|
||||
|
||||
- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
|
||||
`DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)`
|
||||
|
||||
- [percentilesw_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction)
|
||||
**`range()` is not supported yet**
|
||||
`DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))`
|
||||
|
||||
# August 15, 2022
|
||||
|
||||
## DataType
|
||||
|
@ -40,23 +40,17 @@ bool AvgIf::convertImpl(String &out,IParser::Pos &pos)
|
||||
|
||||
/// Converts the KQL aggregate `binary_all_and` by delegating to directMapping
/// with the ClickHouse function name `groupBitAnd`.
/// Returns whatever directMapping reports.
bool BinaryAllAnd::convertImpl(String &out,IParser::Pos &pos)
{
    return directMapping(out,pos,"groupBitAnd");
}
|
||||
|
||||
/// Converts the KQL aggregate `binary_all_or` by delegating to directMapping
/// with the ClickHouse function name `groupBitOr`.
/// Returns whatever directMapping reports.
bool BinaryAllOr::convertImpl(String &out,IParser::Pos &pos)
{
    return directMapping(out,pos,"groupBitOr");
}
|
||||
|
||||
/// Converts the KQL aggregate `binary_all_xor` by delegating to directMapping
/// with the ClickHouse function name `groupBitXor`.
/// Returns whatever directMapping reports.
bool BinaryAllXor::convertImpl(String &out,IParser::Pos &pos)
{
    return directMapping(out,pos,"groupBitXor");
}
|
||||
|
||||
bool BuildSchema::convertImpl(String &out,IParser::Pos &pos)
|
||||
@ -220,44 +214,205 @@ bool MinIf::convertImpl(String &out,IParser::Pos &pos)
|
||||
|
||||
/// Converts KQL `percentiles(column, p1, p2, ...)` into one ClickHouse
/// `quantile(p/100)(column)` expression per requested percentile, each aliased
/// as `percentile_<column>_<p>` (a '.' inside <p> is written as '_' in the alias).
/// The generated expressions are joined with ", " and stored in `out`.
///
/// Returns false (leaving `out` untouched) when the function name cannot be
/// read, when no column argument is found, or when the token stream ends
/// before the closing round bracket.
bool Percentiles::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the first argument and read the column expression.
    ++pos;
    String column_name = getConvertedArgument(fn_name,pos);
    if (column_name.empty())
        return false;

    // The converted argument carries one trailing character that must not end up
    // in the generated SQL (presumably a separator appended by
    // getConvertedArgument -- TODO confirm).
    column_name.pop_back();

    String expr;
    while (pos->type != TokenType::ClosingRoundBracket)
    {
        // A missing closing bracket must not make the loop run forever.
        if (pos->isEnd())
            return false;

        if (pos->type == TokenType::Comma)
        {
            ++pos;
            continue;
        }

        const String value(pos->begin, pos->end);

        // Alias-safe spelling of the percentile, e.g. 99.9 -> 99_9.
        String value_in_column;
        for (const char ch : value)
            value_in_column += (ch == '.') ? '_' : ch;

        expr += "quantile( " + value + "/100)(" + column_name + ") AS percentile_" + column_name + "_" + value_in_column;

        ++pos;
        if (pos->type != TokenType::ClosingRoundBracket)
            expr += ", ";
    }

    out = expr;
    return true;
}
|
||||
|
||||
/// Converts KQL `percentiles_array(column, p1, p2, ...)` or
/// `percentiles_array(column, dynamic([p1, p2, ...]))` into a single ClickHouse
/// `quantiles(p1/100, p2/100, ...)(column)` expression stored in `out`.
///
/// Commas, round/square brackets and the word `dynamic` are treated as
/// structure and skipped; every other token before the first closing round
/// bracket is taken as a percentile value.
///
/// Returns false (leaving `out` untouched) when the function name cannot be
/// read, when no column argument is found, when the token stream ends before
/// a closing round bracket, or when no percentile value was supplied.
bool PercentilesArray::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the first argument and read the column expression.
    ++pos;
    String column_name = getConvertedArgument(fn_name,pos);
    if (column_name.empty())
        return false;

    // The converted argument carries one trailing character that must not end up
    // in the generated SQL (presumably a separator appended by
    // getConvertedArgument -- TODO confirm).
    column_name.pop_back();

    String expr = "quantiles(";
    bool has_percentile = false;
    while (pos->type != TokenType::ClosingRoundBracket)
    {
        // A missing closing bracket must not make the loop run forever.
        if (pos->isEnd())
            return false;

        const bool is_structural = pos->type == TokenType::Comma
            || pos->type == TokenType::OpeningRoundBracket
            || pos->type == TokenType::OpeningSquareBracket
            || pos->type == TokenType::ClosingSquareBracket
            || String(pos->begin, pos->end) == "dynamic";

        if (!is_structural)
        {
            expr += String(pos->begin, pos->end) + "/100, ";
            has_percentile = true;
        }
        ++pos;
    }

    // Without any percentile there is no trailing ", " to strip and no valid call to emit.
    if (!has_percentile)
        return false;

    // With `dynamic([...])` the loop stops on the bracket closing `dynamic(`.
    // If another closing round bracket follows, stay on that one; otherwise
    // step back to the bracket the loop stopped at.
    ++pos;
    if (pos->type != TokenType::ClosingRoundBracket)
        --pos;

    // Drop the ", " left behind the last percentile.
    expr.resize(expr.size() - 2);
    expr += ")(" + column_name + ")";
    out = expr;
    return true;
}
|
||||
|
||||
/// Converts KQL `percentilesw(bucket, frequency, p1, p2, ...)` into one
/// ClickHouse `quantileExactWeighted(p/100)(bucket,frequency)` expression per
/// requested percentile, each aliased as `percentile_<bucket>_<p>` (a '.'
/// inside <p> is written as '_' in the alias). The generated expressions are
/// joined with ", " and stored in `out`.
///
/// Returns false (leaving `out` untouched) when the function name cannot be
/// read, when the bucket or frequency argument is missing, when the two are
/// not separated by a comma, or when the token stream ends before the closing
/// round bracket.
bool Percentilesw::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the first argument and read the bucket column.
    ++pos;
    String bucket_column = getConvertedArgument(fn_name,pos);

    // A second argument must follow, so the current token has to be the separating comma.
    if (bucket_column.empty() || pos->type != TokenType::Comma)
        return false;

    // The converted argument carries one trailing character that must not end up
    // in the generated SQL (presumably a separator appended by
    // getConvertedArgument -- TODO confirm).
    bucket_column.pop_back();

    // Step over the comma and read the frequency column.
    ++pos;
    String frequency_column = getConvertedArgument(fn_name,pos);
    if (frequency_column.empty())
        return false;
    frequency_column.pop_back();

    String expr;
    while (pos->type != TokenType::ClosingRoundBracket)
    {
        // A missing closing bracket must not make the loop run forever.
        if (pos->isEnd())
            return false;

        if (pos->type == TokenType::Comma)
        {
            ++pos;
            continue;
        }

        const String value(pos->begin, pos->end);

        // Alias-safe spelling of the percentile, e.g. 99.9 -> 99_9.
        String value_in_column;
        for (const char ch : value)
            value_in_column += (ch == '.') ? '_' : ch;

        expr += "quantileExactWeighted( " + value + "/100)(" + bucket_column + ","+frequency_column + ") AS percentile_" + bucket_column + "_" + value_in_column;

        ++pos;
        if (pos->type != TokenType::ClosingRoundBracket)
            expr += ", ";
    }

    out = expr;
    return true;
}
|
||||
|
||||
/// Converts KQL `percentilesw_array(bucket, frequency, dynamic([p1, p2, ...]))`
/// (or the same with a plain percentile list) into a single ClickHouse
/// `quantilesExactWeighted(p1/100, p2/100, ...)(bucket,frequency)` expression
/// stored in `out`.
///
/// Commas, round/square brackets and the word `dynamic` are treated as
/// structure and skipped; every other token before the first closing round
/// bracket is taken as a percentile value.
///
/// Returns false (leaving `out` untouched) when the function name cannot be
/// read, when the bucket or frequency argument is missing, when the two are
/// not separated by a comma, when the token stream ends before a closing round
/// bracket, or when no percentile value was supplied.
bool PercentileswArray::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the first argument and read the bucket column.
    ++pos;
    String bucket_column = getConvertedArgument(fn_name,pos);

    // A second argument must follow, so the current token has to be the separating comma.
    if (bucket_column.empty() || pos->type != TokenType::Comma)
        return false;

    // The converted argument carries one trailing character that must not end up
    // in the generated SQL (presumably a separator appended by
    // getConvertedArgument -- TODO confirm).
    bucket_column.pop_back();

    // Step over the comma and read the frequency column.
    ++pos;
    String frequency_column = getConvertedArgument(fn_name,pos);
    if (frequency_column.empty())
        return false;
    frequency_column.pop_back();

    String expr = "quantilesExactWeighted(";
    bool has_percentile = false;
    while (pos->type != TokenType::ClosingRoundBracket)
    {
        // A missing closing bracket must not make the loop run forever.
        if (pos->isEnd())
            return false;

        const bool is_structural = pos->type == TokenType::Comma
            || pos->type == TokenType::OpeningRoundBracket
            || pos->type == TokenType::OpeningSquareBracket
            || pos->type == TokenType::ClosingSquareBracket
            || String(pos->begin, pos->end) == "dynamic";

        if (!is_structural)
        {
            expr += String(pos->begin, pos->end) + "/100, ";
            has_percentile = true;
        }
        ++pos;
    }

    // Without any percentile there is no trailing ", " to strip and no valid call to emit.
    if (!has_percentile)
        return false;

    // With `dynamic([...])` the loop stops on the bracket closing `dynamic(`.
    // If another closing round bracket follows, stay on that one; otherwise
    // step back to the bracket the loop stopped at.
    ++pos;
    if (pos->type != TokenType::ClosingRoundBracket)
        --pos;

    // Drop the ", " left behind the last percentile.
    expr.resize(expr.size() - 2);
    expr += ")(" + bucket_column + ","+frequency_column + ")";
    out = expr;
    return true;
}
|
||||
|
||||
/// Converts KQL `stdev(expr)` into the ClickHouse expression
/// `sqrt(varSamp(expr))` and stores it in `out`.
/// Returns false, leaving `out` untouched, when the function name cannot be read.
bool Stdev::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the argument and convert it.
    ++pos;
    const auto expr = getConvertedArgument(fn_name,pos);
    out = "sqrt(varSamp(" + expr + "))";
    return true;
}
|
||||
|
||||
/// Converts KQL `stdevif(expr, predicate)` into the ClickHouse expression
/// `sqrt(varSampIf(expr, predicate))` and stores it in `out`.
/// Returns false, leaving `out` untouched, when the function name cannot be
/// read or when the first argument is not followed by a comma.
bool StdevIf::convertImpl(String &out,IParser::Pos &pos)
{
    const String fn_name = getKQLFunctionName(pos);

    if (fn_name.empty())
        return false;

    // Advance to the first argument and convert it.
    ++pos;
    const auto expr = getConvertedArgument(fn_name,pos);

    // The predicate is mandatory, so a comma has to follow the first argument.
    if (pos->type != TokenType::Comma)
        return false;

    ++pos;
    const auto predicate = getConvertedArgument(fn_name,pos);
    out = "sqrt(varSampIf(" + expr + ", " + predicate + "))";
    return true;
}
|
||||
|
||||
bool Sum::convertImpl(String &out,IParser::Pos &pos)
|
||||
|
113
src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
Normal file
113
src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp
Normal file
@ -0,0 +1,113 @@
|
||||
#include <Parsers/tests/gtest_common.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <Interpreters/applyTableOverride.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/Access/ASTCreateUserQuery.h>
|
||||
#include <Parsers/Access/ParserCreateUserQuery.h>
|
||||
#include <Parsers/ParserAlterQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/ParserOptimizeQuery.h>
|
||||
#include <Parsers/ParserQueryWithOutput.h>
|
||||
#include <Parsers/ParserAttachAccessEntity.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/Kusto/ParserKQLQuery.h>
|
||||
#include <string_view>
|
||||
#include <regex>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
using namespace DB;
|
||||
using namespace std::literals;
|
||||
}
|
||||
/// Value-parameterised fixture: each parameter is a (parser, test case) pair.
class ParserAggregateFuncTest
    : public ::testing::TestWithParam<std::tuple<std::shared_ptr<DB::IParser>, ParserTestCase>>
{
};
|
||||
|
||||
/// Parses the input text with the supplied parser and checks the outcome:
///  - no expected text: parsing must throw DB::Exception;
///  - expected text starting with "throws": parsing must throw DB::Exception;
///  - otherwise parsing must succeed and the serialized AST must equal the
///    expected text, except for the two access-entity parsers, where the
///    expected text is used as a regular expression.
TEST_P(ParserAggregateFuncTest, ParseQuery)
{
    const auto & parser = std::get<0>(GetParam());
    const auto & [input_text, expected_ast] = std::get<1>(GetParam());
    ASSERT_NE(nullptr, parser);

    if (!expected_ast)
    {
        ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
        return;
    }

    if (std::string(expected_ast).starts_with("throws"))
    {
        EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
        return;
    }

    ASTPtr ast;
    ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0));

    // These two parsers get a regular-expression comparison instead of an exact one.
    const bool uses_regex_expectation = std::string("CREATE USER or ALTER USER query") == parser->getName()
        || std::string("ATTACH access entity query") == parser->getName();

    if (!uses_regex_expectation)
    {
        EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false));
        return;
    }

    if (input_text.starts_with("ATTACH"))
    {
        // For ATTACH input the pattern is matched against the salt of the parsed auth data.
        auto salt = (dynamic_cast<const ASTCreateUserQuery *>(ast.get())->auth_data)->getSalt();
        EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast)));
    }
    else
    {
        EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast)));
    }
}
|
||||
|
||||
// Runs ParserAggregateFuncTest with a ParserKQLQuery over the cases below.
// Each case pairs a KQL `summarize` query with the expected serialized AST text.
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserAggregateFuncTest,
    ::testing::Combine(
        ::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
            // stdev / stdevif
            {
                "Customers | summarize t = stdev(Age) by FirstName",
                "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            {
                "Customers | summarize t = stdevif(Age, Age < 10) by FirstName",
                "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            // binary_all_and / binary_all_or / binary_all_xor
            {
                "Customers | summarize t = binary_all_and(Age) by FirstName",
                "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            {
                "Customers | summarize t = binary_all_or(Age) by FirstName",
                "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            {
                "Customers | summarize t = binary_all_xor(Age) by FirstName",
                "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            // percentiles / percentiles_array
            {
                "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName",
                "SELECT\n FirstName,\n quantile(30 / 100)(Age) AS percentile_Age_30,\n quantile(40 / 100)(Age) AS percentile_Age_40,\n quantile(50 / 100)(Age) AS percentile_Age_50,\n quantile(60 / 100)(Age) AS percentile_Age_60,\n quantile(70 / 100)(Age) AS percentile_Age_70\nFROM Customers\nGROUP BY FirstName"
            },
            {
                "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName",
                "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            {
                "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName",
                "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName"
            },
            // percentilesw / percentilesw_array
            {
                "DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)",
                "SELECT\n quantileExactWeighted(50 / 100)(Bucket, Frequency) AS percentile_Bucket_50,\n quantileExactWeighted(75 / 100)(Bucket, Frequency) AS percentile_Bucket_75,\n quantileExactWeighted(99.9 / 100)(Bucket, Frequency) AS percentile_Bucket_99_9\nFROM DataTable"
            },
            {
                "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))",
                "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable"
            }
        })));
|
Loading…
Reference in New Issue
Block a user