Merge pull request #25589 from TszKitLo40/support-distinct-on

This commit is contained in:
Vladimir 2021-07-09 09:50:45 +03:00 committed by GitHub
commit bf0695ff84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 74 additions and 8 deletions

View File

@ -1,4 +1,5 @@
#include <memory>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/IParserBase.h>
#include <Parsers/CommonParsers.h>
@ -16,11 +17,12 @@ namespace DB
namespace ErrorCodes
{
extern const int TOP_AND_LIMIT_TOGETHER;
extern const int WITH_TIES_WITHOUT_ORDER_BY;
extern const int FIRST_AND_NEXT_TOGETHER;
extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED;
extern const int ROW_AND_ROWS_TOGETHER;
extern const int FIRST_AND_NEXT_TOGETHER;
extern const int SYNTAX_ERROR;
extern const int TOP_AND_LIMIT_TOGETHER;
extern const int WITH_TIES_WITHOUT_ORDER_BY;
}
@ -32,6 +34,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserKeyword s_select("SELECT");
ParserKeyword s_all("ALL");
ParserKeyword s_distinct("DISTINCT");
ParserKeyword s_distinct_on("DISTINCT ON");
ParserKeyword s_from("FROM");
ParserKeyword s_prewhere("PREWHERE");
ParserKeyword s_where("WHERE");
@ -77,12 +80,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr limit_by_length;
ASTPtr limit_by_offset;
ASTPtr limit_by_expression_list;
ASTPtr distinct_on_expression_list;
ASTPtr limit_offset;
ASTPtr limit_length;
ASTPtr top_length;
ASTPtr settings;
/// WITH expr list
/// WITH expr_list
{
if (s_with.ignore(pos, expected))
{
@ -94,7 +98,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
}
/// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list
/// SELECT [ALL/DISTINCT [ON (expr_list)]] [TOP N [WITH TIES]] expr_list
{
bool has_all = false;
if (!s_select.ignore(pos, expected))
@ -103,13 +107,27 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (s_all.ignore(pos, expected))
has_all = true;
if (s_distinct.ignore(pos, expected))
if (s_distinct_on.ignore(pos, expected))
{
if (open_bracket.ignore(pos, expected))
{
if (!exp_list.parse(pos, distinct_on_expression_list, expected))
return false;
if (!close_bracket.ignore(pos, expected))
return false;
}
else
return false;
}
else if (s_distinct.ignore(pos, expected))
{
select_query->distinct = true;
}
if (!has_all && s_all.ignore(pos, expected))
has_all = true;
if (has_all && select_query->distinct)
if (has_all && (select_query->distinct || distinct_on_expression_list))
return false;
if (s_top.ignore(pos, expected))
@ -256,13 +274,19 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
select_query->limit_with_ties = true;
}
if (limit_with_ties_occured && distinct_on_expression_list)
throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED);
if (s_by.ignore(pos, expected))
{
/// WITH TIES was used alongside LIMIT BY.
/// However, other kinds of queries, such as LIMIT n BY smth LIMIT m WITH TIES, are allowed,
/// so WITH TIES has to be rejected precisely in the LIMIT BY state.
if (limit_with_ties_occured)
throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED);
throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED);
if (distinct_on_expression_list)
throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR);
limit_by_length = limit_length;
limit_by_offset = limit_offset;
@ -335,6 +359,17 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
}
if (distinct_on_expression_list)
{
/// DISTINCT ON and LIMIT BY are mutually exclusive, checked before
assert (limit_by_expression_list == nullptr);
/// Transform `DISTINCT ON expr` to `LIMIT 1 BY expr`
limit_by_expression_list = distinct_on_expression_list;
limit_by_length = std::make_shared<ASTLiteral>(Field{UInt8(1)});
distinct_on_expression_list = nullptr;
}
/// Because TOP n is exactly equivalent to LIMIT n
if (top_length)
limit_length = top_length;

View File

@ -0,0 +1,8 @@
1 1 1
2 2 2
1 2 2
1 1 1
2 2 2
1 2 2
1 1 1
2 2 2

View File

@ -0,0 +1,23 @@
-- Test for SELECT DISTINCT ON (expr_list).
-- The parser rewrites DISTINCT ON (expr_list) into LIMIT 1 BY expr_list,
-- so each query below is expected to return one row per distinct value of the listed expressions.
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory;
-- Four rows; (1, 1, 1) and (1, 1, 2) share the same (a, b) pair.
INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2);
-- The distinct (a, b) pairs are (1, 1), (2, 2) and (1, 2): three rows expected.
SELECT DISTINCT ON (a, b) a, b, c FROM t1;
-- Same grouping as above, selecting every column with an asterisk: three rows expected.
SELECT DISTINCT ON (a, b) * FROM t1;
-- The distinct values of a are 1 and 2: two rows expected.
SELECT DISTINCT ON (a) * FROM t1;
-- Negative cases, currently disabled; each one carries the error code it is expected to raise.
-- fuzzer will fail, enable when fixed
-- SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 }
-- SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 }
-- SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 }
-- "Code: 47. DB::Exception: Missing columns: 'DISTINCT'" - error can be better
-- SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 }
-- SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 }
-- SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 }
-- SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 }
-- Clean up the table created by this test.
DROP TABLE IF EXISTS t1;