diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 548ec8879bd..b1f7570878f 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -16,11 +17,12 @@ namespace DB namespace ErrorCodes { - extern const int TOP_AND_LIMIT_TOGETHER; - extern const int WITH_TIES_WITHOUT_ORDER_BY; + extern const int FIRST_AND_NEXT_TOGETHER; extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; - extern const int FIRST_AND_NEXT_TOGETHER; + extern const int SYNTAX_ERROR; + extern const int TOP_AND_LIMIT_TOGETHER; + extern const int WITH_TIES_WITHOUT_ORDER_BY; } @@ -32,6 +34,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_select("SELECT"); ParserKeyword s_all("ALL"); ParserKeyword s_distinct("DISTINCT"); + ParserKeyword s_distinct_on("DISTINCT ON"); ParserKeyword s_from("FROM"); ParserKeyword s_prewhere("PREWHERE"); ParserKeyword s_where("WHERE"); @@ -77,12 +80,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr limit_by_length; ASTPtr limit_by_offset; ASTPtr limit_by_expression_list; + ASTPtr distinct_on_expression_list; ASTPtr limit_offset; ASTPtr limit_length; ASTPtr top_length; ASTPtr settings; - /// WITH expr list + /// WITH expr_list { if (s_with.ignore(pos, expected)) { @@ -94,7 +98,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - /// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list + /// SELECT [ALL/DISTINCT [ON (expr_list)]] [TOP N [WITH TIES]] expr_list { bool has_all = false; if (!s_select.ignore(pos, expected)) @@ -103,13 +107,27 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_all.ignore(pos, expected)) has_all = true; - if (s_distinct.ignore(pos, expected)) + if (s_distinct_on.ignore(pos, expected)) + { + if (open_bracket.ignore(pos, expected)) + { + if (!exp_list.parse(pos, distinct_on_expression_list, expected)) + return false; + if (!close_bracket.ignore(pos, expected)) + return false; + } + else + return false; + } + else if (s_distinct.ignore(pos, expected)) + { select_query->distinct = true; + } if (!has_all && s_all.ignore(pos, expected)) has_all = true; - if (has_all && select_query->distinct) + if (has_all && (select_query->distinct || distinct_on_expression_list)) return false; if (s_top.ignore(pos, expected)) @@ -256,13 +274,19 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->limit_with_ties = true; } + if (limit_with_ties_occured && distinct_on_expression_list) + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + if (s_by.ignore(pos, expected)) { /// WITH TIES was used alongside LIMIT BY /// But there are other kind of queries like LIMIT n BY smth LIMIT m WITH TIES which are allowed. /// So we have to ignore WITH TIES exactly in LIMIT BY state. if (limit_with_ties_occured) - throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + + if (distinct_on_expression_list) + throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR); limit_by_length = limit_length; limit_by_offset = limit_offset; @@ -335,6 +359,17 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + if (distinct_on_expression_list) + { + /// DISTINCT ON and LIMIT BY are mutually exclusive, checked before + assert (limit_by_expression_list == nullptr); + + /// Transform `DISTINCT ON expr` to `LIMIT 1 BY expr` + limit_by_expression_list = distinct_on_expression_list; + limit_by_length = std::make_shared(Field{UInt8(1)}); + distinct_on_expression_list = nullptr; + } + /// Because TOP n in totally equals LIMIT n if (top_length) limit_length = top_length; diff --git a/tests/queries/0_stateless/01917_distinct_on.reference b/tests/queries/0_stateless/01917_distinct_on.reference new file mode 100644 index 00000000000..b5b231e5786 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.reference @@ -0,0 +1,8 @@ +1 1 1 +2 2 2 +1 2 2 +1 1 1 +2 2 2 +1 2 2 +1 1 1 +2 2 2 diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql new file mode 100644 index 00000000000..ae528b6e838 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2); + +SELECT DISTINCT ON (a, b) a, b, c FROM t1; +SELECT DISTINCT ON (a, b) * FROM t1; +SELECT DISTINCT ON (a) * FROM t1; + +-- fuzzer will fail, enable when fixed +-- SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 } + +-- SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } + +-- "Code: 47. DB::Exception: Missing columns: 'DISTINCT'" - error can be better +-- SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 } +-- SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } + +-- SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } + +DROP TABLE IF EXISTS t1;