From 63db58710d8ddfac43bbe20b41e4909471b0ec79 Mon Sep 17 00:00:00 2001 From: Zijie Lu Date: Tue, 22 Jun 2021 19:25:14 +0800 Subject: [PATCH 1/7] Support for DISTINCT ON (columns) Signed-off-by: Zijie Lu --- src/Common/ErrorCodes.cpp | 1 + src/Parsers/ParserSelectQuery.cpp | 15 +++++++++++++++ .../0_stateless/01917_distinct_on.reference | 3 +++ tests/queries/0_stateless/01917_distinct_on.sql | 9 +++++++++ 4 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/01917_distinct_on.reference create mode 100644 tests/queries/0_stateless/01917_distinct_on.sql diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index d840830bf28..5afba23657d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -554,6 +554,7 @@ M(584, PROJECTION_NOT_USED) \ M(585, CANNOT_PARSE_YAML) \ M(586, CANNOT_CREATE_FILE) \ + M(587, DISTINCT_ON_AND_LIMIT_BY_TOGETHER) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 548ec8879bd..12e83486af8 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -21,6 +22,7 @@ namespace ErrorCodes extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; extern const int FIRST_AND_NEXT_TOGETHER; + extern const int DISTINCT_ON_AND_LIMIT_BY_TOGETHER; } @@ -32,6 +34,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_select("SELECT"); ParserKeyword s_all("ALL"); ParserKeyword s_distinct("DISTINCT"); + ParserKeyword s_distinct_on("DISTINCT ON"); ParserKeyword s_from("FROM"); ParserKeyword s_prewhere("PREWHERE"); ParserKeyword s_where("WHERE"); @@ -94,6 +97,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + bool has_distinct_on = false; + /// SELECT [ALL/DISTINCT] [TOP N [WITH TIES]] expr list { bool has_all = false; @@ -103,6 +108,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_all.ignore(pos, expected)) has_all = true; + if (s_distinct_on.ignore(pos, expected)) { + has_distinct_on = true; + if (!exp_list.parse(pos, limit_by_expression_list, expected)) + return false; + limit_by_length = std::make_shared(Field{UInt8(1)}); + } + if (s_distinct.ignore(pos, expected)) select_query->distinct = true; @@ -264,6 +276,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (limit_with_ties_occured) throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + if (has_distinct_on) + throw Exception("Can not use distinct on alongside LIMIT BY", ErrorCodes::DISTINCT_ON_AND_LIMIT_BY_TOGETHER); + limit_by_length = limit_length; limit_by_offset = limit_offset; limit_length = nullptr; diff --git a/tests/queries/0_stateless/01917_distinct_on.reference b/tests/queries/0_stateless/01917_distinct_on.reference new file mode 100644 index 00000000000..09e5879c7f6 --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.reference @@ -0,0 +1,3 @@ +1 1 1 +2 2 2 +1 2 2 diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql new file mode 100644 index 00000000000..0940d8566bd --- /dev/null +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2); + +SELECT DISTINCT ON (a, b) a, b, c FROM t1; + +DROP TABLE IF EXISTS t1; + From 9665d4109267cd62da0dec9f3c6d842ee90833ba Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Jul 2021 12:17:26 +0300 Subject: [PATCH 2/7] Force parentheses for DISTINCT ON --- src/Parsers/ParserSelectQuery.cpp | 39 ++++++++++++++----- .../0_stateless/01917_distinct_on.reference | 5 +++ .../queries/0_stateless/01917_distinct_on.sql | 14 ++++++- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 3f6607be0bc..8272a130422 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -80,12 +80,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr limit_by_length; ASTPtr limit_by_offset; ASTPtr limit_by_expression_list; + ASTPtr distinct_on_expression_list; ASTPtr limit_offset; ASTPtr limit_length; ASTPtr top_length; ASTPtr settings; - /// WITH expr list + /// WITH expr_list { if (s_with.ignore(pos, expected)) { @@ -97,7 +98,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - /// SELECT [DISTINCT ON expr] [ALL/DISTINCT] [TOP N [WITH TIES]] expr list + /// SELECT [ALL/DISTINCT [ON (expr_list)]] [TOP N [WITH TIES]] expr_list { bool has_all = false; if (!s_select.ignore(pos, expected)) @@ -108,18 +109,25 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_distinct_on.ignore(pos, expected)) { - if (!exp_list.parse(pos, limit_by_expression_list, expected)) + if (open_bracket.ignore(pos, expected)) + { + if (!exp_list.parse(pos, distinct_on_expression_list, expected)) + return false; + if (!close_bracket.ignore(pos, expected)) + return false; + } + else return false; - limit_by_length = std::make_shared(Field{UInt8(1)}); } - - if (s_distinct.ignore(pos, expected)) + else if (s_distinct.ignore(pos, expected)) + { select_query->distinct = true; + } if (!has_all && s_all.ignore(pos, expected)) has_all = true; - if (has_all && select_query->distinct) + if (has_all && (select_query->distinct || distinct_on_expression_list)) return false; if (s_top.ignore(pos, expected)) @@ -266,15 +274,18 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->limit_with_ties = true; } + if (limit_with_ties_occured && distinct_on_expression_list) + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + if (s_by.ignore(pos, expected)) { /// WITH TIES was used alongside LIMIT BY /// But there are other kind of queries like LIMIT n BY smth LIMIT m WITH TIES which are allowed. /// So we have to ignore WITH TIES exactly in LIMIT BY state. if (limit_with_ties_occured) - throw Exception("Can not use WITH TIES alongside LIMIT BY", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); - if (limit_by_length) + if (distinct_on_expression_list) throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::DISTINCT_ON_AND_LIMIT_BY_TOGETHER); limit_by_length = limit_length; @@ -348,6 +359,16 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + if (distinct_on_expression_list) + { + /// DISTINCT ON and LIMIT BY are mutually exclusive, checked before + assert (limit_by_expression_list == nullptr); + + /// Transform `DISTINCT ON expr` to `LIMIT 1 BY expr` + limit_by_expression_list = distinct_on_expression_list; + limit_by_length = std::make_shared(Field{UInt8(1)}); + } + /// Because TOP n in totally equals LIMIT n if (top_length) limit_length = top_length; diff --git a/tests/queries/0_stateless/01917_distinct_on.reference b/tests/queries/0_stateless/01917_distinct_on.reference index 09e5879c7f6..b5b231e5786 100644 --- a/tests/queries/0_stateless/01917_distinct_on.reference +++ b/tests/queries/0_stateless/01917_distinct_on.reference @@ -1,3 +1,8 @@ 1 1 1 2 2 2 1 2 2 +1 1 1 +2 2 2 +1 2 2 +1 1 1 +2 2 2 diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql index f9b12ca6f05..b7875719c92 100644 --- a/tests/queries/0_stateless/01917_distinct_on.sql +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -4,8 +4,20 @@ CREATE TABLE t1 (`a` UInt32, `b` UInt32, `c` UInt32 ) ENGINE = Memory; INSERT INTO t1 VALUES (1, 1, 1), (1, 1, 2), (2, 2, 2), (1, 2, 2); SELECT DISTINCT ON (a, b) a, b, c FROM t1; +SELECT DISTINCT ON (a, b) * FROM t1; +SELECT DISTINCT ON (a) * FROM t1; -SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { serverError 590 } +SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 590 } + +SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } +SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } + +-- "Code: 47. DB::Exception: Missing columns: 'DISTINCT'" - error can be better +SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 } +SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } + +SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } +SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } DROP TABLE IF EXISTS t1; From 3cef256f7905db0c797c5b93faa3806ef3f2ec94 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Jul 2021 12:22:08 +0300 Subject: [PATCH 3/7] Errorcode DISTINCT_ON_AND_LIMIT_BY_TOGETHER -> UNSUPPORTED_METHOD --- src/Common/ErrorCodes.cpp | 1 - src/Parsers/ParserSelectQuery.cpp | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index e165e184cd3..f4ceef2896a 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -557,7 +557,6 @@ M(587, CONCURRENT_ACCESS_NOT_SUPPORTED) \ M(588, DISTRIBUTED_BROKEN_BATCH_INFO) \ M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \ - M(590, DISTINCT_ON_AND_LIMIT_BY_TOGETHER) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 8272a130422..2b7f6bcaaf9 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -17,12 +17,12 @@ namespace DB namespace ErrorCodes { - extern const int TOP_AND_LIMIT_TOGETHER; - extern const int WITH_TIES_WITHOUT_ORDER_BY; + extern const int FIRST_AND_NEXT_TOGETHER; extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; - extern const int FIRST_AND_NEXT_TOGETHER; - extern const int DISTINCT_ON_AND_LIMIT_BY_TOGETHER; + extern const int TOP_AND_LIMIT_TOGETHER; + extern const int UNSUPPORTED_METHOD; + extern const int WITH_TIES_WITHOUT_ORDER_BY; } @@ -286,7 +286,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); if (distinct_on_expression_list) - throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::DISTINCT_ON_AND_LIMIT_BY_TOGETHER); + throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::UNSUPPORTED_METHOD); limit_by_length = limit_length; limit_by_offset = limit_offset; From 96536a9cbebe06522e2bd332003f4a59009d7aca Mon Sep 17 00:00:00 2001 From: Vladimir Date: Tue, 6 Jul 2021 15:32:28 +0300 Subject: [PATCH 4/7] Update tests/queries/0_stateless/01917_distinct_on.sql --- tests/queries/0_stateless/01917_distinct_on.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql index b7875719c92..e394f219b62 100644 --- a/tests/queries/0_stateless/01917_distinct_on.sql +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -7,7 +7,7 @@ SELECT DISTINCT ON (a, b) a, b, c FROM t1; SELECT DISTINCT ON (a, b) * FROM t1; SELECT DISTINCT ON (a) * FROM t1; -SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 590 } +SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 1 } SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } @@ -20,4 +20,3 @@ SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } DROP TABLE IF EXISTS t1; - From b44bd174cc40ba1fdd8e4e185a5e383621529687 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 6 Jul 2021 19:14:22 +0300 Subject: [PATCH 5/7] Change error code for DISTINCT ON and LIMIT BY, finally --- src/Parsers/ParserSelectQuery.cpp | 4 ++-- tests/queries/0_stateless/01917_distinct_on.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 2b7f6bcaaf9..255595caa0e 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -20,8 +20,8 @@ namespace ErrorCodes extern const int FIRST_AND_NEXT_TOGETHER; extern const int LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED; extern const int ROW_AND_ROWS_TOGETHER; + extern const int SYNTAX_ERROR; extern const int TOP_AND_LIMIT_TOGETHER; - extern const int UNSUPPORTED_METHOD; extern const int WITH_TIES_WITHOUT_ORDER_BY; } @@ -286,7 +286,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); if (distinct_on_expression_list) - throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::UNSUPPORTED_METHOD); + throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR); limit_by_length = limit_length; limit_by_offset = limit_offset; diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql index e394f219b62..75dd8c0b7b8 100644 --- a/tests/queries/0_stateless/01917_distinct_on.sql +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -7,7 +7,7 @@ SELECT DISTINCT ON (a, b) a, b, c FROM t1; SELECT DISTINCT ON (a, b) * FROM t1; SELECT DISTINCT ON (a) * FROM t1; -SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 1 } +SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 } SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } From 35ca8b97ac6a4a1897968b48cd86fc1170221981 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Wed, 7 Jul 2021 12:48:39 +0300 Subject: [PATCH 6/7] Set distinct_on_expression_list to null white transforming to limit_by --- src/Parsers/ParserSelectQuery.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 255595caa0e..b1f7570878f 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -367,6 +367,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Transform `DISTINCT ON expr` to `LIMIT 1 BY expr` limit_by_expression_list = distinct_on_expression_list; limit_by_length = std::make_shared(Field{UInt8(1)}); + distinct_on_expression_list = nullptr; } /// Because TOP n in totally equals LIMIT n From 02977007dc7559c126e23a004eef86c717dd19a4 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Thu, 8 Jul 2021 15:45:23 +0300 Subject: [PATCH 7/7] Remove queries with syntax error from 01917_distinct_on.sql --- tests/queries/0_stateless/01917_distinct_on.sql | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01917_distinct_on.sql b/tests/queries/0_stateless/01917_distinct_on.sql index 75dd8c0b7b8..ae528b6e838 100644 --- a/tests/queries/0_stateless/01917_distinct_on.sql +++ b/tests/queries/0_stateless/01917_distinct_on.sql @@ -7,16 +7,17 @@ SELECT DISTINCT ON (a, b) a, b, c FROM t1; SELECT DISTINCT ON (a, b) * FROM t1; SELECT DISTINCT ON (a) * FROM t1; -SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 } +-- fuzzer will fail, enable when fixed +-- SELECT DISTINCT ON (a, b) a, b, c FROM t1 LIMIT 1 BY a, b; -- { clientError 62 } -SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } -SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON a, b a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON a a, b FROM t1; -- { clientError 62 } -- "Code: 47. DB::Exception: Missing columns: 'DISTINCT'" - error can be better -SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 } -SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON (a, b) DISTINCT a, b FROM t1; -- { serverError 47 } +-- SELECT DISTINCT DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } -SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } -SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } +-- SELECT ALL DISTINCT ON (a, b) a, b FROM t1; -- { clientError 62 } +-- SELECT DISTINCT ON (a, b) ALL a, b FROM t1; -- { clientError 62 } DROP TABLE IF EXISTS t1;