From db2956eb737e137f076f0c6467979621d4edc3a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 12 Mar 2015 06:31:15 +0300 Subject: [PATCH] dbms: removing duplicate elements from ORDER BY [#METR-2944]. --- .../DB/Interpreters/ExpressionAnalyzer.h | 3 ++ dbms/src/Interpreters/ExpressionAnalyzer.cpp | 35 +++++++++++++++++++ .../Interpreters/InterpreterSelectQuery.cpp | 8 ++--- .../00136_duplicate_order_by_elems.reference | 0 .../00136_duplicate_order_by_elems.sql | 1 + 5 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.reference create mode 100644 dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.sql diff --git a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h index c4b01ffe0ba..24ba3c863e7 100644 --- a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h +++ b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h @@ -218,6 +218,9 @@ private: /// Eliminates injective function calls and constant expressions from group by statement void optimizeGroupBy(); + /// Remove duplicate elements from ORDER BY. + void optimizeOrderBy(); + /// Convert an enumeration of values or a subquery into an ASTSet. node is the `in` or `notIn` function. void makeSet(ASTFunction * node, const Block & sample_block); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index be35443f5cc..ed20ee30054 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -85,6 +85,9 @@ void ExpressionAnalyzer::init() /// GROUP BY injective function elimination. optimizeGroupBy(); + /// Remove duplicate elements from ORDER BY. + optimizeOrderBy(); + /// array_join_alias_to_name, array_join_result_to_source. 
getArrayJoinedColumns(); @@ -531,6 +534,38 @@ void ExpressionAnalyzer::optimizeGroupBy() } +void ExpressionAnalyzer::optimizeOrderBy() +{ + if (!(select_query && select_query->order_expression_list)) + return; + + /// Deduplicate the sort conditions: only the first element for each (column name, collator locale) pair is kept; an absent collator counts as an empty locale, and the sort direction is not part of the key. + using NameAndLocale = std::pair; + std::set elems_set; + + ASTs & elems = select_query->order_expression_list->children; + ASTs unique_elems; + unique_elems.reserve(elems.size()); + + for (const auto & elem : elems) + { + String name = elem->children.front()->getColumnName(); + const ASTOrderByElement & order_by_elem = typeid_cast(*elem); + + if (elems_set.emplace( + std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple(order_by_elem.collator ? order_by_elem.collator->getLocale() : std::string())).second) + { + unique_elems.emplace_back(elem); + } + } + + if (unique_elems.size() < elems.size()) + elems = unique_elems; +} + + void ExpressionAnalyzer::makeSetsForIndex() { if (storage && ast && storage->supportsIndexForIn()) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 2c676e2b3c1..5952defae10 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -817,12 +817,10 @@ static SortDescription getSortDescription(ASTSelectQuery & query) { SortDescription order_descr; order_descr.reserve(query.order_expression_list->children.size()); - for (ASTs::iterator it = query.order_expression_list->children.begin(); - it != query.order_expression_list->children.end(); - ++it) + for (const auto & elem : query.order_expression_list->children) { - String name = (*it)->children.front()->getColumnName(); - const ASTOrderByElement & order_by_elem = typeid_cast(**it); + String name = elem->children.front()->getColumnName(); + const ASTOrderByElement & order_by_elem = typeid_cast(*elem); order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.collator); } diff --git 
a/dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.reference b/dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.sql b/dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.sql new file mode 100644 index 00000000000..66a0d1a11d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00136_duplicate_order_by_elems.sql @@ -0,0 +1 @@ +SELECT n FROM (SELECT number AS n FROM system.numbers LIMIT 1000000) ORDER BY n, n, n, n, n, n, n, n, n, n LIMIT 1000000, 1;