Merge pull request #57875 from zhongyuankai/order_by_all

Support `ORDER BY ALL`
This commit is contained in:
Robert Schulze 2023-12-19 13:38:07 +01:00 committed by GitHub
commit 952175ce39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 352 additions and 5 deletions

View File

@ -4164,6 +4164,41 @@ Result:
└─────┴─────┴───────┘
```
## enable_order_by_all {#enable-order-by-all}
Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md)
Possible values:
- 0 — Disable ORDER BY ALL.
- 1 — Enable ORDER BY ALL.
Default value: `1`.
**Example**
Query:
```sql
CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory();
INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous
SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all;
```
Result:
```text
┌─C1─┬─C2─┬─ALL─┐
│ 20 │ 20 │ 10 │
│ 30 │ 10 │ 20 │
│ 10 │ 20 │ 30 │
└────┴────┴─────┘
```
## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.

View File

@ -5,12 +5,22 @@ sidebar_label: ORDER BY
# ORDER BY Clause
The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so its usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`. Sorting is case-sensitive.
The `ORDER BY` clause contains
If you want to sort by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
- a list of expressions, e.g. `ORDER BY visits, search_phrase`,
- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or
- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`.
Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time).
If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well.
To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0.
To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0.
The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction.
Unless an explicit sort order is specified, `ASC` is used by default.
The sorting direction applies to a single expression, not to the entire list, e.g. `ORDER BY Visits DESC, SearchPhrase`.
Also, sorting is performed case-sensitively.
Rows with identical values for a sort expressions are returned in an arbitrary and non-deterministic order.
If the `ORDER BY` clause is omitted in a `SELECT` statement, the row order is also arbitrary and non-deterministic.
## Sorting of Special Values

View File

@ -61,6 +61,22 @@ sidebar_label: ORDER BY
我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。
## ORDER BY ALL
`ORDER BY ALL` 对所有选定的列进行升序排序。
示例:
``` sql
SELECT a, b, c FROM t ORDER BY ALL
```
等同于:
``` sql
SELECT a, b, c FROM t ORDER BY a, b, c
```
## 实现细节 {#implementation-details}
更少的RAM使用如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。

View File

@ -119,6 +119,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
extern const int UNEXPECTED_EXPRESSION;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1209,6 +1210,8 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static void expandOrderByAll(QueryNode & query_tree_node_typed);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
@ -2312,6 +2315,35 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed)
{
auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as<SortNode>();
if (!all_node)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not sort node.");
auto & projection_nodes = query_tree_node_typed.getProjection().getNodes();
auto list_node = std::make_shared<ListNode>();
list_node->getNodes().reserve(projection_nodes.size());
for (auto & node : projection_nodes)
{
if (auto * identifier_node = node->as<IdentifierNode>(); identifier_node != nullptr)
if (Poco::toUpper(identifier_node->getIdentifier().getFullName()) == "ALL" || Poco::toUpper(identifier_node->getAlias()) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function_node = node->as<FunctionNode>(); function_node != nullptr)
if (Poco::toUpper(function_node->getAlias()) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
auto sort_node = std::make_shared<SortNode>(node, all_node->getSortDirection(), all_node->getNullsSortDirection());
list_node->getNodes().push_back(sort_node);
}
query_tree_node_typed.getOrderByNode() = list_node;
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{
@ -6975,6 +7007,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
if (settings.enable_order_by_all && query_node_typed.isOrderByAll())
expandOrderByAll(query_node_typed);
/// Initialize aliases in query node scope
QueryExpressionsAliasVisitor visitor(scope);

View File

@ -219,6 +219,18 @@ public:
is_group_by_all = is_group_by_all_value;
}
/// Returns true, if query node has ORDER BY ALL modifier, false otherwise
bool isOrderByAll() const
{
return is_order_by_all;
}
/// Set query node ORDER BY ALL modifier value
void setIsOrderByAll(bool is_order_by_all_value)
{
is_order_by_all = is_order_by_all_value;
}
/// Returns true if query node WITH section is not empty, false otherwise
bool hasWith() const
{
@ -590,6 +602,7 @@ private:
bool is_group_by_with_cube = false;
bool is_group_by_with_grouping_sets = false;
bool is_group_by_all = false;
bool is_order_by_all = false;
std::string cte_name;
NamesAndTypes projection_columns;

View File

@ -284,6 +284,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
current_query_tree->setOriginalAST(select_query);
auto current_context = current_query_tree->getContext();

View File

@ -846,6 +846,7 @@ class IColumn;
M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \
M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \
M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0)\
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.

View File

@ -73,6 +73,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNEXPECTED_EXPRESSION;
}
namespace
@ -776,6 +777,37 @@ void expandGroupByAll(ASTSelectQuery * select_query)
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
void expandOrderByAll(ASTSelectQuery * select_query)
{
auto * all_elem = select_query->orderBy()->children[0]->as<ASTOrderByElement>();
if (!all_elem)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not order by asts.");
auto order_expression_list = std::make_shared<ASTExpressionList>();
for (const auto & expr : select_query->select()->children)
{
if (auto * identifier = expr->as<ASTIdentifier>(); identifier != nullptr)
if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function = expr->as<ASTFunction>(); function != nullptr)
if (Poco::toUpper(function->alias) == "ALL")
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
auto elem = std::make_shared<ASTOrderByElement>();
elem->direction = all_elem->direction;
elem->nulls_direction = all_elem->nulls_direction;
elem->nulls_direction_was_explicitly_specified = all_elem->nulls_direction_was_explicitly_specified;
elem->children.push_back(expr);
order_expression_list->children.push_back(elem);
}
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, order_expression_list);
}
ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
@ -1292,6 +1324,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
if (select_query->group_by_all)
expandGroupByAll(select_query);
// expand ORDER BY ALL
if (settings.enable_order_by_all && select_query->order_by_all)
expandOrderByAll(select_query);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)

View File

@ -144,7 +144,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
window()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
}
if (orderBy())
if (!order_by_all && orderBy())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY" << (s.hilite ? hilite_none : "");
s.one_line
@ -163,6 +163,24 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
}
}
if (order_by_all)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : "");
auto * elem = orderBy()->children[0]->as<ASTOrderByElement>();
s.ostr << (s.hilite ? hilite_keyword : "")
<< (elem->direction == -1 ? " DESC" : " ASC")
<< (s.hilite ? hilite_none : "");
if (elem->nulls_direction_was_explicitly_specified)
{
s.ostr << (s.hilite ? hilite_keyword : "")
<< " NULLS "
<< (elem->nulls_direction == elem->direction ? "LAST" : "FIRST")
<< (s.hilite ? hilite_none : "");
}
}
if (limitByLength())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : "");

View File

@ -87,6 +87,7 @@ public:
bool group_by_with_cube = false;
bool group_by_with_constant_keys = false;
bool group_by_with_grouping_sets = false;
bool order_by_all = false;
bool limit_with_ties = false;
ASTPtr & refSelect() { return getExpression(Expression::SELECT); }

View File

@ -14,6 +14,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTIdentifier.h>
#include <Poco/String.h>
namespace DB
@ -287,6 +288,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
interpolate_expression_list = std::make_shared<ASTExpressionList>();
}
}
else if (order_expression_list->children.size() == 1)
{
/// ORDER BY ALL
auto * identifier = order_expression_list->children[0]->as<ASTOrderByElement>()->children[0]->as<ASTIdentifier>();
if (identifier != nullptr && Poco::toUpper(identifier->name()) == "ALL")
select_query->order_by_all = true;
}
}
/// This is needed for TOP expression, because it can also use WITH TIES.

View File

@ -0,0 +1,84 @@
-- no modifiers
A 2
B 3
C \N
D 1
1 D
2 A
3 B
\N C
A 2
B 3
C \N
D 1
1 D
2 A
3 B
\N C
-- with ASC/DESC modifiers
A 2
B 3
C \N
D 1
D 1
C \N
B 3
A 2
A 2
B 3
C \N
D 1
D 1
C \N
B 3
A 2
-- with NULLS FIRST/LAST modifiers
\N C
1 D
2 A
3 B
1 D
2 A
3 B
\N C
\N C
1 D
2 A
3 B
1 D
2 A
3 B
\N C
-- what happens if some column "all" already exists?
B 3 10
D 1 20
A 2 30
C \N 40
B 3 10
D 1 20
A 2 30
C \N 40
D 1
A 2
B 3
C \N
D 1
A 2
B 3
C \N
A 2
B 3
D 1
\N
A 2
B 3
D 1
\N
B 3 10
D 1 20
A 2 30
C \N 40
B 3 10
D 1 20
A 2 30
C \N 40

View File

@ -0,0 +1,89 @@
-- Tests that sort expression ORDER BY ALL
DROP TABLE IF EXISTS order_by_all;
CREATE TABLE order_by_all
(
a String,
b Nullable(Int32),
all UInt64,
)
ENGINE = Memory;
INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30);
SELECT '-- no modifiers';
SET allow_experimental_analyzer = 0;
SELECT a, b FROM order_by_all ORDER BY ALL;
SELECT b, a FROM order_by_all ORDER BY ALL;
SET allow_experimental_analyzer = 1;
SELECT a, b FROM order_by_all ORDER BY ALL;
SELECT b, a FROM order_by_all ORDER BY ALL;
SELECT '-- with ASC/DESC modifiers';
SET allow_experimental_analyzer = 0;
SELECT a, b FROM order_by_all ORDER BY ALL ASC;
SELECT a, b FROM order_by_all ORDER BY ALL DESC;
SET allow_experimental_analyzer = 1;
SELECT a, b FROM order_by_all ORDER BY ALL ASC;
SELECT a, b FROM order_by_all ORDER BY ALL DESC;
SELECT '-- with NULLS FIRST/LAST modifiers';
SET allow_experimental_analyzer = 0;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST;
SET allow_experimental_analyzer = 1;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST;
SELECT '-- what happens if some column "all" already exists?';
-- columns
SET allow_experimental_analyzer = 0;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- column aliases
SET allow_experimental_analyzer = 0;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- expressions
SET allow_experimental_analyzer = 0;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 0;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
SET allow_experimental_analyzer = 1;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
DROP TABLE order_by_all;