This commit is contained in:
Robert Schulze 2024-02-21 20:24:32 +00:00
parent 52afa46e23
commit 7930c2a993
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
7 changed files with 131 additions and 66 deletions

View File

@ -4281,7 +4281,7 @@ Result:
## enable_order_by_all {#enable-order-by-all}
Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md)
Enables or disables sorting with `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md).
Possible values:
@ -4301,7 +4301,7 @@ INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous
SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all;
SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0;
```
Result:

View File

@ -80,6 +80,8 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>
#include <boost/algorithm/string.hpp>
namespace ProfileEvents
{
extern const Event ScalarSubqueriesGlobalCacheHit;
@ -2382,6 +2384,13 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Se
for (auto & node : projection_nodes)
{
/// Detect and reject ambiguous statements:
/// E.g. for a table with columns "all", "a", "b":
/// - SELECT all, a, b ORDER BY all; -- should we sort by all columns in SELECT or by column "all"?
/// - SELECT a, b AS all ORDER BY all; -- like before but "all" as alias
/// - SELECT func(...) AS all ORDER BY all; -- like before but "all" as function
/// - SELECT a, b ORDER BY all; -- tricky in other way: does the user want to sort by columns in SELECT clause or by not SELECTed column "all"?
auto resolved_expression_it = resolved_expressions.find(node);
if (resolved_expression_it != resolved_expressions.end())
{
@ -2390,7 +2399,7 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Se
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expression nodes list expected 1 projection names. Actual {}",
projection_names.size());
if (Poco::toUpper(projection_names[0]) == "ALL")
if (boost::iequals(projection_names[0], "all"))
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
}

View File

@ -1,4 +1,3 @@
#include <cstddef>
#include <Core/NamesAndTypes.h>
#include <base/sort.h>
@ -12,6 +11,8 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <boost/algorithm/string.hpp>
#include <cstddef>
namespace DB
{
@ -218,6 +219,16 @@ bool NamesAndTypesList::contains(const String & name) const
return false;
}
bool NamesAndTypesList::containsCaseInsensitive(const String & name) const
{
for (const NameAndTypePair & column : *this)
{
if (boost::iequals(column.name, name))
return true;
}
return false;
}
std::optional<NameAndTypePair> NamesAndTypesList::tryGetByName(const std::string & name) const
{
for (const NameAndTypePair & column : *this)

View File

@ -114,8 +114,9 @@ public:
/// Unlike `filter`, returns columns in the order in which they go in `names`.
NamesAndTypesList addTypes(const Names & names) const;
/// Check that column contains in list
/// Check if `name` is one of the column names
bool contains(const String & name) const;
bool containsCaseInsensitive(const String & name) const;
/// Try to get column by name, returns empty optional if column not found
std::optional<NameAndTypePair> tryGetByName(const std::string & name) const;

View File

@ -60,6 +60,8 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <boost/algorithm/string.hpp>
namespace DB
{
@ -776,7 +778,7 @@ void expandGroupByAll(ASTSelectQuery * select_query)
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
void expandOrderByAll(ASTSelectQuery * select_query)
void expandOrderByAll(ASTSelectQuery * select_query, [[maybe_unused]] const TablesWithColumns & tables_with_columns)
{
auto * all_elem = select_query->orderBy()->children[0]->as<ASTOrderByElement>();
if (!all_elem)
@ -786,16 +788,32 @@ void expandOrderByAll(ASTSelectQuery * select_query)
for (const auto & expr : select_query->select()->children)
{
/// Detect and reject ambiguous statements:
/// E.g. for a table with columns "all", "a", "b":
/// - SELECT all, a, b ORDER BY all; -- should we sort by all columns in SELECT or by column "all"?
/// - SELECT a, b AS all ORDER BY all; -- like before but "all" as alias
/// - SELECT func(...) AS all ORDER BY all; -- like before but "all" as function
/// - SELECT a, b ORDER BY all; -- tricky in other way: does the user want to sort by columns in SELECT clause or by not SELECTed column "all"?
static const String all = "all";
if (auto * identifier = expr->as<ASTIdentifier>(); identifier != nullptr)
if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL")
if (boost::iequals(identifier->name(), all) || boost::iequals(identifier->alias, all))
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
if (auto * function = expr->as<ASTFunction>(); function != nullptr)
if (Poco::toUpper(function->alias) == "ALL")
if (boost::iequals(function->alias, all))
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
for (const auto & table_with_columns : tables_with_columns)
{
const auto & columns = table_with_columns.columns;
if (columns.containsCaseInsensitive(all))
throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION,
"Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again");
}
auto elem = std::make_shared<ASTOrderByElement>();
elem->direction = all_elem->direction;
elem->nulls_direction = all_elem->nulls_direction;
@ -1324,7 +1342,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
// expand ORDER BY ALL
if (settings.enable_order_by_all && select_query->order_by_all)
expandOrderByAll(select_query);
expandOrderByAll(select_query, tables_with_columns);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.

View File

@ -49,7 +49,25 @@ A 2
2 A
3 B
\N C
-- what happens if some column "all" already exists?
-- SELECT *
A 2
B 3
C \N
D 1
A 2
B 3
C \N
D 1
-- the trouble starts when "order by all is all" is ambiguous
-- columns
B 3 10
D 1 20
A 2 30
C \N 40
B
D
A
C
B 3 10
D 1 20
A 2 30
@ -58,6 +76,15 @@ B 3 10
D 1 20
A 2 30
C \N 40
B
D
A
C
B 3 10
D 1 20
A 2 30
C \N 40
-- column aliases
D 1
A 2
B 3
@ -66,6 +93,7 @@ D 1
A 2
B 3
C \N
-- expressions
A 2
B 3
D 1
@ -74,6 +102,7 @@ A 2
B 3
D 1
\N
-- ORDER BY ALL loses its special meaning when used in conjunction with other columns
B 3 10
D 1 20
A 2 30
@ -82,12 +111,3 @@ B 3 10
D 1 20
A 2 30
C \N 40
-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause
A 2 30
B 3 10
C \N 40
D 1 20
A 2 30
B 3 10
C \N 40
D 1 20

View File

@ -5,12 +5,11 @@ DROP TABLE IF EXISTS order_by_all;
CREATE TABLE order_by_all
(
a String,
b Nullable(Int32),
all UInt64,
b Nullable(Int32)
)
ENGINE = Memory;
INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30);
INSERT INTO order_by_all VALUES ('B', 3), ('C', NULL), ('D', 1), ('A', 2);
SELECT '-- no modifiers';
@ -42,68 +41,75 @@ SET allow_experimental_analyzer = 1;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST;
SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST;
SELECT '-- what happens if some column "all" already exists?';
-- columns
SELECT '-- SELECT *';
SET allow_experimental_analyzer = 0;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT * FROM order_by_all ORDER BY all;
SET allow_experimental_analyzer = 1;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- column aliases
SET allow_experimental_analyzer = 0;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- expressions
SET allow_experimental_analyzer = 0;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 0;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
SET allow_experimental_analyzer = 1;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
SELECT * FROM order_by_all ORDER BY all;
DROP TABLE order_by_all;
SELECT '-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause';
SELECT '-- the trouble starts when "order by all is all" is ambiguous';
CREATE TABLE order_by_all
(
a String,
b Nullable(Int32),
c UInt64,
all UInt64
)
ENGINE = Memory;
ENGINE = Memory;
INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30);
SELECT ' -- columns';
SET allow_experimental_analyzer = 0;
SELECT * FROM order_by_all ORDER BY ALL;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT a FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT * FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT * FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT * FROM order_by_all ORDER BY ALL;
SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT a FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- SELECT * FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -- (*) see below
SELECT * FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
-- SELECT a FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } -- (*) see below
-- (*) These queries show the expected behavior for analyzer. Unfortunately, it is not implemented that way yet,
-- which is not wrong but a bit unintuitive (some may say a landmine). Keeping the queries for now for reference.
SELECT ' -- column aliases';
SET allow_experimental_analyzer = 0;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT ' -- expressions';
SET allow_experimental_analyzer = 0;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SET allow_experimental_analyzer = 1;
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION }
SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false;
SELECT ' -- ORDER BY ALL loses its special meaning when used in conjunction with other columns';
SET allow_experimental_analyzer = 0;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
SET allow_experimental_analyzer = 1;
SELECT a, b, all FROM order_by_all ORDER BY all, a;
DROP TABLE order_by_all;