Merge pull request #28910 from ClickHouse/fix-trash-optimization

Fix bad optimization of ORDER BY in subquery if it contains WITH FILL
This commit is contained in:
alexey-milovidov 2021-09-12 15:27:53 +03:00 committed by GitHub
commit f066edc43c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 257 additions and 92 deletions

View File

@ -0,0 +1,124 @@
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTOrderByElement.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_TYPE_OF_AST_NODE;
}
namespace
{
/// Matcher data that looks for functions marked as order dependent or stateful.
/// `is_stateful` is raised (and never lowered) as soon as a visited function is either
/// an aggregate function flagged `is_order_dependent` or an ordinary function for which
/// `isStateful()` returns true.
class ASTFunctionStatefulData
{
public:
    using TypeToVisit = ASTFunction;

    ContextPtr context;
    bool & is_stateful;

    void visit(ASTFunction & ast_function, ASTPtr &)
    {
        const auto & name = ast_function.name;

        const auto is_order_dependent_aggregate = [&]
        {
            auto properties = AggregateFunctionFactory::instance().tryGetProperties(name);
            return properties && properties->is_order_dependent;
        };

        const auto is_stateful_function = [&]
        {
            const auto & resolver = FunctionFactory::instance().tryGet(name, context);
            return resolver && resolver->isStateful();
        };

        /// Short-circuit: the ordinary function factory is consulted only when
        /// the aggregate function check did not match.
        if (is_order_dependent_aggregate() || is_stateful_function())
            is_stateful = true;
    }
};
using ASTFunctionStatefulMatcher = OneTypeMatcher<ASTFunctionStatefulData>;
using ASTFunctionStatefulVisitor = InDepthNodeVisitor<ASTFunctionStatefulMatcher, true>;
}
/// Drops ORDER BY from the first SELECT that is visited, provided it is safe to do so:
/// the query must have no LIMIT / LIMIT BY parts and no ORDER BY element may use WITH FILL.
/// Throws UNKNOWN_TYPE_OF_AST_NODE if an ORDER BY child is not a well-formed ASTOrderByElement.
void DuplicateOrderByFromSubqueriesData::visit(ASTSelectQuery & select_query, ASTPtr &)
{
    /// Only the first SELECT is processed; every later call is a no-op.
    if (done)
        return;
    done = true;

    if (!select_query.orderBy())
        return;

    /// If we have limits then the ORDER BY is non-removable.
    const bool has_any_limit = select_query.limitBy()
        || select_query.limitByOffset()
        || select_query.limitByLength()
        || select_query.limitLength()
        || select_query.limitOffset();
    if (has_any_limit)
        return;

    /// If ORDER BY contains filling (in addition to sorting) it is non-removable.
    for (const auto & element : select_query.orderBy()->children)
    {
        const auto * order_by_element = element->as<ASTOrderByElement>();
        const bool is_malformed = !order_by_element || order_by_element->children.empty();
        if (is_malformed)
            throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

        if (order_by_element->with_fill)
            return;
    }

    select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr);
}
/// For a SELECT that has ORDER BY or GROUP BY, runs the subquery ORDER BY eraser on the
/// first element of its tables expression. Nothing is done if any expression list of the
/// SELECT contains a function reported as stateful / order dependent.
void DuplicateOrderByData::visit(ASTSelectQuery & select_query, ASTPtr &)
{
    if (!select_query.orderBy() && !select_query.groupBy())
        return;

    /// Scan every expression list of the query for stateful functions.
    for (auto & child : select_query.children)
    {
        if (!child->as<ASTExpressionList>())
            continue;

        bool is_stateful = false;
        ASTFunctionStatefulVisitor::Data stateful_data{context, is_stateful};
        ASTFunctionStatefulVisitor(stateful_data).visit(child);
        if (is_stateful) //-V547
            return;
    }

    auto tables_ptr = select_query.tables();
    if (!tables_ptr)
        return;

    auto * tables_in_select = tables_ptr->as<ASTTablesInSelectQuery>();
    if (!tables_in_select || tables_in_select->children.empty())
        return;

    /// Only the first table element is visited.
    DuplicateOrderByFromSubqueriesVisitor::Data subquery_data{false};
    DuplicateOrderByFromSubqueriesVisitor(subquery_data).visit(tables_in_select->children[0]);
}
}

View File

@ -1,51 +1,13 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/IAST.h>
#include <Common/typeid_cast.h>
namespace DB
{
/// Checks if SELECT has stateful functions.
/// Visitor data for ASTFunction nodes: sets `is_stateful` to true when the visited function is
/// an aggregate function flagged as order dependent, or an ordinary function reporting isStateful().
class ASTFunctionStatefulData
{
public:
using TypeToVisit = ASTFunction;
ContextPtr context;
/// Output flag owned by the caller; this class only ever sets it to true.
bool & is_stateful;
void visit(ASTFunction & ast_function, ASTPtr &)
{
/// Look the name up among aggregate functions first.
auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name);
if (aggregate_function_properties && aggregate_function_properties->is_order_dependent)
{
is_stateful = true;
return;
}
/// Otherwise try the ordinary function factory.
const auto & function = FunctionFactory::instance().tryGet(ast_function.name, context);
if (function && function->isStateful())
{
is_stateful = true;
return;
}
}
};
using ASTFunctionStatefulMatcher = OneTypeMatcher<ASTFunctionStatefulData>;
using ASTFunctionStatefulVisitor = InDepthNodeVisitor<ASTFunctionStatefulMatcher, true>;
class ASTSelectQuery;
/// Erases unnecessary ORDER BY from subquery
class DuplicateOrderByFromSubqueriesData
@ -55,19 +17,7 @@ public:
bool done = false;
/// Removes ORDER BY from the first visited SELECT when that SELECT has no LIMIT / LIMIT BY parts.
/// NOTE: this inline variant does not inspect the ORDER BY elements themselves (e.g. WITH FILL).
void visit(ASTSelectQuery & select_query, ASTPtr &)
{
/// Only the first SELECT is processed; later calls return immediately.
if (done)
return;
if (select_query.orderBy() && !select_query.limitBy() && !select_query.limitByOffset() &&
!select_query.limitByLength() && !select_query.limitLength() && !select_query.limitOffset())
{
/// Passing nullptr clears the ORDER BY expression of the query.
select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr);
}
done = true;
}
void visit(ASTSelectQuery & select_query, ASTPtr &);
};
using DuplicateOrderByFromSubqueriesMatcher = OneTypeMatcher<DuplicateOrderByFromSubqueriesData>;
@ -82,35 +32,7 @@ public:
ContextPtr context;
/// For a SELECT with ORDER BY or GROUP BY, runs DuplicateOrderByFromSubqueriesVisitor on the first
/// child of its tables expression, unless one of its expression lists contains a stateful function.
void visit(ASTSelectQuery & select_query, ASTPtr &)
{
if (select_query.orderBy() || select_query.groupBy())
{
/// Bail out completely if any expression list contains a stateful / order-dependent function.
for (auto & elem : select_query.children)
{
if (elem->as<ASTExpressionList>())
{
bool is_stateful = false;
ASTFunctionStatefulVisitor::Data data{context, is_stateful};
ASTFunctionStatefulVisitor(data).visit(elem);
if (is_stateful) //-V547
return;
}
}
if (auto select_table_ptr = select_query.tables())
{
if (auto * select_table = select_table_ptr->as<ASTTablesInSelectQuery>())
{
if (!select_table->children.empty())
{
/// Fresh data with done = false; only the first tables element is visited.
DuplicateOrderByFromSubqueriesVisitor::Data data{false};
DuplicateOrderByFromSubqueriesVisitor(data).visit(select_table->children[0]);
}
}
}
}
}
void visit(ASTSelectQuery & select_query, ASTPtr &);
};
using DuplicateOrderByMatcher = OneTypeMatcher<DuplicateOrderByData>;

View File

@ -1240,7 +1240,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
{
auto * ast = child->as<ASTOrderByElement>();
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
if (getContext()->getSettingsRef().enable_positional_arguments)
{

View File

@ -24,14 +24,14 @@ public:
void initFromDefaults(size_t from_pos = 0);
Field & operator[](size_t ind) { return row[ind]; }
const Field & operator[](size_t ind) const { return row[ind]; }
Field & operator[](size_t index) { return row[index]; }
const Field & operator[](size_t index) const { return row[index]; }
size_t size() const { return row.size(); }
bool operator<(const FillingRow & other) const;
bool operator==(const FillingRow & other) const;
int getDirection(size_t ind) const { return description[ind].direction; }
FillColumnDescription & getFillDescription(size_t ind) { return description[ind].fill_description; }
int getDirection(size_t index) const { return description[index].direction; }
FillColumnDescription & getFillDescription(size_t index) { return description[index].fill_description; }
private:
Row row;

View File

@ -39,6 +39,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
}
namespace
@ -282,7 +283,8 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
String name = elem->children.front()->getColumnName();
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second)
if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other.
|| elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second)
unique_elems.emplace_back(elem);
}
@ -425,6 +427,17 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context
if (!order_by)
return;
for (const auto & child : order_by->children)
{
auto * order_by_element = child->as<ASTOrderByElement>();
if (!order_by_element || order_by_element->children.empty())
throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
if (order_by_element->with_fill)
return;
}
std::unordered_set<String> group_by_hashes;
if (auto group_by = select_query->groupBy())
{
@ -440,6 +453,7 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context
for (size_t i = 0; i < order_by->children.size(); ++i)
{
auto * order_by_element = order_by->children[i]->as<ASTOrderByElement>();
auto & ast_func = order_by_element->children[0];
if (!ast_func->as<ASTFunction>())
continue;
@ -475,6 +489,17 @@ void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, Co
if (!order_by)
return;
for (const auto & child : order_by->children)
{
auto * order_by_element = child->as<ASTOrderByElement>();
if (!order_by_element || order_by_element->children.empty())
throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
if (order_by_element->with_fill)
return;
}
std::unordered_set<String> prev_keys;
ASTs modified;
modified.reserve(order_by->children.size());

View File

@ -81,7 +81,7 @@ FillingTransform::FillingTransform(
};
std::vector<bool> is_fill_column(header_.columns());
for (size_t i = 0; i < sort_description.size(); ++i)
for (size_t i = 0, size = sort_description.size(); i < size; ++i)
{
size_t block_position = header_.getPositionByName(sort_description[i].column_name);
is_fill_column[block_position] = true;
@ -103,6 +103,11 @@ FillingTransform::FillingTransform(
}
}
std::set<size_t> unique_positions;
for (auto pos : fill_column_positions)
if (!unique_positions.insert(pos).second)
throw Exception("Multiple WITH FILL for identical expressions is not supported in ORDER BY", ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
for (size_t i = 0; i < header_.columns(); ++i)
if (!is_fill_column[i])
other_column_positions.push_back(i);
@ -114,7 +119,7 @@ IProcessor::Status FillingTransform::prepare()
{
should_insert_first = next_row < filling_row;
for (size_t i = 0; i < filling_row.size(); ++i)
for (size_t i = 0, size = filling_row.size(); i < size; ++i)
next_row[i] = filling_row.getFillDescription(i).fill_to;
if (filling_row < next_row)
@ -227,9 +232,9 @@ void FillingTransform::setResultColumns(Chunk & chunk, MutableColumns & fill_col
/// fill_columns always non-empty.
size_t num_rows = fill_columns[0]->size();
for (size_t i = 0; i < fill_columns.size(); ++i)
for (size_t i = 0, size = fill_columns.size(); i < size; ++i)
result_columns[fill_column_positions[i]] = std::move(fill_columns[i]);
for (size_t i = 0; i < other_columns.size(); ++i)
for (size_t i = 0, size = other_columns.size(); i < size; ++i)
result_columns[other_column_positions[i]] = std::move(other_columns[i]);
chunk.setColumns(std::move(result_columns), num_rows);

View File

@ -0,0 +1 @@
-- The subquery's ORDER BY uses WITH FILL, which adds rows in addition to sorting,
-- so it must be kept even though the outer query sorts again by s.
SELECT s FROM (SELECT 5 AS x, 'Hello' AS s ORDER BY x WITH FILL FROM 1 TO 10) ORDER BY s;

View File

@ -0,0 +1,3 @@
2021-07-07 15:21:00
2021-07-07 15:21:05
2021-07-07 15:21:10

View File

@ -0,0 +1,6 @@
-- ORDER BY key is the alias of a function call and carries WITH FILL ... STEP 5.
-- Presumably guards against ORDER BY rewrites dropping or altering the WITH FILL element.
SELECT toStartOfMinute(some_time) AS ts
FROM
(
SELECT toDateTime('2021-07-07 15:21:05') AS some_time
)
ORDER BY ts ASC WITH FILL FROM toDateTime('2021-07-07 15:21:00') TO toDateTime('2021-07-07 15:21:15') STEP 5;

View File

@ -0,0 +1,9 @@
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1 @@
-- `x WITH FILL` follows `-x` in ORDER BY; the WITH FILL element has to survive ORDER BY optimizations
-- (the expected output appears to be the filled range 1..9 - confirm against the reference file).
SELECT x FROM (SELECT 5 AS x) ORDER BY -x, x WITH FILL FROM 1 TO 10;

View File

@ -0,0 +1 @@
-- The same expression `x` appears twice with WITH FILL; the query is expected to fail with server error 475.
SELECT x, y FROM (SELECT 5 AS x, 'Hello' AS y) ORDER BY x WITH FILL FROM 3 TO 7, y, x WITH FILL FROM 1 TO 10; -- { serverError 475 }

View File

@ -0,0 +1,45 @@
3 -10
3 -9
3 -8
3 -7
3 -6
3 -5
3 -4
3 -3
3 -2
4 -10
4 -9
4 -8
4 -7
4 -6
4 -5
4 -4
4 -3
4 -2
5 -10
5 -9
5 -8
5 -7
5 -6
5 -5 Hello
5 -4
5 -3
5 -2
6 -10
6 -9
6 -8
6 -7
6 -6
6 -5
6 -4
6 -3
6 -2
7 -10
7 -9
7 -8
7 -7
7 -6
7 -5
7 -4
7 -3
7 -2

View File

@ -0,0 +1,14 @@
-- WITH FILL on both `x` and `-x`, with a plain sort key `y` between them:
-- both filling elements must be kept as written.
SELECT
x,
-x,
y
FROM
(
SELECT
5 AS x,
'Hello' AS y
)
ORDER BY
x ASC WITH FILL FROM 3 TO 7,
y ASC,
-x ASC WITH FILL FROM -10 TO -1;