mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Remove duplicate ORDER BY and DISTINCT from subqueries (#10067)
This commit is contained in:
parent
20b8e1e9ec
commit
10566e2b43
@ -360,6 +360,7 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
|
||||
M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
|
||||
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
|
||||
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
|
||||
M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
|
||||
M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
|
||||
M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
|
||||
|
72
src/Interpreters/DuplicateDistinctVisitor.h
Normal file
72
src/Interpreters/DuplicateDistinctVisitor.h
Normal file
@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Removes duplicate DISTINCT from queries.
|
||||
class DuplicateDistinctMatcher
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
bool is_distinct;
|
||||
std::vector<String> last_ids;
|
||||
};
|
||||
|
||||
static void visit(const ASTPtr & ast, Data & data)
|
||||
{
|
||||
auto * select_query = ast->as<ASTSelectQuery>();
|
||||
if (select_query)
|
||||
visit(*select_query, data);
|
||||
}
|
||||
|
||||
static void visit(ASTSelectQuery & select_query, Data & data)
|
||||
{
|
||||
if (!select_query.distinct || !select_query.select())
|
||||
return;
|
||||
|
||||
/// Optimize shouldn't work for distributed tables
|
||||
for (const auto & elem : select_query.children)
|
||||
{
|
||||
if (elem->as<ASTSetQuery>() && !elem->as<ASTSetQuery>()->is_standalone)
|
||||
return;
|
||||
}
|
||||
|
||||
auto expression_list = select_query.select();
|
||||
std::vector<String> current_ids;
|
||||
|
||||
if (expression_list->children.empty())
|
||||
return;
|
||||
|
||||
current_ids.reserve(expression_list->children.size());
|
||||
for (const auto & id : expression_list->children)
|
||||
current_ids.push_back(id->getColumnName());
|
||||
|
||||
if (data.is_distinct && current_ids == data.last_ids)
|
||||
select_query.distinct = false;
|
||||
|
||||
data.is_distinct = true;
|
||||
data.last_ids = std::move(current_ids);
|
||||
}
|
||||
|
||||
static bool needChildVisit(const ASTPtr &, const ASTPtr &)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
using DuplicateDistinctVisitor = InDepthNodeVisitor<DuplicateDistinctMatcher, false>;
|
||||
|
||||
}
|
127
src/Interpreters/DuplicateOrderByVisitor.h
Normal file
127
src/Interpreters/DuplicateOrderByVisitor.h
Normal file
@ -0,0 +1,127 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Checks if SELECT has stateful functions
|
||||
class ASTFunctionStatefulData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTFunction;
|
||||
|
||||
const Context & context;
|
||||
bool & is_stateful;
|
||||
void visit(ASTFunction & ast_function, ASTPtr &)
|
||||
{
|
||||
if (ast_function.name == "any" || ast_function.name == "groupArray")
|
||||
{
|
||||
is_stateful = true;
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & function = FunctionFactory::instance().tryGet(ast_function.name, context);
|
||||
|
||||
if (function && function->isStateful())
|
||||
{
|
||||
is_stateful = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using ASTFunctionStatefulMatcher = OneTypeMatcher<ASTFunctionStatefulData>;
|
||||
using ASTFunctionStatefulVisitor = InDepthNodeVisitor<ASTFunctionStatefulMatcher, true>;
|
||||
|
||||
|
||||
/// Erases unnecessary ORDER BY from subquery
|
||||
class DuplicateOrderByFromSubqueriesData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTSelectQuery;
|
||||
|
||||
bool done = false;
|
||||
|
||||
void visit(ASTSelectQuery & select_query, ASTPtr &)
|
||||
{
|
||||
if (done)
|
||||
return;
|
||||
|
||||
if (select_query.orderBy() && !select_query.limitBy() && !select_query.limitByOffset() &&
|
||||
!select_query.limitByLength() && !select_query.limitLength() && !select_query.limitOffset())
|
||||
{
|
||||
select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr);
|
||||
}
|
||||
|
||||
done = true;
|
||||
}
|
||||
};
|
||||
|
||||
using DuplicateOrderByFromSubqueriesMatcher = OneTypeMatcher<DuplicateOrderByFromSubqueriesData>;
|
||||
using DuplicateOrderByFromSubqueriesVisitor = InDepthNodeVisitor<DuplicateOrderByFromSubqueriesMatcher, true>;
|
||||
|
||||
|
||||
/// Finds SELECT that can be optimized
|
||||
class DuplicateOrderByData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTSelectQuery;
|
||||
|
||||
const Context & context;
|
||||
bool done = false;
|
||||
|
||||
void visit(ASTSelectQuery & select_query, ASTPtr &)
|
||||
{
|
||||
if (done)
|
||||
return;
|
||||
|
||||
/// Disable optimization for distributed tables
|
||||
for (const auto & elem : select_query.children)
|
||||
{
|
||||
if (elem->as<ASTSetQuery>() && !elem->as<ASTSetQuery>()->is_standalone)
|
||||
return;
|
||||
}
|
||||
|
||||
if (select_query.orderBy() || select_query.groupBy())
|
||||
{
|
||||
for (auto & elem : select_query.children)
|
||||
{
|
||||
if (elem->as<ASTExpressionList>())
|
||||
{
|
||||
bool is_stateful = false;
|
||||
ASTFunctionStatefulVisitor::Data data{context, is_stateful};
|
||||
ASTFunctionStatefulVisitor(data).visit(elem);
|
||||
if (is_stateful)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto select_table_ptr = select_query.tables())
|
||||
{
|
||||
if (auto * select_table = select_table_ptr->as<ASTTablesInSelectQuery>())
|
||||
{
|
||||
if (!select_table->children.empty())
|
||||
{
|
||||
DuplicateOrderByFromSubqueriesVisitor::Data data{false};
|
||||
DuplicateOrderByFromSubqueriesVisitor(data).visit(select_table->children[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using DuplicateOrderByMatcher = OneTypeMatcher<DuplicateOrderByData>;
|
||||
using DuplicateOrderByVisitor = InDepthNodeVisitor<DuplicateOrderByMatcher, true>;
|
||||
|
||||
}
|
@ -23,12 +23,15 @@
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Interpreters/OptimizeIfChains.h>
|
||||
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
|
||||
#include <Interpreters/DuplicateDistinctVisitor.h>
|
||||
#include <Interpreters/DuplicateOrderByVisitor.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTOrderByElement.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
@ -370,6 +373,18 @@ void optimizeOrderBy(const ASTSelectQuery * select_query)
|
||||
elems = std::move(unique_elems);
|
||||
}
|
||||
|
||||
/// Optimize duplicate ORDER BY and DISTINCT
|
||||
void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, bool optimize_duplicate_order_by_and_distinct, const Context & context)
|
||||
{
|
||||
if (optimize_duplicate_order_by_and_distinct)
|
||||
{
|
||||
DuplicateOrderByVisitor::Data order_by_data{context, false};
|
||||
DuplicateOrderByVisitor(order_by_data).visit(query);
|
||||
DuplicateDistinctVisitor::Data distinct_data{};
|
||||
DuplicateDistinctVisitor(distinct_data).visit(query);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove duplicate items from LIMIT BY.
|
||||
void optimizeLimitBy(const ASTSelectQuery * select_query)
|
||||
{
|
||||
@ -831,6 +846,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
/// Remove duplicate items from ORDER BY.
|
||||
optimizeOrderBy(select_query);
|
||||
|
||||
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
|
||||
optimizeDuplicateOrderByAndDistinct(query, settings.optimize_duplicate_order_by_and_distinct, context);
|
||||
|
||||
/// Remove duplicated elements from LIMIT BY clause.
|
||||
optimizeLimitBy(select_query);
|
||||
|
||||
|
10
tests/performance/duplicate_order_by_and_distinct.xml
Normal file
10
tests/performance/duplicate_order_by_and_distinct.xml
Normal file
@ -0,0 +1,10 @@
|
||||
<test>
    <!-- Performance test for removal of duplicate ORDER BY / DISTINCT in subqueries. -->
    <preconditions>
        <table_exists>hits_10m_single</table_exists>
    </preconditions>

    <!-- ORDER BY in the subquery, different ORDER BY in the outer query. -->
    <query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null</query>
    <!-- DISTINCT repeated in the outer query and in the subquery. -->
    <query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null</query>
    <!-- Both cases combined: repeated DISTINCT plus ORDER BY on both levels. -->
    <query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null</query>
</test>
|
@ -0,0 +1,14 @@
|
||||
SELECT number\nFROM \n(\n SELECT number\n FROM \n (\n SELECT DISTINCT number\n FROM numbers(3)\n )\n)\nORDER BY number ASC
|
||||
0
|
||||
1
|
||||
2
|
||||
SELECT DISTINCT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number\n FROM numbers(3)\n ORDER BY number ASC\n )\n ORDER BY number ASC\n)\nORDER BY number ASC
|
||||
0
|
||||
1
|
||||
2
|
||||
SELECT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number % 2 AS number\n FROM numbers(3)\n )\n)\nORDER BY number ASC
|
||||
0
|
||||
1
|
||||
SELECT DISTINCT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number % 2 AS number\n FROM numbers(3)\n ORDER BY number ASC\n )\n ORDER BY number ASC\n)\nORDER BY number ASC
|
||||
0
|
||||
1
|
@ -0,0 +1,124 @@
|
||||
-- Checks the rewritten query (via analyze) and the result of triple-nested
-- SELECT DISTINCT ... ORDER BY queries with the optimization switched on and off.
set enable_debug_queries = 1;

-- Case 1: identical select list on every level, optimization enabled.
set optimize_duplicate_order_by_and_distinct = 1;

analyze SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT * FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT * FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

-- Case 2: same queries, optimization disabled.
set optimize_duplicate_order_by_and_distinct = 0;

analyze SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT * FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT * FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

-- Case 3: innermost select list is an aliased expression, optimization enabled.
set optimize_duplicate_order_by_and_distinct = 1;

analyze SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT number % 2 AS number FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT number % 2 AS number FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

-- Case 4: same as case 3, optimization disabled.
set optimize_duplicate_order_by_and_distinct = 0;

analyze SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT number % 2 AS number FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;

SELECT DISTINCT *
FROM (SELECT DISTINCT * FROM (SELECT DISTINCT number % 2 AS number FROM numbers(3) ORDER BY number) ORDER BY number)
ORDER BY number;
|
@ -0,0 +1,2 @@
|
||||
0
|
||||
0
|
@ -0,0 +1,20 @@
|
||||
-- The subquery reads from two remote shards and carries its own SETTINGS clause;
-- the query result is expected to be the same with the optimization on and off.

-- Optimization enabled.
set optimize_duplicate_order_by_and_distinct = 1;
SELECT DISTINCT number
FROM (SELECT DISTINCT number FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 1 SETTINGS distributed_group_by_no_merge = 1);

-- Optimization disabled.
set optimize_duplicate_order_by_and_distinct = 0;
SELECT DISTINCT number
FROM (SELECT DISTINCT number FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 1 SETTINGS distributed_group_by_no_merge = 1);
|
||||
|
Loading…
Reference in New Issue
Block a user