mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
improve input order analyzing and turn on order by optimization with left/inner joins
This commit is contained in:
parent
d17f785260
commit
75c5e02ec2
@ -230,6 +230,16 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
|
||||
}
|
||||
|
||||
|
||||
NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const
|
||||
{
|
||||
auto result_columns = sourceColumns();
|
||||
result_columns.insert(result_columns.end(), array_join_columns.begin(), array_join_columns.end());
|
||||
result_columns.insert(result_columns.end(),
|
||||
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
|
||||
return result_columns;
|
||||
}
|
||||
|
||||
|
||||
void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
|
||||
{
|
||||
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
|
||||
@ -313,12 +323,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
|
||||
}
|
||||
else
|
||||
{
|
||||
NamesAndTypesList temp_columns = sourceColumns();
|
||||
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
|
||||
temp_columns.insert(temp_columns.end(),
|
||||
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
|
||||
|
||||
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
|
||||
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context);
|
||||
getRootActions(left_in_operand, true, temp_actions);
|
||||
|
||||
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
|
||||
@ -743,10 +748,14 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain
|
||||
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
|
||||
ASTPtr order_expression = ast->children.at(0);
|
||||
step.required_output.push_back(order_expression->getColumnName());
|
||||
}
|
||||
|
||||
if (optimize_read_in_order)
|
||||
if (optimize_read_in_order)
|
||||
{
|
||||
auto all_columns = sourceWithJoinedColumns();
|
||||
for (auto & child : select_query->orderBy()->children)
|
||||
{
|
||||
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(sourceColumns(), context));
|
||||
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
|
||||
getRootActions(child, only_types, order_by_elements_actions.back());
|
||||
}
|
||||
}
|
||||
|
@ -123,6 +123,7 @@ protected:
|
||||
const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
|
||||
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
|
||||
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
|
||||
NamesAndTypesList sourceWithJoinedColumns() const;
|
||||
|
||||
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
|
||||
void initGlobalSubqueriesAndExternalTables(bool do_global);
|
||||
|
@ -33,6 +33,7 @@ public:
|
||||
virtual bool alwaysReturnsEmptySet() const { return false; }
|
||||
|
||||
virtual BlockInputStreamPtr createStreamWithNonJoinedRows(const Block &, UInt64) const { return {}; }
|
||||
virtual bool hasStreamWithNonJoinedRows() const { return false; }
|
||||
};
|
||||
|
||||
using JoinPtr = std::shared_ptr<IJoin>;
|
||||
|
@ -770,11 +770,13 @@ InterpreterSelectQuery::analyzeExpressions(
|
||||
}
|
||||
}
|
||||
|
||||
bool has_stream_with_non_joned_rows = (res.before_join && res.before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());
|
||||
res.optimize_read_in_order =
|
||||
context.getSettingsRef().optimize_read_in_order
|
||||
&& storage && query.orderBy()
|
||||
&& !query_analyzer.hasAggregation()
|
||||
&& !query.final() && !query.join();
|
||||
&& !query.final()
|
||||
&& !has_stream_with_non_joned_rows;
|
||||
|
||||
/// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
|
||||
query_analyzer.appendSelect(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
|
||||
@ -1613,7 +1615,7 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
getSortDescription(query, *context),
|
||||
query_info.syntax_analyzer_result);
|
||||
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(storage);
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1416,4 +1416,14 @@ BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sam
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
bool Join::hasStreamWithNonJoinedRows()
|
||||
{
|
||||
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
|
||||
table_join->strictness() == ASTTableJoin::Strictness::Semi)
|
||||
return false;
|
||||
|
||||
return isRightOrFull(table_join->kind());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -179,6 +179,7 @@ public:
|
||||
* left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
|
||||
*/
|
||||
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const override;
|
||||
bool hasStreamWithNonJoinedRows();
|
||||
|
||||
/// Number of keys in all built JOIN maps.
|
||||
size_t getTotalRowCount() const final;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Storages/ReadInOrderOptimizer.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Interpreters/AnalyzedJoin.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
namespace DB
|
||||
@ -21,13 +22,14 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
|
||||
if (elements_actions.size() != required_sort_description.size())
|
||||
throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// Do not analyze ARRAY JOIN result columns.
|
||||
/// TODO: forbid more columns for analyzing.
|
||||
/// Do not analyze joined columns.
|
||||
/// They may have aliases and come to descriprion as is.
|
||||
/// We can mismatch them with order key columns at stage of fetching columns.
|
||||
for (const auto & elem : syntax_result->array_join_result_to_source)
|
||||
forbidden_columns.insert(elem.first);
|
||||
}
|
||||
|
||||
InputSortingInfoPtr ReadInOrderOptimizer::analyze(const StoragePtr & storage) const
|
||||
InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const
|
||||
{
|
||||
const MergeTreeData * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get());
|
||||
if (!merge_tree || !merge_tree->hasSortingKey())
|
||||
|
@ -8,21 +8,22 @@ namespace DB
|
||||
{
|
||||
|
||||
/** Helper class, that can analyze MergeTree order key
|
||||
* and required sort description to get info needed for
|
||||
* and required sort description to get their
|
||||
* common prefix, which is needed for
|
||||
* performing reading in order of PK.
|
||||
*/
|
||||
class ReadInOrderOptimizer
|
||||
{
|
||||
public:
|
||||
ReadInOrderOptimizer(
|
||||
/// Actions for every element of order expression to analyze functions for monotonicicy
|
||||
const ManyExpressionActions & elements_actions,
|
||||
const SortDescription & required_sort_description,
|
||||
const SyntaxAnalyzerResultPtr & syntax_result);
|
||||
|
||||
InputSortingInfoPtr analyze(const StoragePtr & storage) const;
|
||||
InputSortingInfoPtr getInputOrder(const StoragePtr & storage) const;
|
||||
|
||||
private:
|
||||
/// Actions for every element of order expression to analyze functions for monotonicicy
|
||||
ManyExpressionActions elements_actions;
|
||||
NameSet forbidden_columns;
|
||||
SortDescription required_sort_description;
|
||||
|
@ -166,7 +166,7 @@ BlockInputStreams StorageBuffer::read(
|
||||
if (dst_has_same_structure)
|
||||
{
|
||||
if (query_info.order_by_optimizer)
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(destination);
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(destination);
|
||||
|
||||
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
|
||||
streams_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
|
||||
|
@ -202,7 +202,7 @@ BlockInputStreams StorageMaterializedView::read(
|
||||
auto storage = getTargetTable();
|
||||
auto lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
|
||||
if (query_info.order_by_optimizer)
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(storage);
|
||||
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
|
||||
|
||||
auto streams = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
|
||||
for (auto & stream : streams)
|
||||
|
@ -214,7 +214,7 @@ BlockInputStreams StorageMerge::read(
|
||||
{
|
||||
for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
|
||||
{
|
||||
auto current_info = query_info.order_by_optimizer->analyze(it->first);
|
||||
auto current_info = query_info.order_by_optimizer->getInputOrder(it->first);
|
||||
if (it == selected_tables.begin())
|
||||
input_sorting_info = current_info;
|
||||
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
|
||||
|
Loading…
Reference in New Issue
Block a user