improve input order analyzing and turn on order by optimization with left/inner joins

This commit is contained in:
CurtizJ 2019-12-20 16:15:17 +03:00
parent d17f785260
commit 75c5e02ec2
11 changed files with 46 additions and 19 deletions

View File

@ -230,6 +230,16 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
}
NamesAndTypesList ExpressionAnalyzer::sourceWithJoinedColumns() const
{
auto result_columns = sourceColumns();
result_columns.insert(result_columns.end(), array_join_columns.begin(), array_join_columns.end());
result_columns.insert(result_columns.end(),
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
return result_columns;
}
void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
@ -313,12 +323,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
}
else
{
NamesAndTypesList temp_columns = sourceColumns();
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
temp_columns.insert(temp_columns.end(),
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceWithJoinedColumns(), context);
getRootActions(left_in_operand, true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
@ -743,10 +748,14 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.required_output.push_back(order_expression->getColumnName());
}
if (optimize_read_in_order)
if (optimize_read_in_order)
{
auto all_columns = sourceWithJoinedColumns();
for (auto & child : select_query->orderBy()->children)
{
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(sourceColumns(), context));
order_by_elements_actions.emplace_back(std::make_shared<ExpressionActions>(all_columns, context));
getRootActions(child, only_types, order_by_elements_actions.back());
}
}

View File

@ -123,6 +123,7 @@ protected:
const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
NamesAndTypesList sourceWithJoinedColumns() const;
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(bool do_global);

View File

@ -33,6 +33,7 @@ public:
virtual bool alwaysReturnsEmptySet() const { return false; }
virtual BlockInputStreamPtr createStreamWithNonJoinedRows(const Block &, UInt64) const { return {}; }
virtual bool hasStreamWithNonJoinedRows() const { return false; }
};
using JoinPtr = std::shared_ptr<IJoin>;

View File

@ -770,11 +770,13 @@ InterpreterSelectQuery::analyzeExpressions(
}
}
bool has_stream_with_non_joned_rows = (res.before_join && res.before_join->getTableJoinAlgo()->hasStreamWithNonJoinedRows());
res.optimize_read_in_order =
context.getSettingsRef().optimize_read_in_order
&& storage && query.orderBy()
&& !query_analyzer.hasAggregation()
&& !query.final() && !query.join();
&& !query.final()
&& !has_stream_with_non_joned_rows;
/// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
query_analyzer.appendSelect(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
@ -1613,7 +1615,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescription(query, *context),
query_info.syntax_analyzer_result);
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(storage);
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
}

View File

@ -1416,4 +1416,14 @@ BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sam
return {};
}
bool Join::hasStreamWithNonJoinedRows()
{
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
table_join->strictness() == ASTTableJoin::Strictness::Semi)
return false;
return isRightOrFull(table_join->kind());
}
}

View File

@ -179,6 +179,7 @@ public:
* left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
*/
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const override;
bool hasStreamWithNonJoinedRows();
/// Number of keys in all built JOIN maps.
size_t getTotalRowCount() const final;

View File

@ -1,5 +1,6 @@
#include <Storages/ReadInOrderOptimizer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Functions/IFunction.h>
namespace DB
@ -21,13 +22,14 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
if (elements_actions.size() != required_sort_description.size())
throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR);
/// Do not analyze ARRAY JOIN result columns.
/// TODO: forbid more columns for analyzing.
/// Do not analyze joined columns.
/// They may have aliases and come to descriprion as is.
/// We can mismatch them with order key columns at stage of fetching columns.
for (const auto & elem : syntax_result->array_join_result_to_source)
forbidden_columns.insert(elem.first);
}
InputSortingInfoPtr ReadInOrderOptimizer::analyze(const StoragePtr & storage) const
InputSortingInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const
{
const MergeTreeData * merge_tree = dynamic_cast<const MergeTreeData *>(storage.get());
if (!merge_tree || !merge_tree->hasSortingKey())

View File

@ -8,21 +8,22 @@ namespace DB
{
/** Helper class, that can analyze MergeTree order key
* and required sort description to get info needed for
* and required sort description to get their
* common prefix, which is needed for
* performing reading in order of PK.
*/
class ReadInOrderOptimizer
{
public:
ReadInOrderOptimizer(
/// Actions for every element of order expression to analyze functions for monotonicicy
const ManyExpressionActions & elements_actions,
const SortDescription & required_sort_description,
const SyntaxAnalyzerResultPtr & syntax_result);
InputSortingInfoPtr analyze(const StoragePtr & storage) const;
InputSortingInfoPtr getInputOrder(const StoragePtr & storage) const;
private:
/// Actions for every element of order expression to analyze functions for monotonicicy
ManyExpressionActions elements_actions;
NameSet forbidden_columns;
SortDescription required_sort_description;

View File

@ -166,7 +166,7 @@ BlockInputStreams StorageBuffer::read(
if (dst_has_same_structure)
{
if (query_info.order_by_optimizer)
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(destination);
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(destination);
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
streams_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);

View File

@ -202,7 +202,7 @@ BlockInputStreams StorageMaterializedView::read(
auto storage = getTargetTable();
auto lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
if (query_info.order_by_optimizer)
query_info.input_sorting_info = query_info.order_by_optimizer->analyze(storage);
query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage);
auto streams = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams);
for (auto & stream : streams)

View File

@ -214,7 +214,7 @@ BlockInputStreams StorageMerge::read(
{
for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it)
{
auto current_info = query_info.order_by_optimizer->analyze(it->first);
auto current_info = query_info.order_by_optimizer->getInputOrder(it->first);
if (it == selected_tables.begin())
input_sorting_info = current_info;
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))