Simplification of creating sets that are used for index [#CLICKHOUSE-3796]

This commit is contained in:
Alexey Milovidov 2018-07-17 00:23:45 +03:00
parent 8f8c14954e
commit e10f0ed6f4
3 changed files with 43 additions and 112 deletions

View File

@ -62,6 +62,8 @@
#include <Functions/FunctionsMiscellaneous.h>
#include <DataTypes/DataTypeTuple.h>
#include <Core/iostream_debug_helpers.h>
namespace DB
{
@ -1461,99 +1463,7 @@ void ExpressionAnalyzer::optimizeLimitBy()
}
/// Creates the sets that can be built from the IN sections of WHERE and PREWHERE, so the index can be used on them.
/// Does nothing unless there is a storage, a SELECT query, and the storage reports supportsIndexForIn().
void ExpressionAnalyzer::makeSetsForIndex()
{
    if (!storage || !select_query || !storage->supportsIndexForIn())
        return;

    /// WHERE is processed before PREWHERE; each expression tree is walked independently.
    if (select_query->where_expression)
        makeSetsForIndexImpl(select_query->where_expression, storage->getSampleBlock());

    if (select_query->prewhere_expression)
        makeSetsForIndexImpl(select_query->prewhere_expression, storage->getSampleBlock());
}
/// Executes the given subquery (or table expression) immediately and tries to fill a Set from its result.
/// The set is stored in prepared_sets only if every block was inserted successfully;
/// if an insertion reports that the limits were exceeded, nothing is stored.
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{
    BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute();

    /// The limit specific to sets for index is used only when it is non-zero and lower than max_rows_in_set.
    /// NOTE(review): if max_rows_in_set == 0 means "unlimited", the specific limit is never applied in that case - confirm.
    const bool specific_limit_applies = settings.use_index_for_in_with_subqueries_max_values
        && settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set;

    /// With the specific limit the overflow mode is BREAK, so reaching it silently cancels creation of the set for index.
    /// Otherwise the general limits for sets and the general overflow mode are used.
    SizeLimits set_for_index_size_limits = specific_limit_applies
        ? SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK)
        : SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode);

    SetPtr set = std::make_shared<Set>(set_for_index_size_limits, true);
    set->setHeader(res.in->getHeader());

    while (Block block = res.in->read())
    {
        /// A failed insertion means the limits have been exceeded: give up and leave
        /// the subquery to the default processing.
        const bool inserted = set->insertFromBlock(block);
        if (!inserted)
            return;
    }

    prepared_sets[subquery_or_table_name->range] = std::move(set);
}
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block)
{
for (auto & child : node->children)
{
/// Don't descent into subqueries.
if (typeid_cast<ASTSubquery *>(child.get()))
continue;
/// Don't dive into lambda functions
const ASTFunction * func = typeid_cast<const ASTFunction *>(child.get());
if (func && func->name == "lambda")
continue;
makeSetsForIndexImpl(child, sample_block);
}
const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get());
if (func && functionIsInOperator(func->name))
{
const IAST & args = *func->arguments;
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
{
const ASTPtr & arg = args.children.at(1);
if (!prepared_sets.count(arg->range)) /// Not already prepared.
{
if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
{
if (settings.use_index_for_in_with_subqueries)
tryMakeSetForIndexFromSubquery(arg);
}
else
{
NamesAndTypesList temp_columns = source_columns;
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
temp_columns.insert(temp_columns.end(), columns_added_by_join.begin(), columns_added_by_join.end());
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, settings);
getRootActions(func->arguments->children.at(0), true, false, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true);
}
}
}
}
}
void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block)
void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index)
{
/** You need to convert the right argument to a set.
* This can be a table name, a value, a value enumeration, or a subquery.
@ -1594,14 +1504,16 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id];
/// If you already created a Set with the same subquery / table.
/// If we already created a Set with the same subquery / table.
if (subquery_for_set.set)
{
prepared_sets[arg->range] = subquery_for_set.set;
return;
}
SetPtr set = std::make_shared<Set>(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode), false);
SetPtr set = std::make_shared<Set>(
SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode),
may_benefit_for_index && settings.use_index_for_in_with_subqueries);
/** The following happens for GLOBAL INs:
* - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1,
@ -1644,11 +1556,29 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
subquery_for_set.set = set;
prepared_sets[arg->range] = set;
/// Create the set right now - before begin of query execution - if it is needed for the index.
if (may_benefit_for_index && settings.use_index_for_in_with_subqueries)
{
Poco::Logger * log = &Poco::Logger::get("ExpressionAnalyzer");
LOG_DEBUG(log, "Creating set for index.");
size_t rows_read = 0;
set->setHeader(subquery_for_set.source->getHeader());
while (Block block = subquery_for_set.source->read())
{
rows_read += block.rows();
if (!set->insertFromBlock(block))
break;
}
LOG_DEBUG(log, "Created. Set with " << set->getTotalRowCount() << " entries from " << rows_read << " rows.");
}
}
else
{
/// An explicit enumeration of values in parentheses.
makeExplicitSet(node, sample_block, false);
makeExplicitSet(node, sample_block, may_benefit_for_index);
}
}
@ -2059,8 +1989,19 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack,
projection_manipulator);
bool may_benefit_for_index = false;
if (storage
&& select_query
&& storage->supportsIndexForIn()
&& functionIsInOperator(node->name)
&& storage->mayBenefitFromIndexForIn(node->arguments->children.at(0)))
{
/// TODO Check that it's in WHERE or PREWHERE and not in lambda function or subquery.
may_benefit_for_index = true;
}
/// Transform tuple or subquery into a set.
makeSet(node, actions_stack.getSampleBlock());
makeSet(node, actions_stack.getSampleBlock(), may_benefit_for_index);
}
else
{

View File

@ -168,10 +168,6 @@ public:
*/
const Tables & getExternalTables() const { return external_tables; }
/// Create Set-s that we can from IN section to use the index on them.
void makeSetsForIndex();
private:
ASTPtr ast;
ASTSelectQuery * select_query;
@ -276,7 +272,7 @@ private:
void optimizeIfWithConstantConditionImpl(ASTPtr & current_ast, Aliases & aliases) const;
bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const;
void makeSet(const ASTFunction * node, const Block & sample_block);
void makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index);
/// Adds a list of ALIAS columns from the table.
void addAliasColumns();
@ -341,14 +337,6 @@ private:
*/
void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set);
/**
* Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
* The set will not be created if its size hits the limit.
*/
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block);
/** Translate qualified names such as db.table.column, table.column, table_alias.column
* to unqualified names. This is done in a poor transitional way:
* only one ("main") table is supported. Ambiguity is not detected or resolved.

View File

@ -44,6 +44,8 @@
#include <Columns/Collator.h>
#include <Common/typeid_cast.h>
#include <Core/iostream_debug_helpers.h>
namespace DB
{
@ -647,12 +649,12 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
if (max_streams > 1 && !is_remote)
max_streams *= settings.max_streams_to_max_threads_ratio;
query_analyzer->makeSetsForIndex();
SelectQueryInfo query_info;
query_info.query = query_ptr;
query_info.sets = query_analyzer->getPreparedSets();
DUMP(query_info.sets);
/// PREWHERE optimization
{
auto optimize_prewhere = [&](auto & merge_tree)