mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 13:32:13 +00:00
Simplification of creating sets that are used for index [#CLICKHOUSE-3796]
This commit is contained in:
parent
8f8c14954e
commit
e10f0ed6f4
@ -62,6 +62,8 @@
|
||||
#include <Functions/FunctionsMiscellaneous.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
|
||||
#include <Core/iostream_debug_helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -1461,99 +1463,7 @@ void ExpressionAnalyzer::optimizeLimitBy()
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::makeSetsForIndex()
|
||||
{
|
||||
if (storage && select_query && storage->supportsIndexForIn())
|
||||
{
|
||||
if (select_query->where_expression)
|
||||
makeSetsForIndexImpl(select_query->where_expression, storage->getSampleBlock());
|
||||
if (select_query->prewhere_expression)
|
||||
makeSetsForIndexImpl(select_query->prewhere_expression, storage->getSampleBlock());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
|
||||
{
|
||||
BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute();
|
||||
|
||||
SizeLimits set_for_index_size_limits;
|
||||
if (settings.use_index_for_in_with_subqueries_max_values && settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set)
|
||||
{
|
||||
/// Silently cancel creating the set for index if the specific limit has been reached.
|
||||
set_for_index_size_limits = SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// If the limit specific for set for index is lower than general limits for set - use general limit.
|
||||
set_for_index_size_limits = SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode);
|
||||
}
|
||||
|
||||
SetPtr set = std::make_shared<Set>(set_for_index_size_limits, true);
|
||||
|
||||
set->setHeader(res.in->getHeader());
|
||||
while (Block block = res.in->read())
|
||||
{
|
||||
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
|
||||
if (!set->insertFromBlock(block))
|
||||
return;
|
||||
}
|
||||
|
||||
prepared_sets[subquery_or_table_name->range] = std::move(set);
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block)
|
||||
{
|
||||
for (auto & child : node->children)
|
||||
{
|
||||
/// Don't descent into subqueries.
|
||||
if (typeid_cast<ASTSubquery *>(child.get()))
|
||||
continue;
|
||||
|
||||
/// Don't dive into lambda functions
|
||||
const ASTFunction * func = typeid_cast<const ASTFunction *>(child.get());
|
||||
if (func && func->name == "lambda")
|
||||
continue;
|
||||
|
||||
makeSetsForIndexImpl(child, sample_block);
|
||||
}
|
||||
|
||||
const ASTFunction * func = typeid_cast<const ASTFunction *>(node.get());
|
||||
if (func && functionIsInOperator(func->name))
|
||||
{
|
||||
const IAST & args = *func->arguments;
|
||||
|
||||
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
|
||||
{
|
||||
const ASTPtr & arg = args.children.at(1);
|
||||
|
||||
if (!prepared_sets.count(arg->range)) /// Not already prepared.
|
||||
{
|
||||
if (typeid_cast<ASTSubquery *>(arg.get()) || typeid_cast<ASTIdentifier *>(arg.get()))
|
||||
{
|
||||
if (settings.use_index_for_in_with_subqueries)
|
||||
tryMakeSetForIndexFromSubquery(arg);
|
||||
}
|
||||
else
|
||||
{
|
||||
NamesAndTypesList temp_columns = source_columns;
|
||||
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
|
||||
temp_columns.insert(temp_columns.end(), columns_added_by_join.begin(), columns_added_by_join.end());
|
||||
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, settings);
|
||||
getRootActions(func->arguments->children.at(0), true, false, temp_actions);
|
||||
|
||||
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
|
||||
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
|
||||
makeExplicitSet(func, sample_block_with_calculated_columns, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block)
|
||||
void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index)
|
||||
{
|
||||
/** You need to convert the right argument to a set.
|
||||
* This can be a table name, a value, a value enumeration, or a subquery.
|
||||
@ -1594,14 +1504,16 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
|
||||
|
||||
SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id];
|
||||
|
||||
/// If you already created a Set with the same subquery / table.
|
||||
/// If we already created a Set with the same subquery / table.
|
||||
if (subquery_for_set.set)
|
||||
{
|
||||
prepared_sets[arg->range] = subquery_for_set.set;
|
||||
return;
|
||||
}
|
||||
|
||||
SetPtr set = std::make_shared<Set>(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode), false);
|
||||
SetPtr set = std::make_shared<Set>(
|
||||
SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode),
|
||||
may_benefit_for_index && settings.use_index_for_in_with_subqueries);
|
||||
|
||||
/** The following happens for GLOBAL INs:
|
||||
* - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1,
|
||||
@ -1644,11 +1556,29 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_
|
||||
|
||||
subquery_for_set.set = set;
|
||||
prepared_sets[arg->range] = set;
|
||||
|
||||
/// Create the set right now - before the beginning of query execution - if it is needed for the index.
|
||||
if (may_benefit_for_index && settings.use_index_for_in_with_subqueries)
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("ExpressionAnalyzer");
|
||||
LOG_DEBUG(log, "Creating set for index.");
|
||||
|
||||
size_t rows_read = 0;
|
||||
set->setHeader(subquery_for_set.source->getHeader());
|
||||
while (Block block = subquery_for_set.source->read())
|
||||
{
|
||||
rows_read += block.rows();
|
||||
if (!set->insertFromBlock(block))
|
||||
break;
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Created. Set with " << set->getTotalRowCount() << " entries from " << rows_read << " rows.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// An explicit enumeration of values in parentheses.
|
||||
makeExplicitSet(node, sample_block, false);
|
||||
makeExplicitSet(node, sample_block, may_benefit_for_index);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2059,8 +1989,19 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
|
||||
getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack,
|
||||
projection_manipulator);
|
||||
|
||||
bool may_benefit_for_index = false;
|
||||
if (storage
|
||||
&& select_query
|
||||
&& storage->supportsIndexForIn()
|
||||
&& functionIsInOperator(node->name)
|
||||
&& storage->mayBenefitFromIndexForIn(node->arguments->children.at(0)))
|
||||
{
|
||||
/// TODO Check that it's in WHERE or PREWHERE and not in lambda function or subquery.
|
||||
may_benefit_for_index = true;
|
||||
}
|
||||
|
||||
/// Transform tuple or subquery into a set.
|
||||
makeSet(node, actions_stack.getSampleBlock());
|
||||
makeSet(node, actions_stack.getSampleBlock(), may_benefit_for_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -168,10 +168,6 @@ public:
|
||||
*/
|
||||
const Tables & getExternalTables() const { return external_tables; }
|
||||
|
||||
/// Create the Sets that can be built from the IN expressions, so that the index can be used on them.
|
||||
void makeSetsForIndex();
|
||||
|
||||
|
||||
private:
|
||||
ASTPtr ast;
|
||||
ASTSelectQuery * select_query;
|
||||
@ -276,7 +272,7 @@ private:
|
||||
void optimizeIfWithConstantConditionImpl(ASTPtr & current_ast, Aliases & aliases) const;
|
||||
bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const;
|
||||
|
||||
void makeSet(const ASTFunction * node, const Block & sample_block);
|
||||
void makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index);
|
||||
|
||||
/// Adds a list of ALIAS columns from the table.
|
||||
void addAliasColumns();
|
||||
@ -341,14 +337,6 @@ private:
|
||||
*/
|
||||
void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set);
|
||||
|
||||
/**
|
||||
* Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
|
||||
* The set will not be created if its size hits the limit.
|
||||
*/
|
||||
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
|
||||
|
||||
void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block);
|
||||
|
||||
/** Translate qualified names such as db.table.column, table.column, table_alias.column
|
||||
* to unqualified names. This is done in a poor transitional way:
|
||||
* only one ("main") table is supported. Ambiguity is not detected or resolved.
|
||||
|
@ -44,6 +44,8 @@
|
||||
#include <Columns/Collator.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <Core/iostream_debug_helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -647,12 +649,12 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
|
||||
if (max_streams > 1 && !is_remote)
|
||||
max_streams *= settings.max_streams_to_max_threads_ratio;
|
||||
|
||||
query_analyzer->makeSetsForIndex();
|
||||
|
||||
SelectQueryInfo query_info;
|
||||
query_info.query = query_ptr;
|
||||
query_info.sets = query_analyzer->getPreparedSets();
|
||||
|
||||
DUMP(query_info.sets);
|
||||
|
||||
/// PREWHERE optimization
|
||||
{
|
||||
auto optimize_prewhere = [&](auto & merge_tree)
|
||||
|
Loading…
Reference in New Issue
Block a user