From e10f0ed6f42307564ce7fa429d60cccb12d35161 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 17 Jul 2018 00:23:45 +0300 Subject: [PATCH] Simplification of creating sets that are used for index [#CLICKHOUSE-3796] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 135 +++++------------- dbms/src/Interpreters/ExpressionAnalyzer.h | 14 +- .../Interpreters/InterpreterSelectQuery.cpp | 6 +- 3 files changed, 43 insertions(+), 112 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index ccb934a9b8d..592ec7f89b2 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -62,6 +62,8 @@ #include #include +#include + namespace DB { @@ -1461,99 +1463,7 @@ void ExpressionAnalyzer::optimizeLimitBy() } -void ExpressionAnalyzer::makeSetsForIndex() -{ - if (storage && select_query && storage->supportsIndexForIn()) - { - if (select_query->where_expression) - makeSetsForIndexImpl(select_query->where_expression, storage->getSampleBlock()); - if (select_query->prewhere_expression) - makeSetsForIndexImpl(select_query->prewhere_expression, storage->getSampleBlock()); - } -} - - -void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) -{ - BlockIO res = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {})->execute(); - - SizeLimits set_for_index_size_limits; - if (settings.use_index_for_in_with_subqueries_max_values && settings.use_index_for_in_with_subqueries_max_values < settings.max_rows_in_set) - { - /// Silently cancel creating the set for index if the specific limit has been reached. - set_for_index_size_limits = SizeLimits(settings.use_index_for_in_with_subqueries_max_values, settings.max_bytes_in_set, OverflowMode::BREAK); - } - else - { - /// If the limit specific for set for index is lower than general limits for set - use general limit. - set_for_index_size_limits = SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode); - } - - SetPtr set = std::make_shared(set_for_index_size_limits, true); - - set->setHeader(res.in->getHeader()); - while (Block block = res.in->read()) - { - /// If the limits have been exceeded, give up and let the default subquery processing actions take place. - if (!set->insertFromBlock(block)) - return; - } - - prepared_sets[subquery_or_table_name->range] = std::move(set); -} - - -void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block) -{ - for (auto & child : node->children) - { - /// Don't descent into subqueries. - if (typeid_cast(child.get())) - continue; - - /// Don't dive into lambda functions - const ASTFunction * func = typeid_cast(child.get()); - if (func && func->name == "lambda") - continue; - - makeSetsForIndexImpl(child, sample_block); - } - - const ASTFunction * func = typeid_cast(node.get()); - if (func && functionIsInOperator(func->name)) - { - const IAST & args = *func->arguments; - - if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0))) - { - const ASTPtr & arg = args.children.at(1); - - if (!prepared_sets.count(arg->range)) /// Not already prepared. - { - if (typeid_cast(arg.get()) || typeid_cast(arg.get())) - { - if (settings.use_index_for_in_with_subqueries) - tryMakeSetForIndexFromSubquery(arg); - } - else - { - NamesAndTypesList temp_columns = source_columns; - temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end()); - temp_columns.insert(temp_columns.end(), columns_added_by_join.begin(), columns_added_by_join.end()); - ExpressionActionsPtr temp_actions = std::make_shared(temp_columns, settings); - getRootActions(func->arguments->children.at(0), true, false, temp_actions); - - Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); - if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName())) - makeExplicitSet(func, sample_block_with_calculated_columns, true); - } - } - } - } -} - - -void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block) +void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index) { /** You need to convert the right argument to a set. * This can be a table name, a value, a value enumeration, or a subquery. @@ -1594,14 +1504,16 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id]; - /// If you already created a Set with the same subquery / table. + /// If we already created a Set with the same subquery / table. if (subquery_for_set.set) { prepared_sets[arg->range] = subquery_for_set.set; return; } - SetPtr set = std::make_shared(SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode), false); + SetPtr set = std::make_shared( + SizeLimits(settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode), + may_benefit_for_index && settings.use_index_for_in_with_subqueries); /** The following happens for GLOBAL INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, @@ -1644,11 +1556,29 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ subquery_for_set.set = set; prepared_sets[arg->range] = set; + + /// Create the set right now - before begin of query execution - if it is needed for the index. + if (may_benefit_for_index && settings.use_index_for_in_with_subqueries) + { + Poco::Logger * log = &Poco::Logger::get("ExpressionAnalyzer"); + LOG_DEBUG(log, "Creating set for index."); + + size_t rows_read = 0; + set->setHeader(subquery_for_set.source->getHeader()); + while (Block block = subquery_for_set.source->read()) + { + rows_read += block.rows(); + if (!set->insertFromBlock(block)) + break; + } + + LOG_DEBUG(log, "Created. Set with " << set->getTotalRowCount() << " entries from " << rows_read << " rows."); + } } else { /// An explicit enumeration of values in parentheses. - makeExplicitSet(node, sample_block, false); + makeExplicitSet(node, sample_block, may_benefit_for_index); } } @@ -2059,8 +1989,19 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack, projection_manipulator); + bool may_benefit_for_index = false; + if (storage + && select_query + && storage->supportsIndexForIn() + && functionIsInOperator(node->name) + && storage->mayBenefitFromIndexForIn(node->arguments->children.at(0))) + { + /// TODO Check that it's in WHERE or PREWHERE and not in lambda function or subquery. + may_benefit_for_index = true; + } + /// Transform tuple or subquery into a set. - makeSet(node, actions_stack.getSampleBlock()); + makeSet(node, actions_stack.getSampleBlock(), may_benefit_for_index); } else { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 4b8d21daa0f..9afacc675d7 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -168,10 +168,6 @@ public: */ const Tables & getExternalTables() const { return external_tables; } - /// Create Set-s that we can from IN section to use the index on them. - void makeSetsForIndex(); - - private: ASTPtr ast; ASTSelectQuery * select_query; @@ -276,7 +272,7 @@ private: void optimizeIfWithConstantConditionImpl(ASTPtr & current_ast, Aliases & aliases) const; bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const; - void makeSet(const ASTFunction * node, const Block & sample_block); + void makeSet(const ASTFunction * node, const Block & sample_block, bool may_benefit_for_index); /// Adds a list of ALIAS columns from the table. void addAliasColumns(); @@ -341,14 +337,6 @@ private: */ void makeExplicitSet(const ASTFunction * node, const Block & sample_block, bool create_ordered_set); - /** - * Create Set from a subuqery or a table expression in the query. The created set is suitable for using the index. - * The set will not be created if its size hits the limit. - */ - void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); - - void makeSetsForIndexImpl(const ASTPtr & node, const Block & sample_block); - /** Translate qualified names such as db.table.column, table.column, table_alias.column * to unqualified names. This is done in a poor transitional way: * only one ("main") table is supported. Ambiguity is not detected or resolved. diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index ec79dcf902f..26c96a6a98a 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -44,6 +44,8 @@ #include #include +#include + namespace DB { @@ -647,12 +649,12 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; - query_analyzer->makeSetsForIndex(); - SelectQueryInfo query_info; query_info.query = query_ptr; query_info.sets = query_analyzer->getPreparedSets(); + DUMP(query_info.sets); + /// PREWHERE optimization { auto optimize_prewhere = [&](auto & merge_tree)