ISSUES-863 fix predicate optimizer not work for asterisk

This commit is contained in:
zhang2014 2018-08-22 14:42:37 +08:00
parent 6497d3032e
commit 32cc611f5d
9 changed files with 465 additions and 298 deletions

View File

@ -66,6 +66,7 @@
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Interpreters/evaluateQualified.h>
namespace DB
@ -164,35 +165,6 @@ void removeDuplicateColumns(NamesAndTypesList & columns)
}
String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const
{
return (!alias.empty() ? alias : (database + '.' + table)) + '.';
}
void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const
{
if (auto identifier = typeid_cast<ASTIdentifier *>(ast.get()))
{
String prefix = getQualifiedNamePrefix();
identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end());
Names qualifiers;
if (!alias.empty())
qualifiers.push_back(alias);
else
{
qualifiers.push_back(database);
qualifiers.push_back(table);
}
for (const auto & qualifier : qualifiers)
identifier->children.emplace_back(std::make_shared<ASTIdentifier>(qualifier));
}
}
ExpressionAnalyzer::ExpressionAnalyzer(
const ASTPtr & ast_,
const Context & context_,
@ -274,7 +246,7 @@ ExpressionAnalyzer::ExpressionAnalyzer(
getArrayJoinedColumns();
/// Push the predicate expression down to the subqueries.
rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings).optimize();
rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
/// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`.
collectUsedColumns();
@ -293,46 +265,6 @@ ExpressionAnalyzer::ExpressionAnalyzer(
analyzeAggregation();
}
static DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression,
const Context & context)
{
DatabaseAndTableWithAlias database_and_table_with_alias;
if (table_expression.database_and_table_name)
{
const auto & identifier = static_cast<const ASTIdentifier &>(*table_expression.database_and_table_name);
database_and_table_with_alias.alias = identifier.tryGetAlias();
if (table_expression.database_and_table_name->children.empty())
{
database_and_table_with_alias.database = context.getCurrentDatabase();
database_and_table_with_alias.table = identifier.name;
}
else
{
if (table_expression.database_and_table_name->children.size() != 2)
throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR);
database_and_table_with_alias.database = static_cast<const ASTIdentifier &>(*identifier.children[0]).name;
database_and_table_with_alias.table = static_cast<const ASTIdentifier &>(*identifier.children[1]).name;
}
}
else if (table_expression.table_function)
{
database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias();
}
else if (table_expression.subquery)
{
database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias();
}
else
throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR);
return database_and_table_with_alias;
}
void ExpressionAnalyzer::translateQualifiedNames()
{
if (!select_query || !select_query->tables || select_query->tables->children.empty())
@ -357,80 +289,6 @@ void ExpressionAnalyzer::translateQualifiedNames()
translateQualifiedNamesImpl(ast, tables);
}
/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names.
static size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & names)
{
size_t num_qualifiers_to_strip = 0;
auto get_identifier_name = [](const ASTPtr & ast) { return static_cast<const ASTIdentifier &>(*ast).name; };
/// It is compound identifier
if (!identifier.children.empty())
{
size_t num_components = identifier.children.size();
/// database.table.column
if (num_components >= 3
&& !names.database.empty()
&& get_identifier_name(identifier.children[0]) == names.database
&& get_identifier_name(identifier.children[1]) == names.table)
{
num_qualifiers_to_strip = 2;
}
/// table.column or alias.column. If num_components > 2, it is like table.nested.column.
if (num_components >= 2
&& ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table)
|| (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias)))
{
num_qualifiers_to_strip = 1;
}
}
return num_qualifiers_to_strip;
}
/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left.
/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
static void stripIdentifier(ASTPtr & ast, size_t num_qualifiers_to_strip)
{
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(ast.get());
if (!identifier)
throw Exception("ASTIdentifier expected for stripIdentifier", ErrorCodes::LOGICAL_ERROR);
if (num_qualifiers_to_strip)
{
size_t num_components = identifier->children.size();
/// plain column
if (num_components - num_qualifiers_to_strip == 1)
{
String node_alias = identifier->tryGetAlias();
ast = identifier->children.back();
if (!node_alias.empty())
ast->setAlias(node_alias);
}
else
/// nested column
{
identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip);
String new_name;
for (const auto & child : identifier->children)
{
if (!new_name.empty())
new_name += '.';
new_name += static_cast<const ASTIdentifier &>(*child.get()).name;
}
identifier->name = new_name;
}
}
}
void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::vector<DatabaseAndTableWithAlias> & tables)
{
if (auto * identifier = typeid_cast<ASTIdentifier *>(ast.get()))
@ -509,7 +367,6 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::ve
}
}
void ExpressionAnalyzer::optimizeIfWithConstantCondition()
{
optimizeIfWithConstantConditionImpl(ast, aliases);
@ -765,23 +622,6 @@ void ExpressionAnalyzer::findExternalTables(ASTPtr & ast)
external_tables[node->name] = external_storage;
}
static std::pair<String, String> getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier)
{
std::pair<String, String> res;
res.second = identifier.name;
if (!identifier.children.empty())
{
if (identifier.children.size() != 2)
throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR);
res.first = typeid_cast<const ASTIdentifier &>(*identifier.children[0]).name;
res.second = typeid_cast<const ASTIdentifier &>(*identifier.children[1]).name;
}
return res;
}
static std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
const ASTPtr & subquery_or_table_name, const Context & context, size_t subquery_depth, const Names & required_source_columns)
{

View File

@ -4,6 +4,7 @@
#include <Interpreters/Settings.h>
#include <Core/Block.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/evaluateQualified.h>
#include <Interpreters/ProjectionManipulation.h>
#include <Parsers/StringRange.h>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -91,19 +92,6 @@ struct ScopeStack
const Block & getSampleBlock() const;
};
struct DatabaseAndTableWithAlias
{
String database;
String table;
String alias;
/// "alias." or "database.table." if alias is empty
String getQualifiedNamePrefix() const;
/// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name.
void makeQualifiedName(const ASTPtr & ast) const;
};
/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
*
* NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.

View File

@ -2,11 +2,11 @@
#include <Storages/IStorage.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/queryToString.h>
#include <iostream>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/queryToString.h>
namespace DB
{
@ -14,18 +14,18 @@ namespace DB
static constexpr auto and_function_name = "and";
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
ASTSelectQuery * ast_select_, const Settings & settings_)
: ast_select(ast_select_), settings(settings_)
ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_)
: ast_select(ast_select_), settings(settings_), context(context_)
{
}
bool PredicateExpressionsOptimizer::optimize()
{
if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables)
if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty())
return false;
SubqueriesProjectionColumns all_subquery_projection_columns;
getAllSubqueryProjectionColumns(ast_select->tables.get(), all_subquery_projection_columns);
getAllSubqueryProjectionColumns(all_subquery_projection_columns);
bool is_rewrite_subqueries = false;
if (!all_subquery_projection_columns.empty())
@ -42,11 +42,16 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
/// split predicate with `and`
PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
std::vector<ASTTableExpression *> tables_expression = getSelectTablesExpression(ast_select);
std::vector<DatabaseAndTableWithAlias> database_and_table_with_aliases;
for (const auto & table_expression : tables_expression)
database_and_table_with_aliases.emplace_back(getTableNameWithAliasFromTableExpression(*table_expression, context));
bool is_rewrite_subquery = false;
for (const auto & outer_predicate : outer_predicate_expressions)
{
ASTs outer_predicate_dependent;
getExpressionDependentColumns(outer_predicate, outer_predicate_dependent);
IdentifiersWithQualifiedNameSet outer_predicate_dependencies;
getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases);
/// TODO: remove origin expression
for (const auto & subquery_projection_columns : subqueries_projection_columns)
@ -55,10 +60,10 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
const ProjectionsWithAliases projection_columns = subquery_projection_columns.second;
OptimizeKind optimize_kind = OptimizeKind::NONE;
if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependent, is_prewhere, optimize_kind))
if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependencies, is_prewhere, optimize_kind))
{
ASTPtr inner_predicate;
cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, outer_predicate_dependent, inner_predicate);
cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, database_and_table_with_aliases, inner_predicate);
switch(optimize_kind)
{
@ -109,34 +114,57 @@ PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(AS
return predicate_expressions;
}
void PredicateExpressionsOptimizer::getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns)
void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(const ASTPtr & expression,
IdentifiersWithQualifiedNameSet & dependencies_and_qualified,
std::vector<DatabaseAndTableWithAlias> & tables_with_aliases)
{
if (!typeid_cast<ASTIdentifier *>(expression.get()))
if (const auto identifier = typeid_cast<ASTIdentifier *>(expression.get()))
{
if (!identifier->children.empty())
dependencies_and_qualified.emplace_back(std::pair(identifier, expression->getAliasOrColumnName()));
else
{
size_t best_table_pos = 0;
size_t max_num_qualifiers_to_strip = 0;
/// translate qualifiers for dependent columns
for (size_t table_pos = 0; table_pos < tables_with_aliases.size(); ++table_pos)
{
const auto & table = tables_with_aliases[table_pos];
auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, table);
if (num_qualifiers_to_strip > max_num_qualifiers_to_strip)
{
max_num_qualifiers_to_strip = num_qualifiers_to_strip;
best_table_pos = table_pos;
}
}
String qualified_name = tables_with_aliases[best_table_pos].getQualifiedNamePrefix() + expression->getAliasOrColumnName();
dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name));
}
}
else
{
for (const auto & child : expression->children)
getExpressionDependentColumns(child, expression_dependent_columns);
return;
getDependenciesAndQualifiedOfExpression(child, dependencies_and_qualified, tables_with_aliases);
}
expression_dependent_columns.emplace_back(expression);
}
bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate(
const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind)
IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind)
{
if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_offset || subquery->with_expression_list)
return true;
for (auto & dependent_column : expression_dependent_columns)
for (auto & predicate_dependency : outer_predicate_dependencies)
{
bool is_found = false;
String dependent_column_name = dependent_column->getAliasOrColumnName();
for (auto projection_column : subquery_projection_columns)
{
if (projection_column.second == dependent_column_name)
if (projection_column.second == predicate_dependency.second)
{
is_found = true;
optimize_kind = isAggregateFunction(projection_column.first) ? OptimizeKind::PUSH_TO_HAVING : optimize_kind;
@ -168,39 +196,21 @@ bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node)
return false;
}
void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns)
{
if (auto ast_subquery = typeid_cast<ASTSubquery *>(node))
{
ASTs output_projection;
IAST * subquery = ast_subquery->children.at(0).get();
getSubqueryProjectionColumns(subquery, all_subquery_projection_columns, output_projection);
return;
}
for (auto & child : node->children)
getAllSubqueryProjectionColumns(child.get(), all_subquery_projection_columns);
}
void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate(
const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns,
ASTPtr & inner_predicate)
const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns,
std::vector<DatabaseAndTableWithAlias> & tables, ASTPtr & inner_predicate)
{
inner_predicate = outer_predicate->clone();
ASTs new_expression_require_columns;
new_expression_require_columns.reserve(predicate_dependent_columns.size());
getExpressionDependentColumns(inner_predicate, new_expression_require_columns);
IdentifiersWithQualifiedNameSet new_expression_requires;
getDependenciesAndQualifiedOfExpression(inner_predicate, new_expression_requires, tables);
for (auto & expression : new_expression_require_columns)
for (auto & require : new_expression_requires)
{
if (auto identifier = typeid_cast<ASTIdentifier *>(expression.get()))
for (auto projection : projection_columns)
{
for (auto projection : projection_columns)
{
if (identifier->name == projection.second)
identifier->name = projection.first->getAliasOrColumnName();
}
if (require.second == projection.second)
require.first->name = projection.first->getAliasOrColumnName();
}
}
}
@ -221,32 +231,155 @@ bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expr
return true;
}
void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections)
void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns)
{
if (auto * with_union_subquery = typeid_cast<ASTSelectWithUnionQuery *>(subquery))
for (auto & select : with_union_subquery->list_of_selects->children)
getSubqueryProjectionColumns(select.get(), all_subquery_projection_columns, output_projections);
const auto tables_expression = getSelectTablesExpression(ast_select);
if (auto * without_union_subquery = typeid_cast<ASTSelectQuery *>(subquery))
for (const auto & table_expression : tables_expression)
{
const auto expression_list = without_union_subquery->select_expression_list->children;
/// use first projection as the output projection
if (output_projections.empty())
output_projections = expression_list;
if (output_projections.size() != expression_list.size())
throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
ProjectionsWithAliases subquery_projections;
subquery_projections.reserve(expression_list.size());
for (size_t idx = 0; idx < expression_list.size(); idx++)
subquery_projections.emplace_back(std::pair(expression_list.at(idx), output_projections.at(idx)->getAliasOrColumnName()));
all_subquery_projection_columns.insert(std::pair(subquery, subquery_projections));
if (table_expression->subquery)
{
/// Use qualifiers to translate the columns of subqueries
const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context);
String qualified_name_prefix = database_and_table_with_alias.getQualifiedNamePrefix();
getSubqueryProjectionColumns(all_subquery_projection_columns, qualified_name_prefix,
static_cast<const ASTSubquery *>(table_expression->subquery.get())->children[0]);
}
}
}
void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns,
String & qualified_name_prefix, const ASTPtr & subquery)
{
ASTs select_with_union_projections;
auto select_with_union_query = static_cast<ASTSelectWithUnionQuery *>(subquery.get());
for (auto & select_without_union_query : select_with_union_query->list_of_selects->children)
{
ProjectionsWithAliases subquery_projections;
auto select_projection_columns = getSelectQueryProjectionColumns(select_without_union_query);
if (select_with_union_projections.empty())
select_with_union_projections = select_projection_columns;
for (size_t i = 0; i < select_projection_columns.size(); i++)
subquery_projections.emplace_back(std::pair(select_projection_columns[i],
qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName()));
all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections));
}
}
ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast)
{
ASTs projection_columns;
auto select_query = static_cast<ASTSelectQuery *>(ast.get());
for (const auto & projection_column : select_query->select_expression_list->children)
{
if (typeid_cast<ASTAsterisk *>(projection_column.get()) || typeid_cast<ASTQualifiedAsterisk *>(projection_column.get()))
{
ASTs evaluated_columns = evaluateAsterisk(select_query, projection_column);
for (const auto & column : evaluated_columns)
projection_columns.emplace_back(column);
continue;
}
projection_columns.emplace_back(projection_column);
}
return projection_columns;
}
ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery *select_query, const ASTPtr &asterisk)
{
if (!select_query->tables || select_query->tables->children.empty())
throw Exception("Logical error: The asterisk cannot be replaced, because there is no table.", ErrorCodes::LOGICAL_ERROR);
std::vector<ASTTableExpression *> tables_expression = getSelectTablesExpression(select_query);
if (const auto qualified_asterisk = typeid_cast<ASTQualifiedAsterisk *>(asterisk.get()))
{
if (qualified_asterisk->children.size() != 1)
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
ASTIdentifier * ident = typeid_cast<ASTIdentifier *>(qualified_asterisk->children[0].get());
if (!ident)
throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR);
size_t num_components = ident->children.size();
if (num_components > 2)
throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it)
{
const ASTTableExpression * table_expression = *it;
const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context);
/// database.table.*
if (num_components == 2 && !database_and_table_with_alias.database.empty()
&& static_cast<const ASTIdentifier &>(*ident->children[0]).name == database_and_table_with_alias.database
&& static_cast<const ASTIdentifier &>(*ident->children[1]).name == database_and_table_with_alias.table)
continue;
/// table.* or alias.*
else if (num_components == 0
&& ((!database_and_table_with_alias.table.empty() && ident->name == database_and_table_with_alias.table)
|| (!database_and_table_with_alias.alias.empty() && ident->name == database_and_table_with_alias.alias)))
continue;
else
/// It's not a required table
tables_expression.erase(it);
}
}
ASTs projection_columns;
for (auto & table_expression : tables_expression)
{
if (table_expression->subquery)
{
const auto subquery = static_cast<const ASTSubquery *>(table_expression->subquery.get());
const auto select_with_union_query = static_cast<ASTSelectWithUnionQuery *>(subquery->children[0].get());
const auto subquery_projections = getSelectQueryProjectionColumns(select_with_union_query->list_of_selects->children[0]);
projection_columns.insert(projection_columns.end(), subquery_projections.begin(), subquery_projections.end());
}
else
{
StoragePtr storage;
if (table_expression->table_function)
storage = const_cast<Context &>(context).executeTableFunction(table_expression->table_function);
else if (table_expression->database_and_table_name)
{
const auto database_and_table_ast = static_cast<ASTIdentifier*>(table_expression->database_and_table_name.get());
const auto database_and_table_name = getDatabaseAndTableNameFromIdentifier(*database_and_table_ast);
storage = context.tryGetTable(database_and_table_name.first, database_and_table_name.second);
}
const auto block = storage->getSampleBlock();
for (size_t idx = 0; idx < block.columns(); idx++)
projection_columns.emplace_back(std::make_shared<ASTIdentifier>(block.getByPosition(idx).name));
}
}
return projection_columns;
}
std::vector<ASTTableExpression *> PredicateExpressionsOptimizer::getSelectTablesExpression(ASTSelectQuery * select_query)
{
if (!select_query->tables)
return {};
std::vector<ASTTableExpression *> tables_expression;
const ASTTablesInSelectQuery & tables_in_select_query = static_cast<const ASTTablesInSelectQuery &>(*select_query->tables);
for (const auto & child : tables_in_select_query.children)
{
ASTTablesInSelectQueryElement * tables_element = static_cast<ASTTablesInSelectQueryElement *>(child.get());
if (tables_element->table_expression)
tables_expression.emplace_back(static_cast<ASTTableExpression *>(tables_element->table_expression.get()));
}
return tables_expression;
}
}

View File

@ -7,6 +7,9 @@
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/evaluateQualified.h>
namespace DB
{
@ -21,6 +24,8 @@ using PredicateExpressions = std::vector<ASTPtr>;
using ProjectionWithAlias = std::pair<ASTPtr, String>;
using ProjectionsWithAliases = std::vector<ProjectionWithAlias>;
using SubqueriesProjectionColumns = std::map<IAST *, ProjectionsWithAliases>;
using IdentifierWithQualifiedName = std::pair<ASTIdentifier *, String>;
using IdentifiersWithQualifiedNameSet = std::vector<IdentifierWithQualifiedName>;
/** This class provides functions for Push-Down predicate expressions
@ -37,13 +42,14 @@ using SubqueriesProjectionColumns = std::map<IAST *, ProjectionsWithAliases>;
class PredicateExpressionsOptimizer
{
public:
PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_);
PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_);
bool optimize();
private:
ASTSelectQuery * ast_select;
const Settings & settings;
const Context & context;
enum OptimizeKind
{
@ -57,24 +63,29 @@ private:
PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression);
void getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns);
void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified,
std::vector<DatabaseAndTableWithAlias> & tables_with_aliases);
bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere);
bool cannotPushDownOuterPredicate(
const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind);
bool cannotPushDownOuterPredicate(const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind);
void cloneOuterPredicateForInnerPredicate(
const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns,
ASTPtr & inner_predicate);
void cloneOuterPredicateForInnerPredicate(const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns,
std::vector<DatabaseAndTableWithAlias> & tables, ASTPtr & inner_predicate);
void getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns);
void getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns);
void getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns,
String & qualified_name_prefix, const ASTPtr & subquery);
void getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections);
ASTs getSelectQueryProjectionColumns(ASTPtr & ast);
std::vector<ASTTableExpression *> getSelectTablesExpression(ASTSelectQuery * select_query);
ASTs evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk);
};
}

View File

@ -272,7 +272,7 @@ struct Settings
M(SettingBool, log_query_settings, true, "Log query settings into the query_log.") \
M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table.") \
M(SettingString, send_logs_level, "none", "Send server text logs with specified minumum level to client. Valid values: 'trace', 'debug', 'info', 'warning', 'error', 'none'") \
M(SettingBool, enable_optimize_predicate_expression, 0, "If it is set to true, optimize predicates to subqueries.") \
M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.") \
\
M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.") \
M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.") \

View File

@ -0,0 +1,160 @@
#include <Interpreters/evaluateQualified.h>
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
namespace DB
{
/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left.
/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'.
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip)
{
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(ast.get());
if (!identifier)
throw DB::Exception("ASTIdentifier expected for stripIdentifier", DB::ErrorCodes::LOGICAL_ERROR);
if (num_qualifiers_to_strip)
{
size_t num_components = identifier->children.size();
/// plain column
if (num_components - num_qualifiers_to_strip == 1)
{
DB::String node_alias = identifier->tryGetAlias();
ast = identifier->children.back();
if (!node_alias.empty())
ast->setAlias(node_alias);
}
else
/// nested column
{
identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip);
DB::String new_name;
for (const auto & child : identifier->children)
{
if (!new_name.empty())
new_name += '.';
new_name += static_cast<const ASTIdentifier &>(*child.get()).name;
}
identifier->name = new_name;
}
}
}
DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression,
const Context & context)
{
DatabaseAndTableWithAlias database_and_table_with_alias;
if (table_expression.database_and_table_name)
{
const auto & identifier = static_cast<const ASTIdentifier &>(*table_expression.database_and_table_name);
database_and_table_with_alias.alias = identifier.tryGetAlias();
if (table_expression.database_and_table_name->children.empty())
{
database_and_table_with_alias.database = context.getCurrentDatabase();
database_and_table_with_alias.table = identifier.name;
}
else
{
if (table_expression.database_and_table_name->children.size() != 2)
throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR);
database_and_table_with_alias.database = static_cast<const ASTIdentifier &>(*identifier.children[0]).name;
database_and_table_with_alias.table = static_cast<const ASTIdentifier &>(*identifier.children[1]).name;
}
}
else if (table_expression.table_function)
{
database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias();
}
else if (table_expression.subquery)
{
database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias();
}
else
throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR);
return database_and_table_with_alias;
}
/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names.
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & names)
{
size_t num_qualifiers_to_strip = 0;
auto get_identifier_name = [](const ASTPtr & ast) { return static_cast<const ASTIdentifier &>(*ast).name; };
/// It is compound identifier
if (!identifier.children.empty())
{
size_t num_components = identifier.children.size();
/// database.table.column
if (num_components >= 3
&& !names.database.empty()
&& get_identifier_name(identifier.children[0]) == names.database
&& get_identifier_name(identifier.children[1]) == names.table)
{
num_qualifiers_to_strip = 2;
}
/// table.column or alias.column. If num_components > 2, it is like table.nested.column.
if (num_components >= 2
&& ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table)
|| (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias)))
{
num_qualifiers_to_strip = 1;
}
}
return num_qualifiers_to_strip;
}
std::pair<String, String> getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier)
{
std::pair<String, String> res;
res.second = identifier.name;
if (!identifier.children.empty())
{
if (identifier.children.size() != 2)
throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR);
res.first = typeid_cast<const ASTIdentifier &>(*identifier.children[0]).name;
res.second = typeid_cast<const ASTIdentifier &>(*identifier.children[1]).name;
}
return res;
}
String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const
{
return (!alias.empty() ? alias : (database + '.' + table)) + '.';
}
void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const
{
if (auto identifier = typeid_cast<ASTIdentifier *>(ast.get()))
{
String prefix = getQualifiedNamePrefix();
identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end());
Names qualifiers;
if (!alias.empty())
qualifiers.push_back(alias);
else
{
qualifiers.push_back(database);
qualifiers.push_back(table);
}
for (const auto & qualifier : qualifiers)
identifier->children.emplace_back(std::make_shared<ASTIdentifier>(qualifier));
}
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/Context.h>
namespace DB
{
struct DatabaseAndTableWithAlias
{
String database;
String table;
String alias;
/// "alias." or "database.table." if alias is empty
String getQualifiedNamePrefix() const;
/// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name.
void makeQualifiedName(const ASTPtr & ast) const;
};
void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression,
const Context & context);
size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
const DatabaseAndTableWithAlias & names);
std::pair<String, String> getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier);
}

View File

@ -1,15 +1,23 @@
-------Query that previously worked but now doesn\'t work.-------
-------Not need optimize predicate, but it works.-------
1
1
1
2000-01-01 1 test string 1 1
-------Need push down-------
1
1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
1 test string 1 1 test string 1
1 test string 1 1 test string 1
test string 1 1 1
test string 1 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
1 2000-01-01 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
1 2000-01-01 2000-01-01 1 test string 1 1
-------Push to having expression, need check.-------

View File

@ -1,63 +1,56 @@
SET send_logs_level = 'none';
DROP TABLE IF EXISTS test.test;
DROP TABLE IF EXISTS test.test_union_1;
DROP TABLE IF EXISTS test.test_union_2;
DROP TABLE IF EXISTS test.test_join_1;
DROP TABLE IF EXISTS test.test_join_2;
CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192);
CREATE TABLE test.test_union_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192);
CREATE TABLE test.test_union_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192);
CREATE TABLE test.test_join_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192);
CREATE TABLE test.test_join_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192);
INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2);
INSERT INTO test.test_union_1 VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test_union_1 VALUES('2000-01-01', 2, 'test string 2', 2);
INSERT INTO test.test_union_2 VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test_union_2 VALUES('2000-01-01', 2, 'test string 2', 2);
INSERT INTO test.test_join_1 VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test_join_1 VALUES('2000-01-01', 2, 'test string 2', 2);
INSERT INTO test.test_join_2 VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test_join_2 VALUES('2000-01-01', 2, 'test string 2', 2);
SET enable_optimize_predicate_expression = 1;
-- Query that previously worked but now doesn't work.
SELECT '-------Query that previously worked but now doesn\'t work.-------';
SELECT * FROM (SELECT 1) WHERE `1` = 1; -- { serverError 47 }
SELECT 1; -- Not need push down, but it works.
SELECT '-------Not need optimize predicate, but it works.-------';
SELECT 1;
SELECT 1 AS id WHERE id = 1;
SELECT arrayJoin([1,2,3]) AS id WHERE id = 1;
SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
-- Need push down
SELECT '-------Need push down-------';
SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT date, id, name, value FROM (SELECT date, name, value,min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
SET force_primary_key = 1;
SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
SELECT date, id FROM (SELECT id, date, min(value) FROM test.test GROUP BY id, date) WHERE id = 1;
SELECT date_1, id_1, name_1, value_1 FROM (SELECT date_1, id_1, name_1, value_1 FROM test.test_union_1 UNION ALL SELECT date_2, id_2, name_2, value_2 FROM test.test_union_2) WHERE id_1 = 1;
SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_join_2) USING name WHERE id_1 = 1 AND id_2 = 1;
SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_union_2 UNION ALL SELECT id_1, name_1 AS name FROM test.test_union_1) USING name WHERE id_1 = 1 AND id_2 = 1;
SELECT * FROM (SELECT name_1,id_1 AS id_1, id_1 AS id_2 FROM test.test_union_1 UNION ALL (SELECT name,id_1,id_2 FROM (SELECT name_1 AS name, id_1 FROM test.test_join_1) ANY INNER JOIN (SELECT name_2 AS name, id_2 FROM test.test_join_2) USING (name))) WHERE id_1 = 1 AND id_2 = 1;
-- Optimize predicate expression with asterisk
SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
-- Optimize predicate expression with asterisk and nested subquery
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1;
-- Optimize predicate expression with qualified asterisk
SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1;
-- Optimize predicate expression without asterisk
SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
-- Optimize predicate expression without asterisk and contains nested subquery
SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1;
-- Optimize predicate expression with qualified
SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1;
-- Optimize predicate expression with qualified and nested subquery
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1;
-- Optimize predicate expression with aggregate function
SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1;
-- TODO This should work:
SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; -- { serverError 277 }
-- Optimize predicate expression with union all query
SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1;
-- Optimize predicate expression with join query
SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1;
-- Optimize predicate expression with join and nested subquery
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1;
-- Optimize predicate expression with join query and qualified
SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.id = 1;
SELECT '-------Push to having expression, need check.-------';
SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 }
DROP TABLE IF EXISTS test.test;
DROP TABLE IF EXISTS test.test_union_1;
DROP TABLE IF EXISTS test.test_union_2;
DROP TABLE IF EXISTS test.test_join_1;
DROP TABLE IF EXISTS test.test_join_2;