diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 889524a3fa5..aeb44ee4afb 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -66,6 +66,7 @@ #include #include #include +#include namespace DB @@ -164,35 +165,6 @@ void removeDuplicateColumns(NamesAndTypesList & columns) } - -String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const -{ - return (!alias.empty() ? alias : (database + '.' + table)) + '.'; -} - - -void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const -{ - if (auto identifier = typeid_cast(ast.get())) - { - String prefix = getQualifiedNamePrefix(); - identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end()); - - Names qualifiers; - if (!alias.empty()) - qualifiers.push_back(alias); - else - { - qualifiers.push_back(database); - qualifiers.push_back(table); - } - - for (const auto & qualifier : qualifiers) - identifier->children.emplace_back(std::make_shared(qualifier)); - } -} - - ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & ast_, const Context & context_, @@ -274,7 +246,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( getArrayJoinedColumns(); /// Push the predicate expression down to the subqueries. - rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings).optimize(); + rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); @@ -293,46 +265,6 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(); } -static DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, - const Context & context) -{ - DatabaseAndTableWithAlias database_and_table_with_alias; - - if (table_expression.database_and_table_name) - { - const auto & identifier = static_cast(*table_expression.database_and_table_name); - - database_and_table_with_alias.alias = identifier.tryGetAlias(); - - if (table_expression.database_and_table_name->children.empty()) - { - database_and_table_with_alias.database = context.getCurrentDatabase(); - database_and_table_with_alias.table = identifier.name; - } - else - { - if (table_expression.database_and_table_name->children.size() != 2) - throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - - database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; - database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; - } - } - else if (table_expression.table_function) - { - database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); - } - else if (table_expression.subquery) - { - database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); - } - else - throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); - - return database_and_table_with_alias; -} - - void ExpressionAnalyzer::translateQualifiedNames() { if (!select_query || !select_query->tables || select_query->tables->children.empty()) @@ -357,80 +289,6 @@ void ExpressionAnalyzer::translateQualifiedNames() translateQualifiedNamesImpl(ast, tables); } - -/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names. -static size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, - const DatabaseAndTableWithAlias & names) -{ - size_t num_qualifiers_to_strip = 0; - - auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; - - /// It is compound identifier - if (!identifier.children.empty()) - { - size_t num_components = identifier.children.size(); - - /// database.table.column - if (num_components >= 3 - && !names.database.empty() - && get_identifier_name(identifier.children[0]) == names.database - && get_identifier_name(identifier.children[1]) == names.table) - { - num_qualifiers_to_strip = 2; - } - - /// table.column or alias.column. If num_components > 2, it is like table.nested.column. - if (num_components >= 2 - && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) - || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) - { - num_qualifiers_to_strip = 1; - } - } - - return num_qualifiers_to_strip; -} - - -/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. -/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. -static void stripIdentifier(ASTPtr & ast, size_t num_qualifiers_to_strip) -{ - ASTIdentifier * identifier = typeid_cast(ast.get()); - - if (!identifier) - throw Exception("ASTIdentifier expected for stripIdentifier", ErrorCodes::LOGICAL_ERROR); - - if (num_qualifiers_to_strip) - { - size_t num_components = identifier->children.size(); - - /// plain column - if (num_components - num_qualifiers_to_strip == 1) - { - String node_alias = identifier->tryGetAlias(); - ast = identifier->children.back(); - if (!node_alias.empty()) - ast->setAlias(node_alias); - } - else - /// nested column - { - identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip); - String new_name; - for (const auto & child : identifier->children) - { - if (!new_name.empty()) - new_name += '.'; - new_name += static_cast(*child.get()).name; - } - identifier->name = new_name; - } - } -} - - void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::vector & tables) { if (auto * identifier = typeid_cast(ast.get())) @@ -509,7 +367,6 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::ve } } - void ExpressionAnalyzer::optimizeIfWithConstantCondition() { optimizeIfWithConstantConditionImpl(ast, aliases); @@ -765,23 +622,6 @@ void ExpressionAnalyzer::findExternalTables(ASTPtr & ast) external_tables[node->name] = external_storage; } - -static std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier) -{ - std::pair res; - res.second = identifier.name; - if (!identifier.children.empty()) - { - if (identifier.children.size() != 2) - throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR); - - res.first = typeid_cast(*identifier.children[0]).name; - res.second = typeid_cast(*identifier.children[1]).name; - } - return res; -} - - static std::shared_ptr interpretSubquery( const ASTPtr & subquery_or_table_name, const Context & context, size_t subquery_depth, const Names & required_source_columns) { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 28d4ca7c10f..ebb25234424 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -91,19 +92,6 @@ struct ScopeStack const Block & getSampleBlock() const; }; -struct DatabaseAndTableWithAlias -{ - String database; - String table; - String alias; - - /// "alias." or "database.table." if alias is empty - String getQualifiedNamePrefix() const; - - /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name. - void makeQualifiedName(const ASTPtr & ast) const; -}; - /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 7f6470f7218..d3e5fa05cee 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -2,11 +2,11 @@ #include #include #include -#include #include -#include -#include #include +#include +#include +#include namespace DB { @@ -14,18 +14,18 @@ namespace DB static constexpr auto and_function_name = "and"; PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( - ASTSelectQuery * ast_select_, const Settings & settings_) - : ast_select(ast_select_), settings(settings_) + ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_) + : ast_select(ast_select_), settings(settings_), context(context_) { } bool PredicateExpressionsOptimizer::optimize() { - if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables) + if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty()) return false; SubqueriesProjectionColumns all_subquery_projection_columns; - getAllSubqueryProjectionColumns(ast_select->tables.get(), all_subquery_projection_columns); + getAllSubqueryProjectionColumns(all_subquery_projection_columns); bool is_rewrite_subqueries = false; if (!all_subquery_projection_columns.empty()) @@ -42,11 +42,16 @@ bool PredicateExpressionsOptimizer::optimizeImpl( /// split predicate with `and` PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression); + std::vector tables_expression = getSelectTablesExpression(ast_select); + std::vector database_and_table_with_aliases; + for (const auto & table_expression : tables_expression) + database_and_table_with_aliases.emplace_back(getTableNameWithAliasFromTableExpression(*table_expression, context)); + bool is_rewrite_subquery = false; for (const auto & outer_predicate : outer_predicate_expressions) { - ASTs outer_predicate_dependent; - getExpressionDependentColumns(outer_predicate, outer_predicate_dependent); + IdentifiersWithQualifiedNameSet outer_predicate_dependencies; + getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases); /// TODO: remove origin expression for (const auto & subquery_projection_columns : subqueries_projection_columns) @@ -55,10 +60,10 @@ bool PredicateExpressionsOptimizer::optimizeImpl( const ProjectionsWithAliases projection_columns = subquery_projection_columns.second; OptimizeKind optimize_kind = OptimizeKind::NONE; - if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependent, is_prewhere, optimize_kind)) + if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependencies, is_prewhere, optimize_kind)) { ASTPtr inner_predicate; - cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, outer_predicate_dependent, inner_predicate); + cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, database_and_table_with_aliases, inner_predicate); switch(optimize_kind) { @@ -109,34 +114,57 @@ PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(AS return predicate_expressions; } -void PredicateExpressionsOptimizer::getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns) +void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, + IdentifiersWithQualifiedNameSet & dependencies_and_qualified, + std::vector & tables_with_aliases) { - if (!typeid_cast(expression.get())) + if (const auto identifier = typeid_cast(expression.get())) + { + if (!identifier->children.empty()) + dependencies_and_qualified.emplace_back(std::pair(identifier, expression->getAliasOrColumnName())); + else + { + size_t best_table_pos = 0; + size_t max_num_qualifiers_to_strip = 0; + + /// translate qualifiers for dependent columns + for (size_t table_pos = 0; table_pos < tables_with_aliases.size(); ++table_pos) + { + const auto & table = tables_with_aliases[table_pos]; + auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, table); + + if (num_qualifiers_to_strip > max_num_qualifiers_to_strip) + { + max_num_qualifiers_to_strip = num_qualifiers_to_strip; + best_table_pos = table_pos; + } + } + + String qualified_name = tables_with_aliases[best_table_pos].getQualifiedNamePrefix() + expression->getAliasOrColumnName(); + dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name)); + } + } + else { for (const auto & child : expression->children) - getExpressionDependentColumns(child, expression_dependent_columns); - - return; + getDependenciesAndQualifiedOfExpression(child, dependencies_and_qualified, tables_with_aliases); } - - expression_dependent_columns.emplace_back(expression); } bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate( const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, - ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind) + IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind) { if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_offset || subquery->with_expression_list) return true; - for (auto & dependent_column : expression_dependent_columns) + for (auto & predicate_dependency : outer_predicate_dependencies) { bool is_found = false; - String dependent_column_name = dependent_column->getAliasOrColumnName(); for (auto projection_column : subquery_projection_columns) { - if (projection_column.second == dependent_column_name) + if (projection_column.second == predicate_dependency.second) { is_found = true; optimize_kind = isAggregateFunction(projection_column.first) ? OptimizeKind::PUSH_TO_HAVING : optimize_kind; @@ -168,39 +196,21 @@ bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node) return false; } -void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns) -{ - if (auto ast_subquery = typeid_cast(node)) - { - ASTs output_projection; - IAST * subquery = ast_subquery->children.at(0).get(); - getSubqueryProjectionColumns(subquery, all_subquery_projection_columns, output_projection); - return; - } - - for (auto & child : node->children) - getAllSubqueryProjectionColumns(child.get(), all_subquery_projection_columns); -} - void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate( - const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns, - ASTPtr & inner_predicate) + const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, + std::vector & tables, ASTPtr & inner_predicate) { inner_predicate = outer_predicate->clone(); - ASTs new_expression_require_columns; - new_expression_require_columns.reserve(predicate_dependent_columns.size()); - getExpressionDependentColumns(inner_predicate, new_expression_require_columns); + IdentifiersWithQualifiedNameSet new_expression_requires; + getDependenciesAndQualifiedOfExpression(inner_predicate, new_expression_requires, tables); - for (auto & expression : new_expression_require_columns) + for (auto & require : new_expression_requires) { - if (auto identifier = typeid_cast(expression.get())) + for (auto projection : projection_columns) { - for (auto projection : projection_columns) - { - if (identifier->name == projection.second) - identifier->name = projection.first->getAliasOrColumnName(); - } + if (require.second == projection.second) + require.first->name = projection.first->getAliasOrColumnName(); } } } @@ -221,32 +231,155 @@ bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expr return true; } -void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections) +void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns) { - if (auto * with_union_subquery = typeid_cast(subquery)) - for (auto & select : with_union_subquery->list_of_selects->children) - getSubqueryProjectionColumns(select.get(), all_subquery_projection_columns, output_projections); + const auto tables_expression = getSelectTablesExpression(ast_select); - - if (auto * without_union_subquery = typeid_cast(subquery)) + for (const auto & table_expression : tables_expression) { - const auto expression_list = without_union_subquery->select_expression_list->children; - - /// use first projection as the output projection - if (output_projections.empty()) - output_projections = expression_list; - - if (output_projections.size() != expression_list.size()) - throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - ProjectionsWithAliases subquery_projections; - subquery_projections.reserve(expression_list.size()); - - for (size_t idx = 0; idx < expression_list.size(); idx++) - subquery_projections.emplace_back(std::pair(expression_list.at(idx), output_projections.at(idx)->getAliasOrColumnName())); - - all_subquery_projection_columns.insert(std::pair(subquery, subquery_projections)); + if (table_expression->subquery) + { + /// Use qualifiers to translate the columns of subqueries + const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context); + String qualified_name_prefix = database_and_table_with_alias.getQualifiedNamePrefix(); + getSubqueryProjectionColumns(all_subquery_projection_columns, qualified_name_prefix, + static_cast(table_expression->subquery.get())->children[0]); + } } } +void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns, + String & qualified_name_prefix, const ASTPtr & subquery) +{ + ASTs select_with_union_projections; + auto select_with_union_query = static_cast(subquery.get()); + + for (auto & select_without_union_query : select_with_union_query->list_of_selects->children) + { + ProjectionsWithAliases subquery_projections; + auto select_projection_columns = getSelectQueryProjectionColumns(select_without_union_query); + + if (select_with_union_projections.empty()) + select_with_union_projections = select_projection_columns; + + for (size_t i = 0; i < select_projection_columns.size(); i++) + subquery_projections.emplace_back(std::pair(select_projection_columns[i], + qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName())); + + all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections)); + } +} + +ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast) +{ + ASTs projection_columns; + auto select_query = static_cast(ast.get()); + + for (const auto & projection_column : select_query->select_expression_list->children) + { + if (typeid_cast(projection_column.get()) || typeid_cast(projection_column.get())) + { + ASTs evaluated_columns = evaluateAsterisk(select_query, projection_column); + + for (const auto & column : evaluated_columns) + projection_columns.emplace_back(column); + + continue; + } + + projection_columns.emplace_back(projection_column); + } + return projection_columns; +} + +ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery *select_query, const ASTPtr &asterisk) +{ + if (!select_query->tables || select_query->tables->children.empty()) + throw Exception("Logical error: The asterisk cannot be replaced, because there is no table.", ErrorCodes::LOGICAL_ERROR); + + std::vector tables_expression = getSelectTablesExpression(select_query); + + if (const auto qualified_asterisk = typeid_cast(asterisk.get())) + { + if (qualified_asterisk->children.size() != 1) + throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); + + ASTIdentifier * ident = typeid_cast(qualified_asterisk->children[0].get()); + if (!ident) + throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR); + + size_t num_components = ident->children.size(); + if (num_components > 2) + throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + + for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it) + { + const ASTTableExpression * table_expression = *it; + const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context); + /// database.table.* + if (num_components == 2 && !database_and_table_with_alias.database.empty() + && static_cast(*ident->children[0]).name == database_and_table_with_alias.database + && static_cast(*ident->children[1]).name == database_and_table_with_alias.table) + continue; + /// table.* or alias.* + else if (num_components == 0 + && ((!database_and_table_with_alias.table.empty() && ident->name == database_and_table_with_alias.table) + || (!database_and_table_with_alias.alias.empty() && ident->name == database_and_table_with_alias.alias))) + continue; + else + /// It's not a required table + tables_expression.erase(it); + } + } + + ASTs projection_columns; + for (auto & table_expression : tables_expression) + { + if (table_expression->subquery) + { + const auto subquery = static_cast(table_expression->subquery.get()); + const auto select_with_union_query = static_cast(subquery->children[0].get()); + const auto subquery_projections = getSelectQueryProjectionColumns(select_with_union_query->list_of_selects->children[0]); + projection_columns.insert(projection_columns.end(), subquery_projections.begin(), subquery_projections.end()); + } + else + { + StoragePtr storage; + + if (table_expression->table_function) + storage = const_cast(context).executeTableFunction(table_expression->table_function); + else if (table_expression->database_and_table_name) + { + const auto database_and_table_ast = static_cast(table_expression->database_and_table_name.get()); + const auto database_and_table_name = getDatabaseAndTableNameFromIdentifier(*database_and_table_ast); + storage = context.tryGetTable(database_and_table_name.first, database_and_table_name.second); + } + + const auto block = storage->getSampleBlock(); + for (size_t idx = 0; idx < block.columns(); idx++) + projection_columns.emplace_back(std::make_shared(block.getByPosition(idx).name)); + } + } + return projection_columns; +} + +std::vector PredicateExpressionsOptimizer::getSelectTablesExpression(ASTSelectQuery * select_query) +{ + if (!select_query->tables) + return {}; + + std::vector tables_expression; + const ASTTablesInSelectQuery & tables_in_select_query = static_cast(*select_query->tables); + + for (const auto & child : tables_in_select_query.children) + { + ASTTablesInSelectQueryElement * tables_element = static_cast(child.get()); + + if (tables_element->table_expression) + tables_expression.emplace_back(static_cast(tables_element->table_expression.get())); + } + + return tables_expression; +} + } diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index 723fe0b118c..f3d00a6fce9 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -21,6 +24,8 @@ using PredicateExpressions = std::vector; using ProjectionWithAlias = std::pair; using ProjectionsWithAliases = std::vector; using SubqueriesProjectionColumns = std::map; +using IdentifierWithQualifiedName = std::pair; +using IdentifiersWithQualifiedNameSet = std::vector; /** This class provides functions for Push-Down predicate expressions @@ -37,13 +42,14 @@ using SubqueriesProjectionColumns = std::map; class PredicateExpressionsOptimizer { public: - PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_); + PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_); bool optimize(); private: ASTSelectQuery * ast_select; const Settings & settings; + const Context & context; enum OptimizeKind { @@ -57,24 +63,29 @@ private: PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression); - void getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns); + void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified, + std::vector & tables_with_aliases); bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery); bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere); - bool cannotPushDownOuterPredicate( - const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, - ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind); + bool cannotPushDownOuterPredicate(const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, + IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind); - void cloneOuterPredicateForInnerPredicate( - const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns, - ASTPtr & inner_predicate); + void cloneOuterPredicateForInnerPredicate(const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, + std::vector & tables, ASTPtr & inner_predicate); + void getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns); - void getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns); + void getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns, + String & qualified_name_prefix, const ASTPtr & subquery); - void getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections); + ASTs getSelectQueryProjectionColumns(ASTPtr & ast); + + std::vector getSelectTablesExpression(ASTSelectQuery * select_query); + + ASTs evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk); }; } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index e1e02af96e4..524906bddfe 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -272,7 +272,7 @@ struct Settings M(SettingBool, log_query_settings, true, "Log query settings into the query_log.") \ M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table.") \ M(SettingString, send_logs_level, "none", "Send server text logs with specified minumum level to client. Valid values: 'trace', 'debug', 'info', 'warning', 'error', 'none'") \ - M(SettingBool, enable_optimize_predicate_expression, 0, "If it is set to true, optimize predicates to subqueries.") \ + M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.") \ \ M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.") \ M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.") \ diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/evaluateQualified.cpp new file mode 100644 index 00000000000..262cbf6d661 --- /dev/null +++ b/dbms/src/Interpreters/evaluateQualified.cpp @@ -0,0 +1,160 @@ +#include +#include +#include + +namespace DB +{ + +/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. +/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. +void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip) +{ + ASTIdentifier * identifier = typeid_cast(ast.get()); + + if (!identifier) + throw DB::Exception("ASTIdentifier expected for stripIdentifier", DB::ErrorCodes::LOGICAL_ERROR); + + if (num_qualifiers_to_strip) + { + size_t num_components = identifier->children.size(); + + /// plain column + if (num_components - num_qualifiers_to_strip == 1) + { + DB::String node_alias = identifier->tryGetAlias(); + ast = identifier->children.back(); + if (!node_alias.empty()) + ast->setAlias(node_alias); + } + else + /// nested column + { + identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip); + DB::String new_name; + for (const auto & child : identifier->children) + { + if (!new_name.empty()) + new_name += '.'; + new_name += static_cast(*child.get()).name; + } + identifier->name = new_name; + } + } +} + + +DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, + const Context & context) +{ + DatabaseAndTableWithAlias database_and_table_with_alias; + + if (table_expression.database_and_table_name) + { + const auto & identifier = static_cast(*table_expression.database_and_table_name); + + database_and_table_with_alias.alias = identifier.tryGetAlias(); + + if (table_expression.database_and_table_name->children.empty()) + { + database_and_table_with_alias.database = context.getCurrentDatabase(); + database_and_table_with_alias.table = identifier.name; + } + else + { + if (table_expression.database_and_table_name->children.size() != 2) + throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); + + database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; + database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; + } + } + else if (table_expression.table_function) + { + database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); + } + else if (table_expression.subquery) + { + database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); + } + else + throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); + + return database_and_table_with_alias; +} + +/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names. +size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, + const DatabaseAndTableWithAlias & names) +{ + size_t num_qualifiers_to_strip = 0; + + auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; + + /// It is compound identifier + if (!identifier.children.empty()) + { + size_t num_components = identifier.children.size(); + + /// database.table.column + if (num_components >= 3 + && !names.database.empty() + && get_identifier_name(identifier.children[0]) == names.database + && get_identifier_name(identifier.children[1]) == names.table) + { + num_qualifiers_to_strip = 2; + } + + /// table.column or alias.column. If num_components > 2, it is like table.nested.column. + if (num_components >= 2 + && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) + || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) + { + num_qualifiers_to_strip = 1; + } + } + + return num_qualifiers_to_strip; +} + +std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier) +{ + std::pair res; + res.second = identifier.name; + if (!identifier.children.empty()) + { + if (identifier.children.size() != 2) + throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR); + + res.first = typeid_cast(*identifier.children[0]).name; + res.second = typeid_cast(*identifier.children[1]).name; + } + return res; +} + +String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const +{ + return (!alias.empty() ? alias : (database + '.' + table)) + '.'; +} + +void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const +{ + if (auto identifier = typeid_cast(ast.get())) + { + String prefix = getQualifiedNamePrefix(); + identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end()); + + Names qualifiers; + if (!alias.empty()) + qualifiers.push_back(alias); + else + { + qualifiers.push_back(database); + qualifiers.push_back(table); + } + + for (const auto & qualifier : qualifiers) + identifier->children.emplace_back(std::make_shared(qualifier)); + } +} + +} \ No newline at end of file diff --git a/dbms/src/Interpreters/evaluateQualified.h b/dbms/src/Interpreters/evaluateQualified.h new file mode 100644 index 00000000000..01f1dc8f9a6 --- /dev/null +++ b/dbms/src/Interpreters/evaluateQualified.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +struct DatabaseAndTableWithAlias +{ + String database; + String table; + String alias; + + /// "alias." or "database.table." if alias is empty + String getQualifiedNamePrefix() const; + + /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name. + void makeQualifiedName(const ASTPtr & ast) const; +}; + +void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip); + +DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, + const Context & context); + +size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, + const DatabaseAndTableWithAlias & names); + +std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier); + +} \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index c3cb55cdac6..7d03801e78f 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -1,15 +1,23 @@ +-------Query that previously worked but now doesn\'t work.------- +-------Not need optimize predicate, but it works.------- 1 1 1 -2000-01-01 1 test string 1 1 +-------Need push down------- 1 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -2000-01-01 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -1 test string 1 1 test string 1 -1 test string 1 1 test string 1 -test string 1 1 1 -test string 1 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +1 2000-01-01 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +1 2000-01-01 2000-01-01 1 test string 1 1 +-------Push to having expression, need check.------- diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index b884d9feae7..156eebaf16b 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -1,63 +1,56 @@ SET send_logs_level = 'none'; DROP TABLE IF EXISTS test.test; -DROP TABLE IF EXISTS test.test_union_1; -DROP TABLE IF EXISTS test.test_union_2; -DROP TABLE IF EXISTS test.test_join_1; -DROP TABLE IF EXISTS test.test_join_2; - CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); -CREATE TABLE test.test_union_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192); -CREATE TABLE test.test_union_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192); -CREATE TABLE test.test_join_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192); -CREATE TABLE test.test_join_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192); - INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1); INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2); -INSERT INTO test.test_union_1 VALUES('2000-01-01', 1, 'test string 1', 1); -INSERT INTO test.test_union_1 VALUES('2000-01-01', 2, 'test string 2', 2); -INSERT INTO test.test_union_2 VALUES('2000-01-01', 1, 'test string 1', 1); -INSERT INTO test.test_union_2 VALUES('2000-01-01', 2, 'test string 2', 2); -INSERT INTO test.test_join_1 VALUES('2000-01-01', 1, 'test string 1', 1); -INSERT INTO test.test_join_1 VALUES('2000-01-01', 2, 'test string 2', 2); -INSERT INTO test.test_join_2 VALUES('2000-01-01', 1, 'test string 1', 1); -INSERT INTO test.test_join_2 VALUES('2000-01-01', 2, 'test string 2', 2); - SET enable_optimize_predicate_expression = 1; --- Query that previously worked but now doesn't work. +SELECT '-------Query that previously worked but now doesn\'t work.-------'; SELECT * FROM (SELECT 1) WHERE `1` = 1; -- { serverError 47 } -SELECT 1; -- Not need push down, but it works. +SELECT '-------Not need optimize predicate, but it works.-------'; +SELECT 1; SELECT 1 AS id WHERE id = 1; SELECT arrayJoin([1,2,3]) AS id WHERE id = 1; -SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; --- Need push down +SELECT '-------Need push down-------'; SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; SELECT date, id, name, value FROM (SELECT date, name, value,min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1; - SET force_primary_key = 1; -SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1; -SELECT date, id FROM (SELECT id, date, min(value) FROM test.test GROUP BY id, date) WHERE id = 1; -SELECT date_1, id_1, name_1, value_1 FROM (SELECT date_1, id_1, name_1, value_1 FROM test.test_union_1 UNION ALL SELECT date_2, id_2, name_2, value_2 FROM test.test_union_2) WHERE id_1 = 1; -SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_join_2) USING name WHERE id_1 = 1 AND id_2 = 1; -SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_union_2 UNION ALL SELECT id_1, name_1 AS name FROM test.test_union_1) USING name WHERE id_1 = 1 AND id_2 = 1; -SELECT * FROM (SELECT name_1,id_1 AS id_1, id_1 AS id_2 FROM test.test_union_1 UNION ALL (SELECT name,id_1,id_2 FROM (SELECT name_1 AS name, id_1 FROM test.test_join_1) ANY INNER JOIN (SELECT name_2 AS name, id_2 FROM test.test_join_2) USING (name))) WHERE id_1 = 1 AND id_2 = 1; +-- Optimize predicate expression with asterisk +SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; +-- Optimize predicate expression with asterisk and nested subquery +SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1; +-- Optimize predicate expression with qualified asterisk +SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1; +-- Optimize predicate expression without asterisk +SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1; +-- Optimize predicate expression without asterisk and contains nested subquery +SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1; +-- Optimize predicate expression with qualified +SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1; +-- Optimize predicate expression with qualified and nested subquery +SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1; +-- Optimize predicate expression with aggregate function +SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1; --- TODO This should work: -SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; -- { serverError 277 } +-- Optimize predicate expression with union all query +SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1; +-- Optimize predicate expression with join query +SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1; +-- Optimize predicate expression with join and nested subquery +SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1; +-- Optimize predicate expression with join query and qualified +SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.id = 1; +SELECT '-------Push to having expression, need check.-------'; SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 } DROP TABLE IF EXISTS test.test; -DROP TABLE IF EXISTS test.test_union_1; -DROP TABLE IF EXISTS test.test_union_2; -DROP TABLE IF EXISTS test.test_join_1; -DROP TABLE IF EXISTS test.test_join_2;