#include #include #include #include #include #include #include #include #include #include #include #include namespace DB { PredicateRewriteVisitorData::PredicateRewriteVisitorData( ContextPtr context_, const ASTs & predicates_, const TableWithColumnNamesAndTypes & table_columns_, bool optimize_final_, bool optimize_with_) : WithContext(context_) , predicates(predicates_) , table_columns(table_columns_) , optimize_final(optimize_final_) , optimize_with(optimize_with_) { } void PredicateRewriteVisitorData::visit(ASTSelectWithUnionQuery & union_select_query, ASTPtr &) { auto & internal_select_list = union_select_query.list_of_selects->children; for (size_t index = 0; index < internal_select_list.size(); ++index) { if (auto * child_union = internal_select_list[index]->as()) visit(*child_union, internal_select_list[index]); else { if (index == 0) visitFirstInternalSelect(*internal_select_list[0]->as(), internal_select_list[0]); else visitOtherInternalSelect(*internal_select_list[index]->as(), internal_select_list[index]); } } } void PredicateRewriteVisitorData::visitFirstInternalSelect(ASTSelectQuery & select_query, ASTPtr &) { /// In this case inner_columns same as outer_columns from table_columns is_rewrite |= rewriteSubquery(select_query, table_columns.columns.getNames()); } void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & select_query, ASTPtr &) { /// For non first select, its alias has no more significance, so we can set a temporary alias for them ASTPtr temp_internal_select = select_query.clone(); ASTSelectQuery * temp_select_query = temp_internal_select->as(); size_t alias_index = 0; for (auto & ref_select : temp_select_query->refSelect()->children) { if (!ref_select->as() && !ref_select->as() && !ref_select->as() && !ref_select->as()) { if (const auto & alias = ref_select->tryGetAlias(); alias.empty()) ref_select->setAlias("--predicate_optimizer_" + toString(alias_index++)); } } const Names & internal_columns = InterpreterSelectQuery( temp_internal_select, const_pointer_cast(getContext()), SelectQueryOptions().analyze()).getSampleBlock().getNames(); if (rewriteSubquery(*temp_select_query, internal_columns)) { is_rewrite |= true; select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(temp_select_query->refSelect())); select_query.setExpression(ASTSelectQuery::Expression::HAVING, std::move(temp_select_query->refHaving())); } } static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector & identifiers) { /// Skip WHERE x in (SELECT ...) if (!predicate->as()) { for (auto & children : predicate->children) cleanAliasAndCollectIdentifiers(children, identifiers); } if (const auto alias = predicate->tryGetAlias(); !alias.empty()) predicate->setAlias({}); if (ASTIdentifier * identifier = predicate->as()) identifiers.emplace_back(identifier); } bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns) { if ((!optimize_final && subquery.final()) || (!optimize_with && subquery.with()) || subquery.withFill() || subquery.limitBy() || subquery.limitLength() || hasNonRewritableFunction(subquery.select(), getContext())) return false; Names outer_columns = table_columns.columns.getNames(); for (const auto & predicate : predicates) { std::vector identifiers; ASTPtr optimize_predicate = predicate->clone(); cleanAliasAndCollectIdentifiers(optimize_predicate, identifiers); for (const auto & identifier : identifiers) { IdentifierSemantic::setColumnShortName(*identifier, table_columns.table); const auto & column_name = identifier->name(); /// For lambda functions, we can't always find them in the list of columns /// For example: SELECT * FROM system.one WHERE arrayMap(x -> x, [dummy]) = [0] const auto & outer_column_iterator = std::find(outer_columns.begin(), outer_columns.end(), column_name); if (outer_column_iterator != outer_columns.end()) { identifier->setShortName(inner_columns[outer_column_iterator - outer_columns.begin()]); } } /// We only need to push all the predicates to subquery having /// The subquery optimizer will move the appropriate predicates from having to where subquery.setExpression(ASTSelectQuery::Expression::HAVING, subquery.having() ? makeASTFunction("and", optimize_predicate, subquery.having()) : optimize_predicate); } return true; } }