From 1d5b62cc18678ae2287f69ad608d927b9fd9fbab Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 3 Jan 2021 13:24:09 +0300 Subject: [PATCH] add simple optimization --- src/CMakeLists.txt | 2 +- src/Core/Settings.h | 1 + src/Interpreters/ConstraintMatcherVisitor.cpp | 1 + src/Interpreters/ConstraintMatcherVisitor.h | 48 +++++++++++++++++++ src/Interpreters/TreeOptimizer.cpp | 21 ++++++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 src/Interpreters/ConstraintMatcherVisitor.cpp create mode 100644 src/Interpreters/ConstraintMatcherVisitor.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e04f5607df..53232e742a7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -63,7 +63,7 @@ add_subdirectory (Server) set(dbms_headers) -set(dbms_sources) +set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b09e960da36..eea95a8b45a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -378,6 +378,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \ + M(Bool, optimize_using_constraints, true, "Use constraints for query optimization", 0) \ M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. 
Work in progress.", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ diff --git a/src/Interpreters/ConstraintMatcherVisitor.cpp b/src/Interpreters/ConstraintMatcherVisitor.cpp new file mode 100644 index 00000000000..61b1c436b3a --- /dev/null +++ b/src/Interpreters/ConstraintMatcherVisitor.cpp @@ -0,0 +1 @@ +#include "ConstraintMatcherVisitor.h" diff --git a/src/Interpreters/ConstraintMatcherVisitor.h b/src/Interpreters/ConstraintMatcherVisitor.h new file mode 100644 index 00000000000..246afcc91bd --- /dev/null +++ b/src/Interpreters/ConstraintMatcherVisitor.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +/// Matcher used with InDepthNodeVisitor: Data maps a tree hash to the constraint ASTs having that hash; when a visited node's tree hash and column name both equal those of a stored constraint, visit() replaces the node with a node constructed from the value 1, i.e. the expression is treated as always true. +struct ConstraintMatcher +{ + struct Data + { + std::unordered_map> constraints; + }; + + using Visitor = InDepthNodeVisitor; + + static bool needChildVisit(const ASTPtr & node, const ASTPtr &) { return (node->as()) && (node->as()); } + + static bool alwaysTrue(const ASTPtr & node, Data & data) { + const auto it = data.constraints.find(node->getTreeHash().second); + if (it != std::end(data.constraints)) { + for (const auto & ast : it->second) { + if (node->getColumnName() == ast->getColumnName()) { + return true; + } + } + } + return false; + } + + static void visit(ASTPtr & ast, Data & data) + { + if (alwaysTrue(ast, data)) { + ast = std::make_shared(1); + } + } +}; + +using ConstraintMatcherVisitor = InDepthNodeVisitor; + +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index cee19c632fa..504ead7465a 100644 --- 
a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -506,6 +507,20 @@ void optimizeLimitBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } +/** Groups the constraints of metadata_snapshot by their tree hash into ConstraintMatcherVisitor::Data and runs ConstraintMatcherVisitor over query, which may rewrite query in place. The two parameters whose names are commented out are unused. NOTE(review): metadata_snapshot is dereferenced and query is passed to visit() with no null check in this function; confirm that every caller guarantees both are set, e.g. for a SELECT that has no WHERE or PREWHERE clause or no underlying table. */ void optimizeWithConstraints(ASTPtr & query, const NameSet & /* source_columns_set */, + const std::vector & /* tables_with_columns */, + const StorageMetadataPtr & metadata_snapshot) +{ + ConstraintMatcherVisitor::Data constraint_data; + + for (const auto & constraint : metadata_snapshot->getConstraints().constraints) + { + constraint_data.constraints[constraint->getTreeHash().second].push_back(constraint); + } + + ConstraintMatcherVisitor(constraint_data).visit(query); +} + /// Remove duplicated columns from USING(...). void optimizeUsing(const ASTSelectQuery * select_query) { @@ -597,6 +612,12 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou /// Push the predicate expression down to the subqueries. rewrite_subqueries = PredicateExpressionsOptimizer(context, tables_with_columns, settings).optimize(*select_query); + if (settings.optimize_using_constraints) + { + optimizeWithConstraints(select_query->refWhere(), source_columns_set, tables_with_columns, metadata_snapshot); + optimizeWithConstraints(select_query->refPrewhere(), source_columns_set, tables_with_columns, metadata_snapshot); + } + /// GROUP BY injective function elimination. optimizeGroupBy(select_query, source_columns_set, context);