From 5663db33b1166ff2d7eff08e1a1f0bad421e43c5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 26 Apr 2021 14:26:54 +0300 Subject: [PATCH] impr --- src/Interpreters/ComparisonGraph.cpp | 30 +++- src/Interpreters/ComparisonGraph.h | 3 + .../MergeTree/SubstituteColumnOptimizer.cpp | 167 +++++++++++++----- 3 files changed, 152 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index da0bd25c99e..aff272a76d2 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -203,25 +203,39 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con } std::vector ComparisonGraph::getEqual(const ASTPtr & ast) const +{ + const auto res = getComponentId(ast); + if (!res) + return {}; + else + return getComponent(res.value()); +} + +std::optional ComparisonGraph::getComponentId(const ASTPtr & ast) const { const auto hash_it = graph.ast_hash_to_component.find(ast->getTreeHash()); if (hash_it == std::end(graph.ast_hash_to_component)) return {}; const size_t index = hash_it->second; if (std::any_of( - std::cbegin(graph.vertexes[index].asts), - std::cend(graph.vertexes[index].asts), - [ast](const ASTPtr & constraint_ast) - { - return constraint_ast->getTreeHash() == ast->getTreeHash() && - constraint_ast->getColumnName() == ast->getColumnName(); - })) { - return graph.vertexes[index].asts; + std::cbegin(graph.vertexes[index].asts), + std::cend(graph.vertexes[index].asts), + [ast](const ASTPtr & constraint_ast) + { + return constraint_ast->getTreeHash() == ast->getTreeHash() && + constraint_ast->getColumnName() == ast->getColumnName(); + })) { + return index; } else { return {}; } } +std::vector ComparisonGraph::getComponent(const std::size_t id) const +{ + return graph.vertexes[id].asts; +} + bool ComparisonGraph::EqualComponent::hasConstant() const { return constant_index != -1; } diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index a6b40fc148a..a47f9fbdf90 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -35,6 +35,9 @@ public: std::vector getEqual(const ASTPtr & ast) const; std::optional getEqualConst(const ASTPtr & ast) const; + std::optional getComponentId(const ASTPtr & ast) const; + std::vector getComponent(const std::size_t id) const; + /// Find constants lessOrEqual and greaterOrEqual. /// For int and double linear programming can be applied here. /// Returns: {constant, is strict less/greater} diff --git a/src/Storages/MergeTree/SubstituteColumnOptimizer.cpp b/src/Storages/MergeTree/SubstituteColumnOptimizer.cpp index 7f349c5d66c..e832087bab1 100644 --- a/src/Storages/MergeTree/SubstituteColumnOptimizer.cpp +++ b/src/Storages/MergeTree/SubstituteColumnOptimizer.cpp @@ -1,10 +1,9 @@ #include -#include #include #include #include #include -#include +#include #include #include #include @@ -21,52 +20,118 @@ namespace ErrorCodes namespace { -class SubstituteColumnMatcher + +const String COMPONENT = "__constraint_component_"; + +class ComponentMatcher { public: - using Visitor = InDepthNodeVisitor; + using Visitor = InDepthNodeVisitor; struct Data { const ComparisonGraph & graph; - ConstStoragePtr storage; - Data(const ComparisonGraph & graph_, const ConstStoragePtr & storage_) - : graph(graph_), storage(storage_) + Data(const ComparisonGraph & graph_) + : graph(graph_) { } }; static void visit(ASTPtr & ast, Data & data) { - const auto column_sizes = data.storage->getColumnSizes(); + const auto id = data.graph.getComponentId(ast); + if (id) + ast = std::make_shared(COMPONENT + std::to_string(id.value())); + } - // like TreeRewriter - struct ColumnSizeTuple + static bool needChildVisit(const ASTPtr &, const ASTPtr &) + { + return true; + } +}; + +using ComponentVisitor = ComponentMatcher::Visitor; + + +class IdentifierSetMatcher +{ +public: + using Visitor = InDepthNodeVisitor; + + struct Data + { + std::unordered_set identifiers; + }; + + static void visit(ASTPtr & ast, Data & data) + { + const auto * identifier = ast->as(); + if (identifier) + data.identifiers.insert(identifier->name()); + } + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) + { + return true; + } +}; + +using IdentifierSetVisitor = IdentifierSetMatcher::Visitor; + + +class SubstituteColumnMatcher +{ +public: + using Visitor = InDepthNodeVisitor; + + struct Data + { + const ComparisonGraph & graph; + const std::unordered_set & identifiers; + ConstStoragePtr storage; + + Data(const ComparisonGraph & graph_, + const std::unordered_set & identifiers_, + const ConstStoragePtr & storage_) + : graph(graph_) + , identifiers(identifiers_) + , storage(storage_) { - size_t compressed_size; - size_t uncompressed_size; - const ASTPtr & ast; - - bool operator<(const ColumnSizeTuple & that) const - { - return std::tie(compressed_size, uncompressed_size) - < std::tie(that.compressed_size, that.uncompressed_size); - } - }; - - std::vector columns; - for (const auto & equal_ast : data.graph.getEqual(ast)) - { - if (const auto it = column_sizes.find(equal_ast->getColumnName()); it != std::end(column_sizes)) - columns.push_back({ - it->second.data_compressed, - it->second.data_uncompressed, - equal_ast}); } + }; - if (!columns.empty()) - ast = std::min_element(std::begin(columns), std::end(columns))->ast->clone(); + static void visit(ASTPtr & ast, Data & data) + { + const auto * identifier = ast->as(); + if (identifier && identifier->name().starts_with(COMPONENT)) + { + const std::size_t id = std::stoll(identifier->name().substr(COMPONENT.size(), identifier->name().size())); + // like TreeRewriter + struct ColumnSizeTuple + { + size_t compressed_size; + size_t uncompressed_size; + const ASTPtr & ast; + + bool operator<(const ColumnSizeTuple & that) const + { + return std::tie(compressed_size, uncompressed_size) < std::tie(that.compressed_size, that.uncompressed_size); + } + }; + + const auto column_sizes = data.storage->getColumnSizes(); + + std::vector columns; + for (const auto & equal_ast : data.graph.getComponent(id)) + { + if (const auto it = column_sizes.find(equal_ast->getColumnName()); it != std::end(column_sizes)) + columns.push_back({it->second.data_compressed, it->second.data_uncompressed, equal_ast}); + } + + if (!columns.empty()) + ast = std::min_element(std::begin(columns), std::end(columns))->ast->clone(); + } } static bool needChildVisit(const ASTPtr &, const ASTPtr &) @@ -78,6 +143,7 @@ public: using SubstituteColumnVisitor = SubstituteColumnMatcher::Visitor; } + SubstituteColumnOptimizer::SubstituteColumnOptimizer( ASTSelectQuery * select_query_, Aliases & /*aliases_*/, @@ -99,15 +165,36 @@ void SubstituteColumnOptimizer::perform() if (!storage) return; const auto compare_graph = metadata_snapshot->getConstraints().getGraph(); - SubstituteColumnVisitor::Data data(compare_graph, storage); - if (select_query->where()) - SubstituteColumnVisitor(data).visit(select_query->refWhere()); - if (select_query->prewhere()) - SubstituteColumnVisitor(data).visit(select_query->refPrewhere()); - if (select_query->select()) - SubstituteColumnVisitor(data).visit(select_query->refSelect()); - if (select_query->having()) - SubstituteColumnVisitor(data).visit(select_query->refHaving()); + + auto run_for_all = [&](const auto func) { + if (select_query->where()) + func(select_query->refWhere()); + if (select_query->prewhere()) + func(select_query->refPrewhere()); + if (select_query->select()) + func(select_query->refSelect()); + if (select_query->having()) + func(select_query->refHaving()); + }; + + ComponentVisitor::Data component_data(compare_graph); + IdentifierSetVisitor::Data identifier_data; + auto preprocess = [&](ASTPtr & ast) { + ComponentVisitor(component_data).visit(ast); + IdentifierSetVisitor(identifier_data).visit(ast); + }; + + auto process = [&](ASTPtr & ast) { + SubstituteColumnVisitor::Data substitute_data(compare_graph, identifier_data.identifiers, storage); + SubstituteColumnVisitor(substitute_data).visit(ast); + }; + + ASTPtr old_query = select_query->clone(); + + run_for_all(preprocess); + run_for_all(process); + + } }