From 1fb947b70b23bb3e26fe4bd904cbdca8776640d6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 4 Mar 2021 15:11:43 +0300 Subject: [PATCH] impr --- src/CMakeLists.txt | 2 +- src/Interpreters/ComparisonGraph.cpp | 38 ++++++ src/Interpreters/ComparisonGraph.h | 76 ++++++++++++ src/Interpreters/TreeCNFConverter.cpp | 113 ++++++++++-------- src/Interpreters/TreeCNFConverter.h | 26 ++-- .../WhereConstraintsOptimizer.cpp | 77 +++++++----- src/Storages/ConstraintsDescription.h | 5 + 7 files changed, 247 insertions(+), 90 deletions(-) create mode 100644 src/Interpreters/ComparisonGraph.cpp create mode 100644 src/Interpreters/ComparisonGraph.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d178d2356c6..f4709af1ad3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -63,7 +63,7 @@ add_subdirectory (Server) set(dbms_headers) -set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h) +set(dbms_sources Interpreters/ConstraintMatcherVisitor.cpp Interpreters/ConstraintMatcherVisitor.h Interpreters/WhereConstraintsOptimizer.cpp Interpreters/WhereConstraintsOptimizer.h Interpreters/TreeCNFConverter.cpp Interpreters/TreeCNFConverter.h Interpreters/ComparisonGraph.cpp Interpreters/ComparisonGraph.h) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp new file mode 100644 index 00000000000..0a281848e59 --- /dev/null +++ b/src/Interpreters/ComparisonGraph.cpp @@ -0,0 +1,38 @@ +#include + +#include + +namespace DB +{ + +ComparisonGraph::ComparisonGraph(const std::vector & /*atomic_formulas*/) +{ +} + +std::vector ComparisonGraph::getEqual(const ASTPtr & ast) const +{ + const auto hash_it = 
graph.ast_hash_to_component.find(ast->getTreeHash().second); + if (hash_it == std::end(graph.ast_hash_to_component)) /* hash is not in the graph: no equal ASTs are known, and hash_it must not be dereferenced */ + return {}; + const size_t index = hash_it->second; + //const auto vertex_it = std::find(std::begin(graph.vertexes[index].asts), std::end(graph.vertexes[index].asts), ast, ); + if (std::any_of( + std::cbegin(graph.vertexes[index].asts), + std::cend(graph.vertexes[index].asts), + [ast](const ASTPtr & constraint_ast) + { + return constraint_ast->getTreeHash() == ast->getTreeHash() && + constraint_ast->getColumnName() == ast->getColumnName(); + })) { + return graph.vertexes[index].asts; + } else { + return {}; + } +} + +ComparisonGraph::Graph ComparisonGraph::BuildGraphFromAsts(const Graph & /*asts_graph*/) +{ + return {}; +} + +} diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h new file mode 100644 index 00000000000..5469e623786 --- /dev/null +++ b/src/Interpreters/ComparisonGraph.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class ComparisonGraph +{ +public: + ComparisonGraph(const std::vector & atomic_formulas); + + /// Works for string and num. + /// For other -- only eq. + enum class CompareResult + { + LESS, + LESS_OR_EQUAL, + EQUAL, + GREATER_OR_EQUAL, + GREATER, + //NOT_EQUAL, + UNKNOWN, + }; + + // TODO: implement + CompareResult compare(const ASTPtr & /*left*/, const ASTPtr & /*right*/) const { return CompareResult::UNKNOWN; } + + std::vector getEqual(const ASTPtr & ast) const; + + /// Find constants less and greater. + /// For int and double linear programming can be applied here. 
+ // TODO: implement + ASTPtr getMax(const ASTPtr &) const { return nullptr; } // sup + ASTPtr getMin(const ASTPtr &) const { return nullptr; } // inf + +private: + /// strongly connected component + struct EqualComponent + { + std::vector asts; + }; + + /// TODO: move to diff for int and double: + /// LESS and LESS_OR_EQUAL with +const or 0 --- ok + /// with -const --- not ok + /// EQUAL is ok only for 0 + struct Edge + { + enum Type + { + LESS, + LESS_OR_EQUAL, + EQUAL, + }; + + Type type; + EqualComponent to; + }; + + struct Graph + { + std::unordered_map ast_hash_to_component; + std::vector vertexes; + std::vector> edges; + }; + + Graph BuildGraphFromAsts(const Graph & asts_graph); + + Graph graph; +}; + +} diff --git a/src/Interpreters/TreeCNFConverter.cpp b/src/Interpreters/TreeCNFConverter.cpp index c547cdd63ac..ad991b64299 100644 --- a/src/Interpreters/TreeCNFConverter.cpp +++ b/src/Interpreters/TreeCNFConverter.cpp @@ -149,9 +149,13 @@ void traverseCNF(const ASTPtr & node, CNFQuery::AndGroup & and_group, CNFQuery:: traverseCNF(child, and_group, or_group); } } + else if (func && func->name == "not") + { + or_group.insert(CNFQuery::AtomicFormula{true, func->arguments->children.front()}); + } else { - or_group.insert(node); + or_group.insert(CNFQuery::AtomicFormula{false, node}); } } @@ -190,13 +194,23 @@ ASTPtr TreeCNFConverter::fromCNF(const CNFQuery & cnf) for (const auto & group : groups) { if (group.size() == 1) - or_groups.push_back((*group.begin())->clone()); + { + if ((*group.begin()).negative) + or_groups.push_back(makeASTFunction("not", (*group.begin()).ast->clone())); + else + or_groups.push_back((*group.begin()).ast->clone()); + } else if (group.size() > 1) { or_groups.push_back(makeASTFunction("or")); auto * func = or_groups.back()->as(); - for (const auto & ast : group) - func->arguments->children.push_back(ast->clone()); + for (const auto & atom : group) + { + if (atom.negative) /* each atom is wrapped in "not" according to its own flag, not the first atom's */ + 
func->arguments->children.push_back(makeASTFunction("not", atom.ast->clone())); + else + func->arguments->children.push_back(atom.ast->clone()); + } } } @@ -211,7 +225,23 @@ ASTPtr TreeCNFConverter::fromCNF(const CNFQuery & cnf) return res; } -void pullNotOut(ASTPtr & node) +void pushPullNotInAtom(CNFQuery::AtomicFormula & atom, const std::map & inverse_relations) +{ + auto * func = atom.ast->as(); + if (!func) + return; + if (auto it = inverse_relations.find(func->name); it != std::end(inverse_relations)) + { + /// inverse func + atom.ast = atom.ast->clone(); + auto * new_func = atom.ast->as(); + new_func->name = it->second; + /// add not + atom.negative = !atom.negative; + } +} + +void pullNotOut(CNFQuery::AtomicFormula & atom) { static const std::map inverse_relations = { {"notEquals", "equals"}, @@ -222,22 +252,14 @@ void pullNotOut(ASTPtr & node) {"notEmpty", "empty"}, }; - auto * func = node->as(); - if (!func) - return; - if (auto it = inverse_relations.find(func->name); it != std::end(inverse_relations)) - { - /// inverse func - node = node->clone(); - auto * new_func = node->as(); - new_func->name = it->second; - /// add not - node = makeASTFunction("not", node); - } + pushPullNotInAtom(atom, inverse_relations); } -void pushNotIn(ASTPtr & node) +void pushNotIn(CNFQuery::AtomicFormula & atom) { + if (!atom.negative) + return; + static const std::map inverse_relations = { {"equals", "notEquals"}, {"less", "greaterOrEquals"}, @@ -245,51 +267,34 @@ void pushNotIn(ASTPtr & node) {"in", "notIn"}, {"like", "notLike"}, {"empty", "notEmpty"}, + {"notEquals", "equals"}, + {"greaterOrEquals", "less"}, + {"greater", "lessOrEquals"}, + {"notIn", "in"}, + {"notLike", "like"}, + {"notEmpty", "empty"}, }; - auto * func = node->as(); - if (!func) - return; - if (auto it = inverse_relations.find(func->name); it != std::end(inverse_relations)) - { - /// inverse func - node = node->clone(); - auto * new_func = node->as(); - new_func->name = it->second; - /// add not - node = 
makeASTFunction("not", node); - } + pushPullNotInAtom(atom, inverse_relations); } CNFQuery & CNFQuery::pullNotOutFunctions() { - transformAtoms([](const ASTPtr & node) -> ASTPtr - { - auto * func = node->as(); - if (!func) - return node; - ASTPtr result = node->clone(); - if (func->name == "not") - pullNotOut(func->arguments->children.front()); - else - pullNotOut(result); - traversePushNot(result, false); - return result; - }); + transformAtoms([](const AtomicFormula & atom) -> AtomicFormula + { + AtomicFormula result{atom.negative, atom.ast->clone()}; + pullNotOut(result); + return result; + }); return *this; } CNFQuery & CNFQuery::pushNotInFuntions() { - transformAtoms([](const ASTPtr & node) -> ASTPtr + transformAtoms([](const AtomicFormula & atom) -> AtomicFormula { - auto * func = node->as(); - if (!func) - return node; - ASTPtr result = node->clone(); - if (func->name == "not") - pushNotIn(func->arguments->children.front()); - traversePushNot(result, false); + AtomicFormula result{atom.negative, atom.ast->clone()}; + pushNotIn(result); return result; }); return *this; @@ -306,12 +311,14 @@ std::string CNFQuery::dump() const first = false; res << "("; bool first_in_group = true; - for (const auto & ast : group) + for (const auto & atom : group) { if (!first_in_group) res << " OR "; first_in_group = false; - res << ast->getColumnName(); + if (atom.negative) + res << " NOT "; + res << atom.ast->getColumnName(); } res << ")"; } diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index a27df8bd2b9..51c5ae3aa43 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -12,7 +12,19 @@ namespace DB class CNFQuery { public: - using OrGroup = std::set; // Add NOT container??? + struct AtomicFormula + { + bool negative = false; + ASTPtr ast; + + /// for set + bool operator<(const AtomicFormula & rhs) const + { + return ast == rhs.ast ? 
negative < rhs.negative : ast < rhs.ast; + } + }; + + using OrGroup = std::set; using AndGroup = std::set; CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } @@ -46,10 +58,10 @@ public: filtered.insert(filtered_group); else { - /// all atoms false -> group false -> CNF false + /// all atoms false -> group false -> CNF false filtered.clear(); filtered_group.clear(); - filtered_group.insert(std::make_shared(static_cast(0))); + filtered_group.insert(AtomicFormula{false, std::make_shared(static_cast(0))}); filtered.insert(filtered_group); std::swap(statements, filtered); return *this; @@ -79,11 +91,11 @@ public: transformGroups([func](const OrGroup & group) -> OrGroup { OrGroup result; - for (const auto & ast : group) + for (const auto & atom : group) { - auto new_ast = func(ast); - if (new_ast) - result.insert(std::move(new_ast)); + auto new_atom = func(atom); + if (new_atom.ast) + result.insert(std::move(new_atom)); } return result; }); diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 3953e34f267..b0ebaffdfed 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -9,9 +9,9 @@ namespace DB { -std::vector> getConstraintData(const StorageMetadataPtr & metadata_snapshot) +std::vector> getConstraintData(const StorageMetadataPtr & metadata_snapshot) { - std::vector> constraint_data; + std::vector> constraint_data; for (const auto & constraint : metadata_snapshot->getConstraints().filterConstraints(ConstraintsDescription::ConstraintType::ALWAYS_TRUE)) { @@ -24,6 +24,39 @@ std::vector> getConstraintData(const StorageMetadataPtr & me return constraint_data; } +std::vector getAtomicConstraintData(const StorageMetadataPtr & metadata_snapshot) +{ + std::vector constraint_data; + for (const auto & constraint : + metadata_snapshot->getConstraints().filterConstraints(ConstraintsDescription::ConstraintType::ALWAYS_TRUE)) + { + const 
auto cnf = TreeCNFConverter::toCNF(constraint->as()->expr->ptr()) + .pullNotOutFunctions(); /// TODO: move prepare stage to ConstraintsDescription + for (const auto & group : cnf.getStatements()) { + if (group.size() == 1) + constraint_data.push_back(*group.begin()); + } + } + + return constraint_data; +} + +std::vector> getEqualConstraintData(const StorageMetadataPtr & metadata_snapshot) +{ + std::vector> equal_constraints; + const std::vector atomic_constraints = getAtomicConstraintData(metadata_snapshot); + for (const auto & constraint : atomic_constraints) { + auto * func = constraint.ast->as(); + if (func && (func->name == "equals" && !constraint.negative)) /* keep only non-negated equality atoms; the comparison function is named "equals" */ + { + equal_constraints.emplace_back( + func->arguments->children[0], + func->arguments->children[1]); + } + } + return equal_constraints; +} + WhereConstraintsOptimizer::WhereConstraintsOptimizer( ASTSelectQuery * select_query_, Aliases & /*aliases_*/, @@ -48,37 +81,22 @@ namespace }; } -MatchState match(ASTPtr a, ASTPtr b) +MatchState match(CNFQuery::AtomicFormula a, CNFQuery::AtomicFormula b) { - bool match_means_ok = true; + bool match_means_ok = true ^ a.negative ^ b.negative; - { - auto * func_a = a->as(); - if (func_a && func_a->name == "not") - { - a = func_a->arguments->children.front(); - match_means_ok ^= true; - } - } - { - auto * func_b = b->as(); - if (func_b && func_b->name == "not") - { - b = func_b->arguments->children.front(); - match_means_ok ^= true; - } - } - - if (a->getTreeHash() == b->getTreeHash() && - a->getColumnName() == b->getColumnName()) + if (a.ast->getTreeHash() == b.ast->getTreeHash() && + a.ast->getColumnName() == b.ast->getColumnName()) { return match_means_ok ? 
MatchState::FULL_MATCH : MatchState::NOT_MATCH; } return MatchState::NONE; } -bool checkIfGroupAlwaysTrue(const CNFQuery::OrGroup & group, const std::vector> & constraints) +bool checkIfGroupAlwaysTrue(const CNFQuery::OrGroup & group, const std::vector> & constraints) { + /// TODO: constraints graph + /// TODO: this is temporary; need to write more effective search /// TODO: go deeper into asts (a < b, a = b,...) with z3 or some visitor for (const auto & constraint : constraints) /// one constraint in group is enough, @@ -111,7 +129,7 @@ bool checkIfGroupAlwaysTrue(const CNFQuery::OrGroup & group, const std::vector> & constraints) +bool checkIfAtomAlwaysFalse(const CNFQuery::AtomicFormula & atom, const std::vector> & constraints) { /// TODO: more efficient matching @@ -120,9 +138,9 @@ bool checkIfAtomAlwaysFalse(const ASTPtr & atom, const std::vector 1) continue; /// TMP - for (const auto & constraint_ast : constraint) + for (const auto & constraint_atoms : constraint) { - const auto match_result = match(constraint_ast, atom); + const auto match_result = match(constraint_atoms, atom); if (match_result != MatchState::NONE) return match_result == MatchState::NOT_MATCH; @@ -137,14 +155,15 @@ void WhereConstraintsOptimizer::perform() if (select_query->where() && metadata_snapshot) { const auto constraint_data = getConstraintData(metadata_snapshot); + Poco::Logger::get("BEFORE CNF ").information(select_query->where()->dumpTree()); auto cnf = TreeCNFConverter::toCNF(select_query->where()); Poco::Logger::get("BEFORE OPT").information(cnf.dump()); cnf.pullNotOutFunctions() .filterAlwaysTrueGroups([&constraint_data](const auto & group) { /// remove always true groups from CNF return !checkIfGroupAlwaysTrue(group, constraint_data); }) - .filterAlwaysFalseAtoms([&constraint_data](const auto & ast) { /// remove always false atoms from CNF - return !checkIfAtomAlwaysFalse(ast, constraint_data); + .filterAlwaysFalseAtoms([&constraint_data](const auto & atom) { /// remove always 
false atoms from CNF + return !checkIfAtomAlwaysFalse(atom, constraint_data); }) .pushNotInFuntions(); diff --git a/src/Storages/ConstraintsDescription.h b/src/Storages/ConstraintsDescription.h index 3bdd9bc5503..219dce89851 100644 --- a/src/Storages/ConstraintsDescription.h +++ b/src/Storages/ConstraintsDescription.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -11,6 +12,7 @@ using ConstraintsExpressions = std::vector; struct ConstraintsDescription { std::vector constraints; + std::vector cnf_constraints; ConstraintsDescription() = default; @@ -27,6 +29,9 @@ struct ConstraintsDescription }; ASTs filterConstraints(ConstraintType selection) const; + // TODO: move the conversion to CNF and the constraint getters here + //ASTs filterAtomicConstraints(ConstraintType selection) const; + //ASTs filterEqualConstraints(ConstraintType selection) const; ConstraintsExpressions getExpressionsToCheck(const Context & context, const NamesAndTypesList & source_columns_) const;