ClickHouse/src/Interpreters/ComparisonGraph.h

197 lines
6.4 KiB
C++
Raw Normal View History

2021-03-04 12:11:43 +00:00
#pragma once
#include <Parsers/IAST_fwd.h>
2021-11-10 17:57:59 +00:00
#include <Interpreters/TreeCNFConverter.h>
#include <Analyzer/Passes/CNF.h>
2023-03-27 10:03:40 +00:00
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <type_traits>
2021-03-04 12:11:43 +00:00
#include <unordered_map>
2021-04-03 16:30:49 +00:00
#include <map>
2021-03-04 12:11:43 +00:00
#include <vector>
namespace DB
{
/// Relation between two expressions `left` and `right`, read as `left <result> right`.
/// Stored in a single byte.
enum class ComparisonGraphCompareResult : uint8_t
{
    LESS,             /// left <  right
    LESS_OR_EQUAL,    /// left <= right
    EQUAL,            /// left == right
    GREATER_OR_EQUAL, /// left >= right
    GREATER,          /// left >  right
    NOT_EQUAL,        /// left != right
    UNKNOWN,          /// No relation between the operands is known.
};
/// Restricts the node type usable with ComparisonGraph:
/// satisfied only when T is exactly ASTPtr or exactly QueryTreeNodePtr.
template <typename T>
concept ComparisonGraphNodeType = std::same_as<T, ASTPtr> || std::same_as<T, QueryTreeNodePtr>;
2021-03-05 12:46:42 +00:00
/*
* Graph of relations between terms in constraints.
* Allows to compare terms and get equal terms.
*/
2023-03-17 13:38:01 +00:00
template <ComparisonGraphNodeType Node>
2021-03-04 12:11:43 +00:00
class ComparisonGraph
{
public:
static constexpr bool with_ast = std::same_as<Node, ASTPtr>;
using NodeContainer = std::conditional_t<with_ast, ASTs, QueryTreeNodes>;
using CNF = std::conditional_t<with_ast, CNFQuery, Analyzer::CNF>;
2021-03-04 12:11:43 +00:00
/// atomic_formulas are extracted from constraints.
explicit ComparisonGraph(const NodeContainer & atomic_formulas, ContextPtr context = nullptr);
2021-03-04 12:11:43 +00:00
static ComparisonGraphCompareResult atomToCompareResult(const typename CNF::AtomicFormula & atom);
2021-05-04 10:47:23 +00:00
ComparisonGraphCompareResult compare(const Node & left, const Node & right) const;
2021-03-04 12:11:43 +00:00
2021-05-02 19:16:40 +00:00
/// It's possible that left <expected> right
bool isPossibleCompare(ComparisonGraphCompareResult expected, const Node & left, const Node & right) const;
2021-05-02 19:16:40 +00:00
/// It's always true that left <expected> right
bool isAlwaysCompare(ComparisonGraphCompareResult expected, const Node & left, const Node & right) const;
2021-05-02 19:16:40 +00:00
/// Returns all expressions from component to which @node belongs if any.
NodeContainer getEqual(const Node & node) const;
2021-11-10 17:57:59 +00:00
/// Returns constant expression from component to which @node belongs if any.
std::optional<Node> getEqualConst(const Node & node) const;
2021-03-04 12:11:43 +00:00
/// Finds component id to which @node belongs if any.
std::optional<std::size_t> getComponentId(const Node & node) const;
2021-11-10 17:57:59 +00:00
/// Returns all expressions from component.
NodeContainer getComponent(size_t id) const;
2021-11-10 17:57:59 +00:00
size_t getNumOfComponents() const { return graph.vertices.size(); }
bool hasPath(size_t left, size_t right) const;
2021-04-26 11:26:54 +00:00
2021-04-03 12:12:45 +00:00
/// Find constants lessOrEqual and greaterOrEqual.
2021-03-04 12:11:43 +00:00
/// For int and double linear programming can be applied here.
2021-04-10 20:46:53 +00:00
/// Returns: {constant, is strict less/greater}
std::optional<std::pair<Field, bool>> getConstUpperBound(const Node & node) const;
std::optional<std::pair<Field, bool>> getConstLowerBound(const Node & node) const;
2021-03-04 12:11:43 +00:00
2021-11-10 17:57:59 +00:00
/// Returns all expression in graph.
std::vector<NodeContainer> getVertices() const;
2021-04-28 17:35:51 +00:00
2021-03-04 12:11:43 +00:00
private:
2021-11-10 17:57:59 +00:00
/// Strongly connected component
2021-03-04 12:11:43 +00:00
struct EqualComponent
{
2021-11-10 17:57:59 +00:00
/// All these expressions are considered as equal.
NodeContainer nodes;
2021-11-10 17:57:59 +00:00
std::optional<size_t> constant_index;
2021-04-10 15:47:50 +00:00
bool hasConstant() const;
Node getConstant() const;
2021-04-10 15:47:50 +00:00
void buildConstants();
2021-03-04 12:11:43 +00:00
};
2021-11-10 17:57:59 +00:00
/// Edge (from, to, type) means that it's always true that @from <op> @to,
/// where @op is the operation of type @type.
///
2021-03-04 12:11:43 +00:00
/// TODO: move to diff for int and double:
2021-11-10 17:57:59 +00:00
/// GREATER and GREATER_OR_EQUAL with +const or 0 --- ok
2021-03-04 12:11:43 +00:00
/// with -const --- not ok
/// EQUAL is ok only for 0
struct Edge
{
enum Type
{
2021-11-10 17:57:59 +00:00
GREATER,
GREATER_OR_EQUAL,
2021-03-04 12:11:43 +00:00
EQUAL,
};
Type type;
2021-03-05 09:54:13 +00:00
size_t to;
2021-03-04 12:11:43 +00:00
};
struct Graph
{
2021-05-04 18:43:58 +00:00
struct ASTHash
{
size_t operator() (const IAST::Hash & hash) const
{
2023-07-06 00:35:44 +00:00
return hash.low64;
2021-03-05 09:54:13 +00:00
}
};
static auto getHash(const Node & node)
{
if constexpr (with_ast)
return node->getTreeHash(/*ignore_aliases=*/ true);
else
return QueryTreeNodePtrWithHash{node};
}
using NodeHashToComponentContainer = std::conditional_t<with_ast, std::unordered_map<IAST::Hash, size_t, ASTHash>, QueryTreeNodePtrWithHashMap<size_t>>;
NodeHashToComponentContainer node_hash_to_component;
2021-05-04 18:43:58 +00:00
std::vector<EqualComponent> vertices;
2021-03-04 12:11:43 +00:00
std::vector<std::vector<Edge>> edges;
};
2021-11-19 14:14:56 +00:00
/// Receives graph, in which each vertex corresponds to one expression.
2021-11-10 17:57:59 +00:00
/// Then finds strongly connected components and builds graph on them.
static Graph buildGraphFromNodesGraph(const Graph & nodes_graph);
2021-03-05 09:54:13 +00:00
static Graph reverseGraph(const Graph & nodes_graph);
2021-11-10 17:57:59 +00:00
/// The first part of finding strongly connected components.
/// Finds order of exit from vertices of dfs traversal of graph.
static void dfsOrder(const Graph & nodes_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order);
2021-03-04 12:11:43 +00:00
2021-11-10 17:57:59 +00:00
using OptionalIndices = std::vector<std::optional<size_t>>;
/// The second part of finding strongly connected components.
/// Assigns index of component for each vertex.
static void dfsComponents(
const Graph & reversed_graph, size_t v,
OptionalIndices & components, size_t component);
2021-03-05 12:13:00 +00:00
2021-04-03 16:30:49 +00:00
enum class Path
{
2021-11-10 17:57:59 +00:00
GREATER,
GREATER_OR_EQUAL,
2021-04-03 16:30:49 +00:00
};
static ComparisonGraphCompareResult pathToCompareResult(Path path, bool inverse);
std::optional<Path> findPath(size_t start, size_t finish) const;
2021-11-10 17:57:59 +00:00
/// Calculate @dists.
static std::map<std::pair<size_t, size_t>, Path> buildDistsFromGraph(const Graph & g);
/// Calculate @nodeconst_lower_bound and @node_const_lower_bound.
2021-04-10 20:46:53 +00:00
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> buildConstBounds() const;
2021-04-03 16:30:49 +00:00
2021-11-10 17:57:59 +00:00
/// Direct acyclic graph in which each vertex corresponds
/// to one equivalence class of expressions.
/// Each edge sets the relation between classes (GREATER or GREATER_OR_EQUAL).
2021-03-04 12:11:43 +00:00
Graph graph;
2021-11-10 17:57:59 +00:00
/// Precalculated distances between each pair of vertices.
/// Distance can be either 0 or -1.
/// 0 means GREATER_OR_EQUAL.
/// -1 means GREATER.
2021-04-03 16:30:49 +00:00
std::map<std::pair<size_t, size_t>, Path> dists;
2021-11-10 17:57:59 +00:00
/// Explicitly collected components, for which it's known
/// that expressions in them are unequal.
2021-05-04 10:47:23 +00:00
std::set<std::pair<size_t, size_t>> not_equal;
2021-11-10 17:57:59 +00:00
/// Maximal constant value for each component that
/// is lower bound for all expressions in component.
std::vector<ssize_t> node_const_lower_bound;
2021-11-10 17:57:59 +00:00
/// Minimal constant value for each component that
/// is upper bound for all expressions in component.
std::vector<ssize_t> node_const_upper_bound;
2021-03-04 12:11:43 +00:00
};
}