2021-03-04 12:11:43 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Parsers/IAST_fwd.h>
|
2021-11-10 17:57:59 +00:00
|
|
|
#include <Interpreters/TreeCNFConverter.h>
|
2021-03-04 12:11:43 +00:00
|
|
|
#include <unordered_map>
|
2021-04-03 16:30:49 +00:00
|
|
|
#include <map>
|
2021-03-04 12:11:43 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-03-05 12:46:42 +00:00
|
|
|
/*
|
|
|
|
* Graph of relations between terms in constraints.
|
|
|
|
* Allows comparing terms and retrieving terms that are known to be equal.
|
|
|
|
*/
|
2021-03-04 12:11:43 +00:00
|
|
|
class ComparisonGraph
|
|
|
|
{
|
|
|
|
public:
|
2021-11-10 17:57:59 +00:00
|
|
|
/// atomic_formulas are extracted from constraints.
|
2022-03-13 11:59:20 +00:00
|
|
|
explicit ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas);
|
2021-03-04 12:11:43 +00:00
|
|
|
|
|
|
|
enum class CompareResult
|
|
|
|
{
|
|
|
|
LESS,
|
|
|
|
LESS_OR_EQUAL,
|
|
|
|
EQUAL,
|
|
|
|
GREATER_OR_EQUAL,
|
|
|
|
GREATER,
|
2021-05-04 10:47:23 +00:00
|
|
|
NOT_EQUAL,
|
2021-03-04 12:11:43 +00:00
|
|
|
UNKNOWN,
|
|
|
|
};
|
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
static CompareResult atomToCompareResult(const CNFQuery::AtomicFormula & atom);
|
|
|
|
static CompareResult functionNameToCompareResult(const std::string & name);
|
2022-03-13 11:59:20 +00:00
|
|
|
static CompareResult inverseCompareResult(CompareResult result);
|
2021-05-04 10:47:23 +00:00
|
|
|
|
2021-03-05 09:54:13 +00:00
|
|
|
CompareResult compare(const ASTPtr & left, const ASTPtr & right) const;
|
2021-03-04 12:11:43 +00:00
|
|
|
|
2021-05-02 19:16:40 +00:00
|
|
|
/// It's possible that left <expected> right
|
2022-03-13 11:59:20 +00:00
|
|
|
bool isPossibleCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
|
2021-05-02 19:16:40 +00:00
|
|
|
|
|
|
|
/// It's always true that left <expected> right
|
2022-03-13 11:59:20 +00:00
|
|
|
bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
|
2021-05-02 19:16:40 +00:00
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Returns all expressions from component to which @ast belongs if any.
|
2021-03-04 12:11:43 +00:00
|
|
|
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Returns constant expression from component to which @ast belongs if any.
|
2021-03-05 12:46:42 +00:00
|
|
|
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;
|
2021-03-04 12:11:43 +00:00
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Finds component id to which @ast belongs if any.
|
2021-04-26 11:26:54 +00:00
|
|
|
std::optional<std::size_t> getComponentId(const ASTPtr & ast) const;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Returns all expressions from component.
|
2022-03-13 11:59:20 +00:00
|
|
|
std::vector<ASTPtr> getComponent(size_t id) const;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
size_t getNumOfComponents() const { return graph.vertices.size(); }
|
|
|
|
|
2022-03-13 11:59:20 +00:00
|
|
|
bool hasPath(size_t left, size_t right) const;
|
2021-04-26 11:26:54 +00:00
|
|
|
|
2021-04-03 12:12:45 +00:00
|
|
|
/// Find constants lessOrEqual and greaterOrEqual.
|
2021-03-04 12:11:43 +00:00
|
|
|
/// For int and double linear programming can be applied here.
|
2021-04-10 20:46:53 +00:00
|
|
|
/// Returns: {constant, is strict less/greater}
|
|
|
|
std::optional<std::pair<Field, bool>> getConstUpperBound(const ASTPtr & ast) const;
|
|
|
|
std::optional<std::pair<Field, bool>> getConstLowerBound(const ASTPtr & ast) const;
|
2021-03-04 12:11:43 +00:00
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Returns all expression in graph.
|
2021-05-04 18:43:58 +00:00
|
|
|
std::vector<ASTs> getVertices() const;
|
2021-04-28 17:35:51 +00:00
|
|
|
|
2021-03-04 12:11:43 +00:00
|
|
|
private:
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Strongly connected component
|
2021-03-04 12:11:43 +00:00
|
|
|
struct EqualComponent
|
|
|
|
{
|
2021-11-10 17:57:59 +00:00
|
|
|
/// All these expressions are considered as equal.
|
2021-03-04 12:11:43 +00:00
|
|
|
std::vector<ASTPtr> asts;
|
2021-11-10 17:57:59 +00:00
|
|
|
std::optional<size_t> constant_index;
|
2021-04-10 15:47:50 +00:00
|
|
|
|
|
|
|
bool hasConstant() const;
|
|
|
|
ASTPtr getConstant() const;
|
|
|
|
void buildConstants();
|
2021-03-04 12:11:43 +00:00
|
|
|
};
|
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Edge (from, to, type) means that it's always true that @from <op> @to,
|
|
|
|
/// where @op is the operation of type @type.
|
|
|
|
///
|
2021-03-04 12:11:43 +00:00
|
|
|
/// TODO: move to diff for int and double:
|
2021-11-10 17:57:59 +00:00
|
|
|
/// GREATER and GREATER_OR_EQUAL with +const or 0 --- ok
|
2021-03-04 12:11:43 +00:00
|
|
|
/// with -const --- not ok
|
|
|
|
/// EQUAL is ok only for 0
|
|
|
|
struct Edge
|
|
|
|
{
|
|
|
|
enum Type
|
|
|
|
{
|
2021-11-10 17:57:59 +00:00
|
|
|
GREATER,
|
|
|
|
GREATER_OR_EQUAL,
|
2021-03-04 12:11:43 +00:00
|
|
|
EQUAL,
|
|
|
|
};
|
|
|
|
|
|
|
|
Type type;
|
2021-03-05 09:54:13 +00:00
|
|
|
size_t to;
|
2021-03-04 12:11:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct Graph
|
|
|
|
{
|
2021-05-04 18:43:58 +00:00
|
|
|
struct ASTHash
|
|
|
|
{
|
|
|
|
size_t operator() (const IAST::Hash & hash) const
|
|
|
|
{
|
2021-03-05 09:54:13 +00:00
|
|
|
return hash.first;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::unordered_map<IAST::Hash, size_t, ASTHash> ast_hash_to_component;
|
2021-05-04 18:43:58 +00:00
|
|
|
std::vector<EqualComponent> vertices;
|
2021-03-04 12:11:43 +00:00
|
|
|
std::vector<std::vector<Edge>> edges;
|
|
|
|
};
|
|
|
|
|
2021-11-19 14:14:56 +00:00
|
|
|
/// Receives graph, in which each vertex corresponds to one expression.
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Then finds strongly connected components and builds graph on them.
|
|
|
|
static Graph buildGraphFromAstsGraph(const Graph & asts_graph);
|
2021-03-05 09:54:13 +00:00
|
|
|
|
2021-05-06 08:29:24 +00:00
|
|
|
static Graph reverseGraph(const Graph & asts_graph);
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// The first part of finding strongly connected components.
|
|
|
|
/// Finds order of exit from vertices of dfs traversal of graph.
|
2021-05-06 08:29:24 +00:00
|
|
|
static void dfsOrder(const Graph & asts_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order);
|
2021-03-04 12:11:43 +00:00
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
using OptionalIndices = std::vector<std::optional<size_t>>;
|
|
|
|
|
|
|
|
/// The second part of finding strongly connected components.
|
|
|
|
/// Assigns index of component for each vertex.
|
|
|
|
static void dfsComponents(
|
|
|
|
const Graph & reversed_graph, size_t v,
|
2022-03-13 11:59:20 +00:00
|
|
|
OptionalIndices & components, size_t component);
|
2021-03-05 12:13:00 +00:00
|
|
|
|
2021-04-03 16:30:49 +00:00
|
|
|
enum class Path
|
|
|
|
{
|
2021-11-10 17:57:59 +00:00
|
|
|
GREATER,
|
|
|
|
GREATER_OR_EQUAL,
|
2021-04-03 16:30:49 +00:00
|
|
|
};
|
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
static CompareResult pathToCompareResult(Path path, bool inverse);
|
2022-03-13 11:59:20 +00:00
|
|
|
std::optional<Path> findPath(size_t start, size_t finish) const;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Calculate @dists.
|
|
|
|
static std::map<std::pair<size_t, size_t>, Path> buildDistsFromGraph(const Graph & g);
|
|
|
|
|
|
|
|
/// Calculate @ast_const_lower_bound and @ast_const_lower_bound.
|
2021-04-10 20:46:53 +00:00
|
|
|
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> buildConstBounds() const;
|
2021-04-03 16:30:49 +00:00
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Direct acyclic graph in which each vertex corresponds
|
|
|
|
/// to one equivalence class of expressions.
|
|
|
|
/// Each edge sets the relation between classes (GREATER or GREATER_OR_EQUAL).
|
2021-03-04 12:11:43 +00:00
|
|
|
Graph graph;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Precalculated distances between each pair of vertices.
|
|
|
|
/// Distance can be either 0 or -1.
|
|
|
|
/// 0 means GREATER_OR_EQUAL.
|
|
|
|
/// -1 means GREATER.
|
2021-04-03 16:30:49 +00:00
|
|
|
std::map<std::pair<size_t, size_t>, Path> dists;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Explicitly collected components, for which it's known
|
|
|
|
/// that expressions in them are unequal.
|
2021-05-04 10:47:23 +00:00
|
|
|
std::set<std::pair<size_t, size_t>> not_equal;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Maximal constant value for each component that
|
|
|
|
/// is lower bound for all expressions in component.
|
2021-04-10 20:46:53 +00:00
|
|
|
std::vector<ssize_t> ast_const_lower_bound;
|
2021-11-10 17:57:59 +00:00
|
|
|
|
|
|
|
/// Minimal constant value for each component that
|
|
|
|
/// is upper bound for all expressions in component.
|
2021-04-10 20:46:53 +00:00
|
|
|
std::vector<ssize_t> ast_const_upper_bound;
|
2021-03-04 12:11:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|