#pragma once #include #include #include #include #include #include #include #include #include #include namespace DB { class IFunction; using FunctionBasePtr = std::shared_ptr; /** Range with open or closed ends; possibly unbounded. */ struct Range { private: static bool equals(const Field & lhs, const Field & rhs); static bool less(const Field & lhs, const Field & rhs); public: Field left; /// the left border, if any Field right; /// the right border, if any bool left_bounded = false; /// bounded at the left bool right_bounded = false; /// bounded at the right bool left_included = false; /// includes the left border, if any bool right_included = false; /// includes the right border, if any /// The whole unversum. Range() {} /// One point. Range(const Field & point) : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {} /// A bounded two-sided range. Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_) : left(left_), right(right_), left_bounded(true), right_bounded(true), left_included(left_included_), right_included(right_included_) { shrinkToIncludedIfPossible(); } static Range createRightBounded(const Field & right_point, bool right_included) { Range r; r.right = right_point; r.right_bounded = true; r.right_included = right_included; r.shrinkToIncludedIfPossible(); return r; } static Range createLeftBounded(const Field & left_point, bool left_included) { Range r; r.left = left_point; r.left_bounded = true; r.left_included = left_included; r.shrinkToIncludedIfPossible(); return r; } /** Optimize the range. If it has an open boundary and the Field type is "loose" * - then convert it to closed, narrowing by one. * That is, for example, turn (0,2) into [1]. */ void shrinkToIncludedIfPossible() { if (left_bounded && !left_included) { if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) { ++left.get(); left_included = true; } if (left.getType() == Field::Types::Int64 && left.get() != std::numeric_limits::max()) { ++left.get(); left_included = true; } } if (right_bounded && !right_included) { if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) { --right.get(); right_included = true; } if (right.getType() == Field::Types::Int64 && right.get() != std::numeric_limits::min()) { --right.get(); right_included = true; } } } bool empty() const { return left_bounded && right_bounded && (less(right, left) || ((!left_included || !right_included) && !less(left, right))); } /// x contained in the range bool contains(const Field & x) const { return !leftThan(x) && !rightThan(x); } /// x is to the left bool rightThan(const Field & x) const { return (left_bounded ? !(less(left, x) || (left_included && equals(x, left))) : false); } /// x is to the right bool leftThan(const Field & x) const { return (right_bounded ? !(less(x, right) || (right_included && equals(x, right))) : false); } bool intersectsRange(const Range & r) const { /// r to the left of me. if (r.right_bounded && left_bounded && (less(r.right, left) || ((!left_included || !r.right_included) && equals(r.right, left)))) return false; /// r to the right of me. if (r.left_bounded && right_bounded && (less(right, r.left) /// ...} {... || ((!right_included || !r.left_included) /// ...) [... or ...] (... && equals(r.left, right)))) return false; return true; } bool containsRange(const Range & r) const { /// r starts to the left of me. if (left_bounded && (!r.left_bounded || less(r.left, left) || (r.left_included && !left_included && equals(r.left, left)))) return false; /// r ends right of me. if (right_bounded && (!r.right_bounded || less(right, r.right) || (r.right_included && !right_included && equals(r.right, right)))) return false; return true; } void swapLeftAndRight() { std::swap(left, right); std::swap(left_bounded, right_bounded); std::swap(left_included, right_included); } String toString() const; }; /// Class that extends arbitrary objects with infinities, like +-inf for floats class FieldWithInfinity { public: enum Type { MINUS_INFINITY = -1, NORMAL = 0, PLUS_INFINITY = 1 }; explicit FieldWithInfinity(const Field & field_); FieldWithInfinity(Field && field_); static FieldWithInfinity getMinusInfinity(); static FieldWithInfinity getPlusinfinity(); bool operator<(const FieldWithInfinity & other) const; bool operator==(const FieldWithInfinity & other) const; private: Field field; Type type; FieldWithInfinity(const Type type_); }; /** Condition on the index. * * Consists of the conditions for the key belonging to all possible ranges or sets, * as well as logical operators AND/OR/NOT above these conditions. * * Constructs a reverse polish notation from these conditions * and can calculate (interpret) its satisfiability over key ranges. */ class KeyCondition { public: /// Does not take into account the SAMPLE section. all_columns - the set of all columns of the table. KeyCondition( const SelectQueryInfo & query_info, const Context & context, const NamesAndTypesList & all_columns, const Names & key_column_names, const ExpressionActionsPtr & key_expr); /// Whether the condition is feasible in the key range. /// left_key and right_key must contain all fields in the sort_descr in the appropriate order. /// data_types - the types of the key columns. bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const; /// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`. bool mayBeTrueInParallelogram(const std::vector & parallelogram, const DataTypes & data_types) const; /// Is the condition valid in a semi-infinite (not limited to the right) key range. /// left_key must contain all the fields in the sort_descr in the appropriate order. bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const; /// Checks that the index can not be used. bool alwaysUnknownOrTrue() const; /// Get the maximum number of the key element used in the condition. size_t getMaxKeyColumn() const; /// Impose an additional condition: the value in the column column must be in the `range` range. /// Returns whether there is such a column in the key. bool addCondition(const String & column, const Range & range); String toString() const; /** A chain of possibly monotone functions. * If the key column is wrapped in functions that can be monotonous in some value ranges * (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate. */ using MonotonicFunctionsChain = std::vector; static Block getBlockWithConstants( const ASTPtr & query, const SyntaxAnalyzerResultPtr & syntax_analyzer_result, const Context & context, const NamesAndTypesList & all_columns); static std::optional applyMonotonicFunctionsChainToRange( Range key_range, MonotonicFunctionsChain & functions, DataTypePtr current_type); private: /// The expression is stored as Reverse Polish Notation. struct RPNElement { enum Function { /// Atoms of a Boolean expression. FUNCTION_IN_RANGE, FUNCTION_NOT_IN_RANGE, FUNCTION_IN_SET, FUNCTION_NOT_IN_SET, FUNCTION_UNKNOWN, /// Can take any value. /// Operators of the logical expression. FUNCTION_NOT, FUNCTION_AND, FUNCTION_OR, /// Constants ALWAYS_FALSE, ALWAYS_TRUE, }; RPNElement() {} RPNElement(Function function_) : function(function_) {} RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {} RPNElement(Function function_, size_t key_column_, const Range & range_) : function(function_), range(range_), key_column(key_column_) {} String toString() const; Function function = FUNCTION_UNKNOWN; /// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE. Range range; size_t key_column = 0; /// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET ASTPtr in_function; using MergeTreeSetIndexPtr = std::shared_ptr; MergeTreeSetIndexPtr set_index; mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy. }; using RPN = std::vector; using ColumnIndices = std::map; using AtomMap = std::unordered_map; public: static const AtomMap atom_map; private: bool mayBeTrueInRange( size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types, bool right_bounded) const; void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants); bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); bool operatorFromAST(const ASTFunction * func, RPNElement & out); /** Is node the key column * or expression in which column of key is wrapped by chain of functions, * that can be monotomic on certain ranges? * If these conditions are true, then returns number of column in key, type of resulting expression * and fills chain of possibly-monotonic functions. */ bool isKeyPossiblyWrappedByMonotonicFunctions( const ASTPtr & node, const Context & context, size_t & out_key_column_num, DataTypePtr & out_key_res_column_type, MonotonicFunctionsChain & out_functions_chain); bool isKeyPossiblyWrappedByMonotonicFunctionsImpl( const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, std::vector & out_functions_chain); bool canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); void getKeyTuplePositionMapping( const ASTPtr & node, const Context & context, std::vector & indexes_mapping, const size_t tuple_index, size_t & out_key_column_num); /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. bool tryPrepareSetIndex( const ASTPtr & node, const Context & context, RPNElement & out, const SetPtr & prepared_set, size_t & out_key_column_num); RPN rpn; ColumnIndices key_columns; ExpressionActionsPtr key_expr; PreparedSets prepared_sets; }; }