ClickHouse/dbms/src/Storages/MergeTree/PKCondition.h

337 lines
11 KiB
C++
Raw Normal View History

2012-12-05 12:44:55 +00:00
#pragma once
#include <sstream>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/Set.h>
#include <Core/SortDescription.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Storages/SelectQueryInfo.h>
2012-12-05 12:44:55 +00:00
namespace DB
{
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
class IFunction;
using FunctionPtr = std::shared_ptr<IFunction>;
2017-04-16 15:00:33 +00:00
/** Range with open or closed ends; Perhaps unlimited.
*/
2012-12-05 12:44:55 +00:00
struct Range
{
private:
static bool equals(const Field & lhs, const Field & rhs);
static bool less(const Field & lhs, const Field & rhs);
public:
2017-04-16 15:00:33 +00:00
Field left; /// the left border, if any
Field right; /// the right border, if any
bool left_bounded = false; /// limited to the left
bool right_bounded = false; /// limited to the right
bool left_included = false; /// includes the left border, if any
bool right_included = false; /// includes the right border, if any
/// The whole set.
Range() {}
2017-04-16 15:00:33 +00:00
/// One point.
Range(const Field & point)
: left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {}
2017-04-16 15:00:33 +00:00
/// A bounded two-sided range.
Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_)
: left(left_), right(right_),
left_bounded(true), right_bounded(true),
left_included(left_included_), right_included(right_included_)
{
shrinkToIncludedIfPossible();
}
static Range createRightBounded(const Field & right_point, bool right_included)
{
Range r;
r.right = right_point;
r.right_bounded = true;
r.right_included = right_included;
r.shrinkToIncludedIfPossible();
return r;
}
static Range createLeftBounded(const Field & left_point, bool left_included)
{
Range r;
r.left = left_point;
r.left_bounded = true;
r.left_included = left_included;
r.shrinkToIncludedIfPossible();
return r;
}
2017-04-16 15:00:33 +00:00
/** Optimize the range. If it has an open boundary and the Field type is "loose"
* - then convert it to closed, narrowing by one.
* That is, for example, turn (0,2) into [1].
*/
void shrinkToIncludedIfPossible()
{
if (left_bounded && !left_included)
{
if (left.getType() == Field::Types::UInt64 && left.get<UInt64>() != std::numeric_limits<UInt64>::max())
{
++left.get<UInt64 &>();
left_included = true;
}
if (left.getType() == Field::Types::Int64 && left.get<Int64>() != std::numeric_limits<Int64>::max())
{
++left.get<Int64 &>();
left_included = true;
}
}
if (right_bounded && !right_included)
{
if (right.getType() == Field::Types::UInt64 && right.get<UInt64>() != std::numeric_limits<UInt64>::min())
{
--right.get<UInt64 &>();
right_included = true;
}
if (right.getType() == Field::Types::Int64 && right.get<Int64>() != std::numeric_limits<Int64>::min())
{
--right.get<Int64 &>();
right_included = true;
}
}
}
bool empty() const
{
return left_bounded && right_bounded
&& (less(right, left)
|| ((!left_included || !right_included) && !less(left, right)));
}
2017-04-16 15:00:33 +00:00
/// x contained in the range
bool contains(const Field & x) const
{
return !leftThan(x) && !rightThan(x);
}
2017-04-16 15:00:33 +00:00
/// x is to the left
bool rightThan(const Field & x) const
{
return (left_bounded
? !(less(left, x) || (left_included && equals(x, left)))
: false);
}
2017-04-16 15:00:33 +00:00
/// x is to the right
bool leftThan(const Field & x) const
{
return (right_bounded
? !(less(x, right) || (right_included && equals(x, right)))
: false);
}
bool intersectsRange(const Range & r) const
{
2017-04-16 15:00:33 +00:00
/// r to the left of me.
if (r.right_bounded
&& left_bounded
&& (less(r.right, left)
|| ((!left_included || !r.right_included)
&& equals(r.right, left))))
return false;
2017-04-16 15:00:33 +00:00
/// r to the right of me.
if (r.left_bounded
&& right_bounded
&& (less(right, r.left) /// ...} {...
2017-04-16 15:00:33 +00:00
|| ((!right_included || !r.left_included) /// ...) [... or ...] (...
&& equals(r.left, right))))
return false;
return true;
}
bool containsRange(const Range & r) const
{
2017-04-16 15:00:33 +00:00
/// r starts to the left of me.
if (left_bounded
&& (!r.left_bounded
|| less(r.left, left)
|| (r.left_included
&& !left_included
&& equals(r.left, left))))
return false;
2017-04-16 15:00:33 +00:00
/// r ends right of me.
if (right_bounded
&& (!r.right_bounded
|| less(right, r.right)
|| (r.right_included
&& !right_included
&& equals(r.right, right))))
return false;
return true;
}
void swapLeftAndRight()
{
std::swap(left, right);
std::swap(left_bounded, right_bounded);
std::swap(left_included, right_included);
}
String toString() const;
2012-12-05 12:44:55 +00:00
};
2017-04-16 15:00:33 +00:00
/** Condition on the index.
*
2017-04-16 15:00:33 +00:00
* Consists of the conditions for the key belonging to all possible ranges or sets,
* as well as logical links AND/OR/NOT above these conditions.
*
2017-04-16 15:00:33 +00:00
* Constructs a reverse polish notation from these conditions
* and can calculate (interpret) its feasibility over key ranges.
*/
2012-12-10 10:23:10 +00:00
class PKCondition
2012-12-05 12:44:55 +00:00
{
public:
2017-04-16 15:00:33 +00:00
/// Does not include the SAMPLE section. all_columns - the set of all columns of the table.
PKCondition(
const SelectQueryInfo & query_info,
const Context & context,
const NamesAndTypesList & all_columns,
const SortDescription & sort_descr,
ExpressionActionsPtr pk_expr);
2017-04-16 15:00:33 +00:00
/// Whether the condition is feasible in the key range.
/// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the primary key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const;
2017-04-16 15:00:33 +00:00
/// Is the condition valid in a semi-infinite (not limited to the right) key range.
/// left_pk must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const;
2017-04-16 15:00:33 +00:00
/// Checks that the index can not be used.
bool alwaysUnknownOrTrue() const;
2017-04-16 15:00:33 +00:00
/// Get the maximum number of the primary key element used in the condition.
size_t getMaxKeyColumn() const;
2017-04-16 15:00:33 +00:00
/// Impose an additional condition: the value in the column column must be in the `range` range.
/// Returns whether there is such a column in the primary key.
bool addCondition(const String & column, const Range & range);
String toString() const;
2017-04-16 15:00:33 +00:00
/// The expression is stored as Reverse Polish Notation.
struct RPNElement
{
enum Function
{
2017-04-16 15:00:33 +00:00
/// Atoms of a Boolean expression.
FUNCTION_IN_RANGE,
FUNCTION_NOT_IN_RANGE,
FUNCTION_IN_SET,
FUNCTION_NOT_IN_SET,
2017-04-16 15:00:33 +00:00
FUNCTION_UNKNOWN, /// Can take any value.
/// Operators of the logical expression.
FUNCTION_NOT,
FUNCTION_AND,
FUNCTION_OR,
2017-04-16 15:00:33 +00:00
/// Constants
ALWAYS_FALSE,
ALWAYS_TRUE,
};
RPNElement() {}
RPNElement(Function function_) : function(function_) {}
RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {}
RPNElement(Function function_, size_t key_column_, const Range & range_)
: function(function_), range(range_), key_column(key_column_) {}
String toString() const;
Function function = FUNCTION_UNKNOWN;
2017-04-16 15:00:33 +00:00
/// For FUNCTION_IN_RANGE and FUNCTION_NOT_IN_RANGE.
Range range;
size_t key_column;
2017-04-16 15:00:33 +00:00
/// For FUNCTION_IN_SET, FUNCTION_NOT_IN_SET
ASTPtr in_function;
2017-04-16 15:00:33 +00:00
/** A chain of possibly monotone functions.
* If the primary key column is wrapped in functions that can be monotonous in some value ranges
* (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
*/
using MonotonicFunctionsChain = std::vector<FunctionPtr>;
2017-04-16 15:00:33 +00:00
mutable MonotonicFunctionsChain monotonic_functions_chain; /// The function execution does not violate the constancy.
};
static Block getBlockWithConstants(
const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns);
using AtomMap = std::unordered_map<std::string, bool(*)(RPNElement & out, const Field & value, const ASTPtr & node)>;
static const AtomMap atom_map;
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
private:
using RPN = std::vector<RPNElement>;
using ColumnIndices = std::map<String, size_t>;
bool mayBeTrueInRange(
size_t used_key_size,
const Field * left_pk,
const Field * right_pk,
const DataTypes & data_types,
bool right_bounded) const;
bool mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;
void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool operatorFromAST(const ASTFunction * func, RPNElement & out);
/** Is node the primary key column
* or expression in which column of primary key is wrapped by chain of functions,
* that can be monotomic on certain ranges?
* If these conditions are true, then returns number of column in primary key, type of resulting expression
* and fills chain of possibly-monotonic functions.
*/
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_res_column_type,
RPNElement::MonotonicFunctionsChain & out_functions_chain);
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
std::vector<const ASTFunction *> & out_functions_chain);
bool canConstantBeWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
Field & out_value,
DataTypePtr & out_type);
RPN rpn;
SortDescription sort_descr;
ColumnIndices pk_columns;
ExpressionActionsPtr pk_expr;
PreparedSets prepared_sets;
2012-12-05 12:44:55 +00:00
};
}