ClickHouse/dbms/src/Storages/MergeTree/PKCondition.cpp

597 lines
20 KiB
C++
Raw Normal View History

2013-04-24 10:31:32 +00:00
#include <DB/Storages/MergeTree/PKCondition.h>
2012-12-05 12:44:55 +00:00
#include <DB/DataTypes/DataTypesNumberFixed.h>
2013-06-03 13:17:17 +00:00
#include <DB/Interpreters/ExpressionAnalyzer.h>
2014-03-20 12:25:26 +00:00
#include <DB/Columns/ColumnSet.h>
2014-03-26 18:19:25 +00:00
#include <DB/Columns/ColumnTuple.h>
#include <DB/Parsers/ASTSet.h>
2012-12-05 12:44:55 +00:00
namespace DB
{
/** Maps the name of a comparison / membership function to a callback that fills in an RPN atom.
  * atomFromAST presets out.function to FUNCTION_IN_RANGE and out.key_column before invoking the callback,
  * so the plain comparisons only have to provide the range; the remaining entries override the function.
  */
const PKCondition::AtomMap PKCondition::atom_map{
    /// column = constant: the key must lie in the single-point range.
    {
        "equals",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.range = Range(constant);
        }
    },
    /// column != constant: same single-point range, but the atom is negated.
    {
        "notEquals",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.function = RPNElement::FUNCTION_NOT_IN_RANGE;
            atom.range = Range(constant);
        }
    },
    /// column < constant: right-bounded range, bound excluded.
    {
        "less",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.range = Range::createRightBounded(constant, false);
        }
    },
    /// column <= constant: right-bounded range, bound included.
    {
        "lessOrEquals",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.range = Range::createRightBounded(constant, true);
        }
    },
    /// column > constant: left-bounded range, bound excluded.
    {
        "greater",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.range = Range::createLeftBounded(constant, false);
        }
    },
    /// column >= constant: left-bounded range, bound included.
    {
        "greaterOrEquals",
        [] (RPNElement & atom, const Field & constant, ASTPtr &)
        {
            atom.range = Range::createLeftBounded(constant, true);
        }
    },
    /// column IN set: the constant is not used, the whole function node is remembered instead.
    {
        "in",
        [] (RPNElement & atom, const Field &, ASTPtr & function_node)
        {
            atom.function = RPNElement::FUNCTION_IN_SET;
            atom.in_function = function_node;
        }
    },
    /// column NOT IN set: as above, negated.
    {
        "notIn",
        [] (RPNElement & atom, const Field &, ASTPtr & function_node)
        {
            atom.function = RPNElement::FUNCTION_NOT_IN_SET;
            atom.in_function = function_node;
        }
    }
};
/** Converts a string holding a date or a date-with-time into a UInt64
  * containing the numeric value of that date or date-with-time.
  * A string exactly as long as "YYYY-MM-DD" is read as a date; anything else is read as a date with time,
  * and an exception is thrown if unread characters remain afterwards.
  */
UInt64 stringToDateOrDateTime(const String & s)
{
    ReadBufferFromString in(s);

    if (s.size() != strlen("YYYY-MM-DD"))
    {
        time_t parsed_date_time{};
        readDateTimeText(parsed_date_time, in);

        if (!in.eof())
            throw Exception("String is too long for DateTime: " + s);

        return UInt64(parsed_date_time);
    }

    DayNum_t parsed_date{};
    readDateText(parsed_date, in);
    return UInt64(parsed_date);
}
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-compare"
/** More accurate comparison.
  * Differs from Field::operator< and Field::operator== in that it compares values of different numeric types with each other.
  * The comparison rules are the same as in FunctionsComparison.
  * In particular, comparison of signed with unsigned is left as UB.
  *
  * This visitor answers "are the two fields equal?". A String is compared with a UInt64 by first
  * converting the string with stringToDateOrDateTime; every other pair of unrelated types is unequal.
  */
class FieldVisitorAccurateEquals : public StaticVisitor<bool>
{
public:
    /// Left operand is Null: equal only to another Null.
    bool operator() (const Null &, const Null &) const { return true; }
    bool operator() (const Null &, const UInt64 &) const { return false; }
    bool operator() (const Null &, const Int64 &) const { return false; }
    bool operator() (const Null &, const Float64 &) const { return false; }
    bool operator() (const Null &, const String &) const { return false; }
    bool operator() (const Null &, const Array &) const { return false; }

    /// Left operand is UInt64: compared numerically; a string is parsed as a date or date-time.
    bool operator() (const UInt64 &, const Null &) const { return false; }
    bool operator() (const UInt64 & lhs, const UInt64 & rhs) const { return lhs == rhs; }
    bool operator() (const UInt64 & lhs, const Int64 & rhs) const { return lhs == rhs; }
    bool operator() (const UInt64 & lhs, const Float64 & rhs) const { return lhs == rhs; }
    bool operator() (const UInt64 & lhs, const String & rhs) const { return lhs == stringToDateOrDateTime(rhs); }
    bool operator() (const UInt64 &, const Array &) const { return false; }

    /// Left operand is Int64: compared numerically, never equal to a string.
    bool operator() (const Int64 &, const Null &) const { return false; }
    bool operator() (const Int64 & lhs, const UInt64 & rhs) const { return lhs == rhs; }
    bool operator() (const Int64 & lhs, const Int64 & rhs) const { return lhs == rhs; }
    bool operator() (const Int64 & lhs, const Float64 & rhs) const { return lhs == rhs; }
    bool operator() (const Int64 &, const String &) const { return false; }
    bool operator() (const Int64 &, const Array &) const { return false; }

    /// Left operand is Float64: compared numerically, never equal to a string.
    bool operator() (const Float64 &, const Null &) const { return false; }
    bool operator() (const Float64 & lhs, const UInt64 & rhs) const { return lhs == rhs; }
    bool operator() (const Float64 & lhs, const Int64 & rhs) const { return lhs == rhs; }
    bool operator() (const Float64 & lhs, const Float64 & rhs) const { return lhs == rhs; }
    bool operator() (const Float64 &, const String &) const { return false; }
    bool operator() (const Float64 &, const Array &) const { return false; }

    /// Left operand is String: parsed as a date or date-time against UInt64, compared as text against String.
    bool operator() (const String &, const Null &) const { return false; }
    bool operator() (const String & lhs, const UInt64 & rhs) const { return stringToDateOrDateTime(lhs) == rhs; }
    bool operator() (const String &, const Int64 &) const { return false; }
    bool operator() (const String &, const Float64 &) const { return false; }
    bool operator() (const String & lhs, const String & rhs) const { return lhs == rhs; }
    bool operator() (const String &, const Array &) const { return false; }

    /// Left operand is Array: equal only to an equal Array.
    bool operator() (const Array &, const Null &) const { return false; }
    bool operator() (const Array &, const UInt64 &) const { return false; }
    bool operator() (const Array &, const Int64 &) const { return false; }
    bool operator() (const Array &, const Float64 &) const { return false; }
    bool operator() (const Array &, const String &) const { return false; }
    bool operator() (const Array & lhs, const Array & rhs) const { return lhs == rhs; }
};
/** Visitor answering "is the left field strictly less than the right one?".
  * Numeric types are compared with each other directly (signed vs. unsigned is compared as written,
  * without special handling). A String is compared with a UInt64 by first converting the string with
  * stringToDateOrDateTime. For the remaining mixed pairs the result is a fixed constant per pair of types.
  */
class FieldVisitorAccurateLess : public StaticVisitor<bool>
{
public:
    /// Left operand is Null: less than everything except another Null.
    bool operator() (const Null &, const Null &) const { return false; }
    bool operator() (const Null &, const UInt64 &) const { return true; }
    bool operator() (const Null &, const Int64 &) const { return true; }
    bool operator() (const Null &, const Float64 &) const { return true; }
    bool operator() (const Null &, const String &) const { return true; }
    bool operator() (const Null &, const Array &) const { return true; }

    /// Left operand is UInt64: compared numerically; a string is parsed as a date or date-time.
    bool operator() (const UInt64 &, const Null &) const { return false; }
    bool operator() (const UInt64 & lhs, const UInt64 & rhs) const { return lhs < rhs; }
    bool operator() (const UInt64 & lhs, const Int64 & rhs) const { return lhs < rhs; }
    bool operator() (const UInt64 & lhs, const Float64 & rhs) const { return lhs < rhs; }
    bool operator() (const UInt64 & lhs, const String & rhs) const { return lhs < stringToDateOrDateTime(rhs); }
    bool operator() (const UInt64 &, const Array &) const { return true; }

    /// Left operand is Int64: compared numerically; always less than a String or an Array.
    bool operator() (const Int64 &, const Null &) const { return false; }
    bool operator() (const Int64 & lhs, const UInt64 & rhs) const { return lhs < rhs; }
    bool operator() (const Int64 & lhs, const Int64 & rhs) const { return lhs < rhs; }
    bool operator() (const Int64 & lhs, const Float64 & rhs) const { return lhs < rhs; }
    bool operator() (const Int64 &, const String &) const { return true; }
    bool operator() (const Int64 &, const Array &) const { return true; }

    /// Left operand is Float64: compared numerically; always less than a String or an Array.
    bool operator() (const Float64 &, const Null &) const { return false; }
    bool operator() (const Float64 & lhs, const UInt64 & rhs) const { return lhs < rhs; }
    bool operator() (const Float64 & lhs, const Int64 & rhs) const { return lhs < rhs; }
    bool operator() (const Float64 & lhs, const Float64 & rhs) const { return lhs < rhs; }
    bool operator() (const Float64 &, const String &) const { return true; }
    bool operator() (const Float64 &, const Array &) const { return true; }

    /// Left operand is String: parsed as a date or date-time against UInt64, compared as text against String.
    bool operator() (const String &, const Null &) const { return false; }
    bool operator() (const String & lhs, const UInt64 & rhs) const { return stringToDateOrDateTime(lhs) < rhs; }
    bool operator() (const String &, const Int64 &) const { return false; }
    bool operator() (const String &, const Float64 &) const { return false; }
    bool operator() (const String & lhs, const String & rhs) const { return lhs < rhs; }
    bool operator() (const String &, const Array &) const { return true; }

    /// Left operand is Array: never less than a non-array; arrays are compared with each other.
    bool operator() (const Array &, const Null &) const { return false; }
    bool operator() (const Array &, const UInt64 &) const { return false; }
    bool operator() (const Array &, const Int64 &) const { return false; }
    bool operator() (const Array &, const Float64 &) const { return false; }
    bool operator() (const Array &, const String &) const { return false; }
    bool operator() (const Array & lhs, const Array & rhs) const { return lhs < rhs; }
};
#pragma GCC diagnostic pop
/// Tests two fields for equality using FieldVisitorAccurateEquals, so values of different numeric types can be compared.
inline bool Range::equals(const Field & lhs, const Field & rhs)
{
    const bool are_equal = apply_visitor(FieldVisitorAccurateEquals(), lhs, rhs);
    return are_equal;
}
/// Tests whether lhs is strictly less than rhs using FieldVisitorAccurateLess, so values of different numeric types can be compared.
inline bool Range::less(const Field & lhs, const Field & rhs)
{
    const bool is_less = apply_visitor(FieldVisitorAccurateLess(), lhs, rhs);
    return is_less;
}
/** Builds a block that initially holds a single constant UInt8 column named "_dummy"
  * and executes over it the constant actions that ExpressionAnalyzer produces for the query.
  * The resulting block is what getConstant later searches for sub-expressions that evaluated to constants.
  */
Block PKCondition::getBlockWithConstants(
    const ASTPtr & query, const Context & context, const NamesAndTypesList & all_columns)
{
    Block constants_block{
        { new ColumnConstUInt8{1, 0}, new DataTypeUInt8, "_dummy" }
    };

    ExpressionAnalyzer analyzer{query, context, nullptr, all_columns};
    const auto constant_actions = analyzer.getConstActions();
    constant_actions->execute(constants_block);

    return constants_block;
}
2013-05-06 12:15:34 +00:00
/** Builds the condition on the primary key from the WHERE and PREWHERE sections of a SELECT query.
  *
  * query       - the query; it is cast to ASTSelectQuery.
  * context_    - context used for evaluating constant expressions.
  * all_columns - columns passed on to the expression analyzer.
  * sort_descr_ - description of the primary key; the position of a column in it is its key column index.
  */
PKCondition::PKCondition(ASTPtr query, const Context & context_, const NamesAndTypesList & all_columns, const SortDescription & sort_descr_)
    : sort_descr(sort_descr_)
{
    /// Remember the position of every key column. If a name occurs more than once, its first position is kept.
    for (size_t position = 0; position < sort_descr.size(); ++position)
        pk_columns.emplace(sort_descr[position].column_name, position);

    /** Evaluate the expressions that depend only on constants,
      * so that the index can be used when, for example, WHERE Date = toDate(now()) is written.
      */
    Block block_with_constants = getBlockWithConstants(query, context_, all_columns);

    /// Convert the WHERE (and PREWHERE) section into reverse Polish notation.
    ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*query);

    const bool has_where = static_cast<bool>(select.where_expression);
    const bool has_prewhere = static_cast<bool>(select.prewhere_expression);

    if (has_where)
        traverseAST(select.where_expression, block_with_constants);

    if (has_prewhere)
        traverseAST(select.prewhere_expression, block_with_constants);

    if (has_where && has_prewhere)
    {
        /// Both sections are present: they must hold simultaneously.
        rpn.emplace_back(RPNElement::FUNCTION_AND);
    }
    else if (!has_where && !has_prewhere)
    {
        /// No conditions at all: nothing is known about the key.
        rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN);
    }
}
/** Additionally requires the given key column to lie in the given range.
  * The new atom is appended to the RPN followed by an AND, so it is combined with the condition built so far.
  * Returns false and changes nothing if the column is not part of the primary key.
  */
bool PKCondition::addCondition(const String & column, const Range & range)
{
    const auto key_it = pk_columns.find(column);
    if (key_it == pk_columns.end())
        return false;

    rpn.emplace_back(RPNElement::FUNCTION_IN_RANGE, key_it->second, range);
    rpn.emplace_back(RPNElement::FUNCTION_AND);
    return true;
}
2012-12-05 12:44:55 +00:00
/** Get the value of a constant expression.
  * Returns false if the expression is not constant; in that case `value` is left untouched.
  * An expression is considered constant if it is a literal, or if block_with_constants
  * contains a constant column under the expression's column name.
  */
static bool getConstant(ASTPtr & expr, Block & block_with_constants, Field & value)
{
    const String column_name = expr->getColumnName();

    /// A literal.
    if (ASTLiteral * literal = typeid_cast<ASTLiteral *>(&*expr))
    {
        value = literal->value;
        return true;
    }

    if (!block_with_constants.has(column_name))
        return false;

    const auto & folded = block_with_constants.getByName(column_name);
    if (!folded.column->isConst())
        return false;

    /// An expression that was evaluated to a constant.
    value = (*folded.column)[0];
    return true;
}
2012-12-10 10:23:10 +00:00
/** Recursively converts an expression into reverse Polish notation, appending elements to `rpn`.
  * Logical operators (see operatorFromAST) are expanded into their operands followed by operator elements;
  * anything else becomes a single atom, or FUNCTION_UNKNOWN if it cannot be expressed as a condition on the key.
  */
void PKCondition::traverseAST(ASTPtr & node, Block & block_with_constants)
{
    RPNElement element;

    ASTFunction * func = typeid_cast<ASTFunction *>(&*node);
    if (func && operatorFromAST(func, element))
    {
        ASTs & operands = typeid_cast<ASTExpressionList &>(*func->arguments).children;
        const bool is_negation = element.function == RPNElement::FUNCTION_NOT;

        size_t operand_index = 0;
        for (auto & operand : operands)
        {
            traverseAST(operand, block_with_constants);

            /** The index check provides correct support for the functions `and` and `or` of arbitrary arity:
              * for them n - 1 operator elements are added (where n is the number of arguments).
              * The unary `not` gets its element right after its only operand.
              */
            if (is_negation || operand_index != 0)
                rpn.push_back(element);

            ++operand_index;
        }

        return;
    }

    if (!atomFromAST(node, block_with_constants, element))
        element.function = RPNElement::FUNCTION_UNKNOWN;

    rpn.push_back(element);
}
2012-12-10 10:23:10 +00:00
/** Tries to turn a node into an atom of the condition on the primary key.
  * Handles the functions < > = != <= >= in notIn with exactly two arguments, where one argument is a constant
  * (or, for the left-column form, a set) and the other one is a column of the primary key.
  * Returns false if the node is not such a function; `out` may be partially filled in that case.
  */
bool PKCondition::atomFromAST(ASTPtr & node, Block & block_with_constants, RPNElement & out)
{
    ASTFunction * func = typeid_cast<ASTFunction *>(&*node);
    if (!func)
        return false;

    ASTs & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
    if (args.size() != 2)
        return false;

    const auto left_key = pk_columns.find(args[0]->getColumnName());
    const bool left_is_key = left_key != pk_columns.end();

    /// If true, the constant is on the left.
    bool constant_on_left = false;
    size_t key_column = 0;
    Field value;

    if (left_is_key && getConstant(args[1], block_with_constants, value))
    {
        /// <column> <op> <const>
        key_column = left_key->second;
    }
    else
    {
        const auto right_key = pk_columns.find(args[1]->getColumnName());

        if (right_key != pk_columns.end() && getConstant(args[0], block_with_constants, value))
        {
            /// <const> <op> <column>
            constant_on_left = true;
            key_column = right_key->second;
        }
        else if (left_is_key && typeid_cast<ASTSet *>(args[1].get()))
        {
            /// <column> <op> <set>; `value` stays default-constructed here.
            key_column = left_key->second;
        }
        else
            return false;
    }

    std::string func_name = func->name;

    /// Replace <const> <sign> <column> with <column> <-sign> <const>.
    if (constant_on_left)
    {
        static const char * const mirrored[][2] =
        {
            { "less", "greater" },
            { "greater", "less" },
            { "greaterOrEquals", "lessOrEquals" },
            { "lessOrEquals", "greaterOrEquals" },
        };

        for (const auto & names : mirrored)
        {
            if (func_name == names[0])
            {
                func_name = names[1];
                break;
            }
        }
    }

    /// Defaults for the atom; the callback from atom_map may override the function.
    out.function = RPNElement::FUNCTION_IN_RANGE;
    out.key_column = key_column;

    const auto atom_it = atom_map.find(func_name);
    if (atom_it == std::end(atom_map))
        return false;

    atom_it->second(out, value, node);
    return true;
}
2012-12-10 10:23:10 +00:00
/** Recognizes the logical functions AND, OR, NOT.
  * On success sets out.function and returns true. Returns false for any other function,
  * and for `not` called with a number of arguments other than one.
  */
bool PKCondition::operatorFromAST(ASTFunction * func, RPNElement & out)
{
    ASTs & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;

    if (func->name == "and")
    {
        out.function = RPNElement::FUNCTION_AND;
        return true;
    }

    if (func->name == "or")
    {
        out.function = RPNElement::FUNCTION_OR;
        return true;
    }

    if (func->name == "not" && args.size() == 1)
    {
        out.function = RPNElement::FUNCTION_NOT;
        return true;
    }

    return false;
}
2015-03-27 03:37:46 +00:00
/// Returns the textual form of the condition: the RPN elements in order, separated by ", ".
String PKCondition::toString() const
{
    String description;
    bool is_first = true;

    for (const auto & element : rpn)
    {
        if (!is_first)
            description += ", ";
        is_first = false;

        description += element.toString();
    }

    return description;
}
2015-03-27 03:37:46 +00:00
bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk, bool right_bounded) const
2012-12-05 12:44:55 +00:00
{
/// Найдем диапазоны элементов ключа.
std::vector<Range> key_ranges(sort_descr.size(), Range());
2012-12-06 09:45:09 +00:00
if (right_bounded)
2012-12-05 12:44:55 +00:00
{
2012-12-06 09:45:09 +00:00
for (size_t i = 0; i < sort_descr.size(); ++i)
2012-12-05 12:44:55 +00:00
{
2012-12-06 09:45:09 +00:00
if (left_pk[i] == right_pk[i])
{
key_ranges[i] = Range(left_pk[i]);
}
else
{
key_ranges[i] = Range(left_pk[i], true, right_pk[i], true);
break;
}
2012-12-05 12:44:55 +00:00
}
}
2012-12-06 09:45:09 +00:00
else
{
key_ranges[0] = Range::createLeftBounded(left_pk[0], true);
2012-12-06 09:45:09 +00:00
}
2012-12-05 12:44:55 +00:00
std::vector<BoolMask> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
{
2015-03-27 03:37:46 +00:00
const auto & element = rpn[i];
2012-12-05 12:44:55 +00:00
if (element.function == RPNElement::FUNCTION_UNKNOWN)
{
rpn_stack.emplace_back(true, true);
2012-12-05 12:44:55 +00:00
}
else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE || element.function == RPNElement::FUNCTION_IN_RANGE)
{
const Range & key_range = key_ranges[element.key_column];
2012-12-05 12:44:55 +00:00
bool intersects = element.range.intersectsRange(key_range);
bool contains = element.range.containsRange(key_range);
rpn_stack.emplace_back(intersects, !contains);
2012-12-05 12:44:55 +00:00
if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
rpn_stack.back() = !rpn_stack.back();
}
2014-03-26 10:56:21 +00:00
else if (element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
2015-03-27 03:37:46 +00:00
auto in_func = typeid_cast<const ASTFunction *>(element.in_function.get());
const ASTs & args = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
auto ast_set = typeid_cast<const ASTSet *>(args[1].get());
if (in_func && ast_set)
{
const Range & key_range = key_ranges[element.key_column];
2014-03-26 10:56:21 +00:00
2014-04-01 10:09:22 +00:00
rpn_stack.push_back(ast_set->set->mayBeTrueInRange(key_range));
if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
rpn_stack.back() = !rpn_stack.back();
}
else
{
throw DB::Exception("Set for IN is not created yet!", ErrorCodes::LOGICAL_ERROR);
}
2014-03-26 10:56:21 +00:00
}
2012-12-05 12:44:55 +00:00
else if (element.function == RPNElement::FUNCTION_NOT)
{
rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_AND)
{
auto arg1 = rpn_stack.back();
2012-12-05 12:44:55 +00:00
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
2012-12-05 12:44:55 +00:00
rpn_stack.back() = arg1 & arg2;
}
else if (element.function == RPNElement::FUNCTION_OR)
{
auto arg1 = rpn_stack.back();
2012-12-05 12:44:55 +00:00
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
2012-12-05 12:44:55 +00:00
rpn_stack.back() = arg1 | arg2;
}
else
2012-12-10 10:23:10 +00:00
throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
2012-12-05 12:44:55 +00:00
}
2012-12-05 12:44:55 +00:00
if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in PkCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR);
2012-12-05 12:44:55 +00:00
return rpn_stack[0].can_be_true;
}
2015-03-27 03:37:46 +00:00
bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk) const
2012-12-06 09:45:09 +00:00
{
return mayBeTrueInRange(left_pk, right_pk, true);
}
2015-03-27 03:37:46 +00:00
bool PKCondition::mayBeTrueAfter(const Field * left_pk) const
2012-12-06 09:45:09 +00:00
{
2014-04-08 07:58:53 +00:00
return mayBeTrueInRange(left_pk, nullptr, false);
2012-12-06 09:45:09 +00:00
}
2015-03-27 03:37:46 +00:00
/** Returns the set that is the second argument of the stored IN function node.
  * Returns nullptr if the stored node is not a function, or if its second argument is not an ASTSet.
  */
const ASTSet * PKCondition::RPNElement::inFunctionToSet() const
{
    if (const auto in_func = typeid_cast<const ASTFunction *>(in_function.get()))
    {
        const ASTs & arguments = typeid_cast<const ASTExpressionList &>(*in_func->arguments).children;
        return typeid_cast<const ASTSet *>(arguments[1].get());
    }

    return nullptr;
}
2015-03-27 03:37:46 +00:00
/** Returns the textual form of one RPN element.
  * Operators and the unknown element are printed by name; atoms are printed as
  * "(column N in ...)" with the key column index and the description of the set or range.
  * An unrecognized function value yields "ERROR".
  * Throws LOGICAL_ERROR if an IN / NOT IN atom has no set available.
  */
String PKCondition::RPNElement::toString() const
{
    switch (function)
    {
        case FUNCTION_AND:
            return "and";
        case FUNCTION_OR:
            return "or";
        case FUNCTION_NOT:
            return "not";
        case FUNCTION_UNKNOWN:
            return "unknown";
        case FUNCTION_NOT_IN_SET:
        case FUNCTION_IN_SET:
        {
            /// inFunctionToSet returns nullptr when the stored node is not an IN function over a set;
            /// report that the same way mayBeTrueInRange does instead of dereferencing a null pointer.
            const ASTSet * ast_set = inFunctionToSet();
            if (!ast_set)
                throw DB::Exception("Set for IN is not created yet!", ErrorCodes::LOGICAL_ERROR);

            std::ostringstream ss;
            ss << "(column " << key_column << (function == FUNCTION_IN_SET ? " in " : " notIn ") << ast_set->set->describe() << ")";
            return ss.str();
        }
        case FUNCTION_IN_RANGE:
        case FUNCTION_NOT_IN_RANGE:
        {
            std::ostringstream ss;
            ss << "(column " << key_column << (function == FUNCTION_NOT_IN_RANGE ? " not" : "") << " in " << range.toString() << ")";
            return ss.str();
        }
        default:
            return "ERROR";
    }
}
2015-03-27 03:37:46 +00:00
bool PKCondition::alwaysUnknown() const
{
std::vector<UInt8> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
{
2015-03-27 03:37:46 +00:00
const auto & element = rpn[i];
if (element.function == RPNElement::FUNCTION_UNKNOWN)
{
rpn_stack.push_back(true);
}
else if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
rpn_stack.push_back(false);
}
else if (element.function == RPNElement::FUNCTION_NOT)
{
}
else if (element.function == RPNElement::FUNCTION_AND)
{
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
rpn_stack.back() = arg1 & arg2;
}
else if (element.function == RPNElement::FUNCTION_OR)
{
auto arg1 = rpn_stack.back();
rpn_stack.pop_back();
auto arg2 = rpn_stack.back();
rpn_stack.back() = arg1 | arg2;
}
else
throw Exception("Unexpected function type in PKCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
return rpn_stack[0];
}
2012-12-05 12:44:55 +00:00
}