Renamed PK to Key where it's appropriate #2246

This commit is contained in:
Alexey Milovidov 2018-04-20 03:27:25 +03:00
parent d71b3a95ef
commit 70be882b64
4 changed files with 87 additions and 88 deletions

View File

@ -389,19 +389,19 @@ void Set::executeOrdinary(
}
MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector<PKTuplePositionMapping> && index_mapping_)
MergeTreeSetIndex::MergeTreeSetIndex(const SetElements & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
: ordered_set(),
indexes_mapping(std::move(index_mapping_))
{
std::sort(indexes_mapping.begin(), indexes_mapping.end(),
[](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r)
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
{
return std::forward_as_tuple(l.key_index, l.tuple_index) < std::forward_as_tuple(r.key_index, r.tuple_index);
});
indexes_mapping.erase(std::unique(
indexes_mapping.begin(), indexes_mapping.end(),
[](const PKTuplePositionMapping & l, const PKTuplePositionMapping & r)
[](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
{
return l.key_index == r.key_index;
}), indexes_mapping.end());

View File

@ -167,21 +167,21 @@ public:
* position of pk index and data type of this pk column
* and functions chain applied to this column.
*/
struct PKTuplePositionMapping
struct KeyTuplePositionMapping
{
size_t tuple_index;
size_t key_index;
std::vector<FunctionBasePtr> functions;
};
MergeTreeSetIndex(const SetElements & set_elements, std::vector<PKTuplePositionMapping> && indexes_mapping_);
MergeTreeSetIndex(const SetElements & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);
BoolMask mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
private:
using OrderedTuples = std::vector<std::vector<FieldWithInfinity>>;
OrderedTuples ordered_set;
std::vector<PKTuplePositionMapping> indexes_mapping;
std::vector<KeyTuplePositionMapping> indexes_mapping;
};
}

View File

@ -403,8 +403,8 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type)
{
@ -416,8 +416,8 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
bool found_transformation = false;
for (const ExpressionAction & a : key_expr->getActions())
{
/** The primary key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the primary key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
/** The key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
* condition also holds, so the index may be used to select only parts satisfying this condition.
*
@ -446,12 +446,12 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
out_type.swap(new_type);
expr_name = a.result_name;
// Transformation results in a primary key expression, accept
// Transformation results in a key expression, accept
auto it = key_columns.find(expr_name);
if (key_columns.end() != it)
{
out_primary_key_column_num = it->second;
out_primary_key_column_type = sample_block.getByName(it->first).type;
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
found_transformation = true;
break;
}
@ -461,39 +461,39 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
return found_transformation;
}
void KeyCondition::getPKTuplePositionMapping(
void KeyCondition::getKeyTuplePositionMapping(
const ASTPtr & node,
const Context & context,
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> & indexes_mapping,
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const size_t tuple_index,
size_t & out_primary_key_column_num)
size_t & out_key_column_num)
{
MergeTreeSetIndex::PKTuplePositionMapping index_mapping;
MergeTreeSetIndex::KeyTuplePositionMapping index_mapping;
index_mapping.tuple_index = tuple_index;
DataTypePtr data_type;
if (isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
if (isKeyPossiblyWrappedByMonotonicFunctions(
node, context, index_mapping.key_index,
data_type, index_mapping.functions))
{
indexes_mapping.push_back(index_mapping);
if (out_primary_key_column_num < index_mapping.key_index)
if (out_key_column_num < index_mapping.key_index)
{
out_primary_key_column_num = index_mapping.key_index;
out_key_column_num = index_mapping.key_index;
}
}
}
/// Try to prepare PKTuplePositionMapping for tuples from IN expression.
/// Try to prepare KeyTuplePositionMapping for tuples from IN expression.
bool KeyCondition::isTupleIndexable(
const ASTPtr & node,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_primary_key_column_num)
size_t & out_key_column_num)
{
out_primary_key_column_num = 0;
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> indexes_mapping;
out_key_column_num = 0;
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
size_t num_key_columns = prepared_set->getDataTypes().size();
@ -511,13 +511,13 @@ bool KeyCondition::isTupleIndexable(
size_t current_tuple_index = 0;
for (const auto & arg : node_tuple->arguments->children)
{
getPKTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_primary_key_column_num);
getKeyTuplePositionMapping(arg, context, indexes_mapping, current_tuple_index, out_key_column_num);
++current_tuple_index;
}
}
else
{
getPKTuplePositionMapping(node, context, indexes_mapping, 0, out_primary_key_column_num);
getKeyTuplePositionMapping(node, context, indexes_mapping, 0, out_key_column_num);
}
if (indexes_mapping.empty())
@ -530,44 +530,44 @@ bool KeyCondition::isTupleIndexable(
}
bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_res_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
RPNElement::MonotonicFunctionsChain & out_functions_chain)
{
std::vector<const ASTFunction *> chain_not_tested_for_monotonicity;
DataTypePtr primary_key_column_type;
DataTypePtr key_column_type;
if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_primary_key_column_num, primary_key_column_type, chain_not_tested_for_monotonicity))
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(node, out_key_column_num, key_column_type, chain_not_tested_for_monotonicity))
return false;
for (auto it = chain_not_tested_for_monotonicity.rbegin(); it != chain_not_tested_for_monotonicity.rend(); ++it)
{
auto func_builder = FunctionFactory::instance().tryGet((*it)->name, context);
ColumnsWithTypeAndName arguments{{ nullptr, primary_key_column_type, "" }};
ColumnsWithTypeAndName arguments{{ nullptr, key_column_type, "" }};
auto func = func_builder->build(arguments);
if (!func || !func->hasInformationAboutMonotonicity())
return false;
primary_key_column_type = func->getReturnType();
key_column_type = func->getReturnType();
out_functions_chain.push_back(func);
}
out_primary_key_res_column_type = primary_key_column_type;
out_key_res_column_type = key_column_type;
return true;
}
bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<const ASTFunction *> & out_functions_chain)
{
/** By itself, the primary key column can be a functional expression. for example, `intHash32(UserID)`.
/** By itself, the key column can be a functional expression, for example, `intHash32(UserID)`.
* Therefore, use the full name of the expression for search.
*/
const auto & sample_block = key_expr->getSampleBlock();
@ -576,8 +576,8 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
auto it = key_columns.find(name);
if (key_columns.end() != it)
{
out_primary_key_column_num = it->second;
out_primary_key_column_type = sample_block.getByName(it->first).type;
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
return true;
}
@ -589,8 +589,7 @@ bool KeyCondition::isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
out_functions_chain.push_back(func);
if (!isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_primary_key_column_num, out_primary_key_column_type,
out_functions_chain))
if (!isKeyPossiblyWrappedByMonotonicFunctionsImpl(args[0], out_key_column_num, out_key_column_type, out_functions_chain))
return false;
return true;
@ -612,7 +611,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
}
catch (...)
{
throw Exception("Primary key expression contains comparison between inconvertible types: " +
throw Exception("Key expression contains comparison between inconvertible types: " +
desired_type->getName() + " and " + src_type->getName() +
" inside " + queryToString(node),
ErrorCodes::BAD_TYPE_OF_FIELD);
@ -622,7 +621,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
{
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of primary key,
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions,
* or constant expression - number.
*/
@ -635,9 +634,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
if (args.size() != 2)
return false;
DataTypePtr key_expr_type; /// Type of expression containing primary key column
size_t key_arg_pos; /// Position of argument with primary key column (non-const argument)
size_t key_column_num; /// Number of a primary key column (inside sort_descr array)
DataTypePtr key_expr_type; /// Type of expression containing key column
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
size_t key_column_num; /// Number of a key column (inside sort_descr array)
RPNElement::MonotonicFunctionsChain chain;
bool is_set_const = false;
bool is_constant_transformed = false;
@ -649,7 +648,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
is_set_const = true;
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 0;
}
@ -660,7 +659,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
is_constant_transformed = true;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& isPrimaryKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 1;
}
@ -777,16 +776,16 @@ String KeyCondition::toString() const
}
/** Index is the value of primary key every `index_granularity` rows.
/** Index is the value of key every `index_granularity` rows.
* This value is called a "mark". That is, the index consists of marks.
*
* The primary key is the tuple.
* The data is sorted by primary key in the sense of lexicographic order over tuples.
* The key is the tuple.
* The data is sorted by key in the sense of lexicographic order over tuples.
*
* A pair of marks specifies a segment with respect to the order over the tuples.
* Denote it like this: [ x1 y1 z1 .. x2 y2 z2 ],
* where x1 y1 z1 - tuple - value of primary key in left border of segment;
* x2 y2 z2 - tuple - value of primary key in right boundary of segment.
* where x1 y1 z1 - tuple - value of key in left border of segment;
* x2 y2 z2 - tuple - value of key in right boundary of segment.
* In this section there are data between these marks.
*
* Or, the last mark specifies the range open on the right: [ a b c .. + inf )
@ -898,8 +897,8 @@ static bool forAnyParallelogram(
bool KeyCondition::mayBeTrueInRange(
size_t used_key_size,
const Field * left_pk,
const Field * right_pk,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const
{
@ -907,19 +906,19 @@ bool KeyCondition::mayBeTrueInRange(
/* std::cerr << "Checking for: [";
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_pk[i]);
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]);
std::cerr << " ... ";
if (right_bounded)
{
for (size_t i = 0; i != used_key_size; ++i)
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_pk[i]);
std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]);
std::cerr << "]\n";
}
else
std::cerr << "+inf)\n";*/
return forAnyParallelogram(used_key_size, left_pk, right_pk, true, right_bounded, key_ranges, 0,
return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0,
[&] (const std::vector<Range> & key_ranges)
{
auto res = mayBeTrueInRangeImpl(key_ranges, data_types);
@ -1065,15 +1064,15 @@ bool KeyCondition::mayBeTrueInRangeImpl(const std::vector<Range> & key_ranges, c
bool KeyCondition::mayBeTrueInRange(
size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const
size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_pk, right_pk, data_types, true);
return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true);
}
bool KeyCondition::mayBeTrueAfter(
size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const
size_t used_key_size, const Field * left_key, const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_pk, nullptr, data_types, false);
return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false);
}

View File

@ -236,22 +236,22 @@ public:
const ExpressionActionsPtr & key_expr);
/// Whether the condition is feasible in the key range.
/// left_pk and right_pk must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the primary key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_pk, const Field * right_pk, const DataTypes & data_types) const;
/// left_key and right_key must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const;
/// Is the condition valid in a semi-infinite (not limited to the right) key range.
/// left_pk must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_pk, const DataTypes & data_types) const;
/// left_key must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const;
/// Checks that the index can not be used.
bool alwaysUnknownOrTrue() const;
/// Get the maximum number of the primary key element used in the condition.
/// Get the maximum number of the key element used in the condition.
size_t getMaxKeyColumn() const;
/// Impose an additional condition: the value in the column `column` must be in the `range` range.
/// Returns whether there is such a column in the primary key.
/// Returns whether there is such a column in the key.
bool addCondition(const String & column, const Range & range);
String toString() const;
@ -296,7 +296,7 @@ public:
MergeTreeSetIndexPtr set_index;
/** A chain of possibly monotone functions.
* If the primary key column is wrapped in functions that can be monotonous in some value ranges
* If the key column is wrapped in functions that can be monotonic in some value ranges
* (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.
*/
using MonotonicFunctionsChain = std::vector<FunctionBasePtr>;
@ -320,8 +320,8 @@ private:
bool mayBeTrueInRange(
size_t used_key_size,
const Field * left_pk,
const Field * right_pk,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const;
@ -331,45 +331,45 @@ private:
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool operatorFromAST(const ASTFunction * func, RPNElement & out);
/** Is node the primary key column
* or expression in which column of primary key is wrapped by chain of functions,
/** Is node the key column
* or expression in which column of key is wrapped by chain of functions,
* that can be monotonic on certain ranges?
* If these conditions are true, then returns number of column in primary key, type of resulting expression
* If these conditions are true, then returns number of column in key, type of resulting expression
* and fills chain of possibly-monotonic functions.
*/
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctions(
bool isKeyPossiblyWrappedByMonotonicFunctions(
const ASTPtr & node,
const Context & context,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_res_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
RPNElement::MonotonicFunctionsChain & out_functions_chain);
bool isPrimaryKeyPossiblyWrappedByMonotonicFunctionsImpl(
bool isKeyPossiblyWrappedByMonotonicFunctionsImpl(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
std::vector<const ASTFunction *> & out_functions_chain);
bool canConstantBeWrappedByMonotonicFunctions(
const ASTPtr & node,
size_t & out_primary_key_column_num,
DataTypePtr & out_primary_key_column_type,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type);
void getPKTuplePositionMapping(
void getKeyTuplePositionMapping(
const ASTPtr & node,
const Context & context,
std::vector<MergeTreeSetIndex::PKTuplePositionMapping> & indexes_mapping,
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
const size_t tuple_index,
size_t & out_primary_key_column_num);
size_t & out_key_column_num);
bool isTupleIndexable(
const ASTPtr & node,
const Context & context,
RPNElement & out,
const SetPtr & prepared_set,
size_t & out_primary_key_column_num);
size_t & out_key_column_num);
RPN rpn;