A little bit better index description.

This commit is contained in:
Nikolai Kochetov 2021-04-16 12:42:23 +03:00
parent be52b2889a
commit 8d8e57615c
6 changed files with 83 additions and 26 deletions

View File

@ -193,17 +193,26 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const
if (index_stats && !index_stats->empty())
{
std::string indent(format_settings.indent, format_settings.indent_char);
format_settings.out << prefix << "Indexes:\n";
for (const auto & stat : *index_stats)
/// Do not print anything if no indexes are applied.
if (index_stats->size() > 1 || index_stats->front().type != IndexType::None)
format_settings.out << prefix << "Indexes:\n";
for (size_t i = 0; i < index_stats->size(); ++i)
{
const auto & stat = (*index_stats)[i];
if (stat.type == IndexType::None)
continue;
format_settings.out << prefix << indent << indexTypeToString(stat.type) << '\n';
if (!stat.name.empty())
format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n';
if (!stat.description.empty())
format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n';
if (!stat.used_keys.empty())
{
format_settings.out << prefix << indent << indent << "Keys: " << stat.name << '\n';
@ -211,12 +220,22 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const
format_settings.out << prefix << indent << indent << indent << used_key << '\n';
}
if (!stat.description.empty())
format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n';
if (!stat.condition.empty())
format_settings.out << prefix << indent << indent << "Description: " << stat.condition << '\n';
format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after << '\n';
format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after << '\n';
format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after;
if (i)
format_settings.out << '/' << (*index_stats)[i - 1].num_parts_after;
format_settings.out << '\n';
format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after;
if (i)
format_settings.out << '/' << (*index_stats)[i - 1].num_granules_after;
format_settings.out << '\n';
}
format_settings.out << prefix << "Parts: " << index_stats->back().num_parts_after << '\n';
format_settings.out << prefix << "Granules: " << index_stats->back().num_granules_after << '\n';
}
}

View File

@ -29,6 +29,7 @@ public:
IndexType type;
std::string name;
std::string description;
std::string condition;
std::vector<std::string> used_keys;
size_t num_parts_after;
size_t num_granules_after;

View File

@ -1313,14 +1313,22 @@ String KeyCondition::toString() const
KeyCondition::Description KeyCondition::getDescription() const
{
/// This code may seem to be too difficult.
/// Here we want to convert RPN back to tree, and also simplify some logical expressions like `and(x, true) -> x`.
Description description;
/// That's a binary tree. Explicit.
/// Build and optimize it simultaneously.
struct Node
{
enum class Type
{
/// Leaf, which is RPNElement.
Leaf,
/// Leaves, which are logical constants.
True,
False,
/// Binary operators.
And,
Or,
};
@ -1329,20 +1337,27 @@ KeyCondition::Description KeyCondition::getDescription() const
/// Only for Leaf
const RPNElement * element = nullptr;
/// This means that logical NOT is applied to leaf.
bool negate = false;
std::unique_ptr<Node> left = nullptr;
std::unique_ptr<Node> right = nullptr;
};
/// The algorithm is the same as in KeyCondition::checkInHyperrectangle
/// We build a pair of trees on stack. For checking if key condition may be true, and if it may be false.
/// We need only `can_be_true` in result.
struct Frame
{
std::unique_ptr<Node> can_be_true;
std::unique_ptr<Node> can_be_false;
};
/// Combine two subtrees using logical operator.
auto combine = [](std::unique_ptr<Node> left, std::unique_ptr<Node> right, Node::Type type)
{
/// Simplify operators when one of the operands is a constant condition.
if (type == Node::Type::And)
{
/// false AND right
@ -1471,36 +1486,53 @@ KeyCondition::Description KeyCondition::getDescription() const
for (const auto & key : key_columns)
key_names[key.second] = key.first;
std::function<std::string(const Node *)> describe;
describe = [&describe, &key_names, &is_key_used](const Node * node) -> std::string
WriteBufferFromOwnString buf;
std::function<void(const Node *)> describe;
describe = [&describe, &key_names, &is_key_used, &buf](const Node * node)
{
switch (node->type)
{
case Node::Type::Leaf:
{
is_key_used[node->element->key_column] = true;
std::string res;
/// Note: for a condition with double negation, like `not(x not in set)`,
/// we could replace it with `x in set` here.
/// But I won't do it, because `cloneASTWithInversionPushDown` already pushes down `not`.
/// So, this seems to be impossible for the `can_be_true` tree.
if (node->negate)
res += "not(";
res += node->element->toString(key_names[node->element->key_column], true);
buf << "not(";
buf << node->element->toString(key_names[node->element->key_column], true);
if (node->negate)
res += ")";
return res;
buf << ")";
break;
}
case Node::Type::True:
return "true";
buf << "true";
break;
case Node::Type::False:
return "false";
buf << "false";
break;
case Node::Type::And:
return "and(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")";
buf << "and(";
describe(node->left.get());
buf << ", ";
describe(node->right.get());
buf << ")";
break;
case Node::Type::Or:
return "or(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")";
buf << "or(";
describe(node->left.get());
buf << ", ";
describe(node->right.get());
buf << ")";
break;
}
__builtin_unreachable();
};
description.condition = describe(rpn_stack.front().can_be_true.get());
describe(rpn_stack.front().can_be_true.get());
description.condition = std::move(buf.str());
for (size_t i = 0; i < key_names.size(); ++i)
if (is_key_used[i])

View File

@ -296,13 +296,14 @@ public:
/// Condition description for EXPLAIN query.
/// This is the value returned by getDescription().
struct Description
{
/// Which columns from PK were used, in PK order.
std::vector<std::string> used_keys;
/// Condition which was applied, mostly human-readable.
/// Rendered in prefix form: `and(..., ...)`, `or(..., ...)`, `not(...)`,
/// the constants `true` / `false`, and the text of each condition element.
std::string condition;
};
Description getDescription() const;
/** A chain of possibly monotone functions.
* If the key column is wrapped in functions that can be monotonic in some value ranges
* (for example: -toFloat64(toDayOfWeek(date))), then here the functions will be located: toDayOfWeek, toFloat64, negate.

View File

@ -297,18 +297,22 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
if (minmax_idx_condition)
{
auto description = minmax_idx_condition->getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::MinMax,
.description = minmax_idx_condition->toString(),
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = part_filter_counters.num_parts_after_minmax,
.num_granules_after = part_filter_counters.num_granules_after_minmax});
}
if (partition_pruner)
{
auto description = partition_pruner->getKeyCondition().getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::Partition,
.description = partition_pruner->toString(),
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = part_filter_counters.num_parts_after_partition_pruner,
.num_granules_after = part_filter_counters.num_granules_after_partition_pruner});
}
@ -778,7 +782,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::PrimaryKey,
.description = std::move(description.condition),
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = sum_parts_pk.load(std::memory_order_relaxed),
.num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)});

View File

@ -33,7 +33,7 @@ public:
/// Returns the `useless` flag.
/// NOTE(review): presumably true when the partition condition cannot prune anything — confirm where `useless` is set.
bool isUseless() const { return useless; }
/// Text form of the partition condition; simply forwards to partition_condition.toString().
std::string toString() const { return partition_condition.toString(); }
/// Read-only access to the underlying partition key condition,
/// e.g. so that callers can ask it for getDescription().
const KeyCondition & getKeyCondition() const { return partition_condition; }
};
}