Better description for key condition.

This commit is contained in:
Nikolai Kochetov 2021-04-15 20:30:04 +03:00
parent 531e9ba6e3
commit be52b2889a
5 changed files with 248 additions and 7 deletions

View File

@ -204,6 +204,13 @@ void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const
if (!stat.name.empty())
format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n';
if (!stat.used_keys.empty())
{
    /// "Keys: " is only a header line: the keys themselves follow, one per line, one level deeper.
    /// The index name is already printed by the "Name: " line above, so it is not repeated here.
    format_settings.out << prefix << indent << indent << "Keys: " << '\n';
    for (const auto & used_key : stat.used_keys)
        format_settings.out << prefix << indent << indent << indent << used_key << '\n';
}
if (!stat.description.empty())
format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n';

View File

@ -9,7 +9,7 @@ namespace DB
/// This step is created to read from MergeTree* table.
/// For now, it takes a list of parts and creates source from it.
class ReadFromMergeTree : public ISourceStep
class ReadFromMergeTree final : public ISourceStep
{
public:
@ -29,6 +29,7 @@ public:
IndexType type;
std::string name;
std::string description;
std::vector<std::string> used_keys;
size_t num_parts_after;
size_t num_granules_after;
};
@ -64,7 +65,7 @@ public:
InReverseOrder,
};
explicit ReadFromMergeTree(
ReadFromMergeTree(
const MergeTreeData & storage_,
StorageMetadataPtr metadata_snapshot_,
String query_id_,

View File

@ -938,6 +938,9 @@ public:
return func->getMonotonicityForRange(type, left, right);
}
/// Returns the stored `kind` of this wrapper (e.g. Kind::LEFT_CONST or Kind::RIGHT_CONST).
Kind getKind() const { return kind; }
/// Read-only access to the stored constant argument (`const_arg`); the reference stays owned by this object.
const ColumnWithTypeAndName & getConstArg() const { return const_arg; }
private:
FunctionBasePtr func;
ColumnWithTypeAndName const_arg;
@ -1308,6 +1311,203 @@ String KeyCondition::toString() const
return res;
}
/// Builds a human-readable description of the key condition.
///
/// The RPN is evaluated symbolically. Every stack frame keeps two expression trees:
///  - `can_be_true`  describes when the sub-expression may evaluate to true;
///  - `can_be_false` describes when the sub-expression may evaluate to false.
/// NOT swaps the two trees, AND/OR combine them (with the dual operator for `can_be_false`),
/// and constant `true` / `false` operands are folded away while combining.
///
/// Returns the textual form of `can_be_true` for the whole condition together with the names
/// of the key columns referenced by the leaves that remained in that tree (in key index order).
///
/// Throws Exception with LOGICAL_ERROR if the RPN contains an unexpected element
/// or does not reduce to exactly one stack frame.
KeyCondition::Description KeyCondition::getDescription() const
{
    /// Node of the symbolic boolean expression tree.
    struct Node
    {
        enum class Type
        {
            Leaf,
            True,
            False,
            And,
            Or,
        };

        Type type;

        /// Only for Leaf
        const RPNElement * element = nullptr;
        bool negate = false;

        std::unique_ptr<Node> left = nullptr;
        std::unique_ptr<Node> right = nullptr;
    };

    /// One entry of the evaluation stack, see the function comment.
    struct Frame
    {
        std::unique_ptr<Node> can_be_true;
        std::unique_ptr<Node> can_be_false;
    };

    /// Joins two sub-trees with `type` (And / Or), folding constant operands.
    auto combine = [](std::unique_ptr<Node> left, std::unique_ptr<Node> right, Node::Type type) -> std::unique_ptr<Node>
    {
        if (type == Node::Type::And)
        {
            /// false AND right
            if (left->type == Node::Type::False)
                return left;

            /// left AND false
            if (right->type == Node::Type::False)
                return right;

            /// true AND right
            if (left->type == Node::Type::True)
                return right;

            /// left AND true
            if (right->type == Node::Type::True)
                return left;
        }

        if (type == Node::Type::Or)
        {
            /// false OR right
            if (left->type == Node::Type::False)
                return right;

            /// left OR false
            if (right->type == Node::Type::False)
                return left;

            /// true OR right
            if (left->type == Node::Type::True)
                return left;

            /// left OR true
            if (right->type == Node::Type::True)
                return right;
        }

        return std::make_unique<Node>(Node{
            .type = type,
            .left = std::move(left),
            .right = std::move(right)
        });
    };

    std::vector<Frame> rpn_stack;

    /// Pushes a frame made of two childless nodes of the given types (constants).
    auto push_constant_frame = [&rpn_stack](Node::Type true_type, Node::Type false_type)
    {
        auto can_be_true = std::make_unique<Node>(Node{.type = true_type});
        auto can_be_false = std::make_unique<Node>(Node{.type = false_type});
        rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
    };

    /// Replaces the two topmost frames with their combination.
    /// `true_op` joins the `can_be_true` trees, `false_op` joins the `can_be_false` trees.
    /// The frame popped first is passed as the left operand, so operands appear in the
    /// description in the reverse of their RPN order.
    auto apply_binary = [&rpn_stack, &combine](Node::Type true_op, Node::Type false_op)
    {
        assert(!rpn_stack.empty());
        auto arg1 = std::move(rpn_stack.back());

        rpn_stack.pop_back();

        assert(!rpn_stack.empty());
        auto arg2 = std::move(rpn_stack.back());

        Frame frame;
        frame.can_be_true = combine(std::move(arg1.can_be_true), std::move(arg2.can_be_true), true_op);
        frame.can_be_false = combine(std::move(arg1.can_be_false), std::move(arg2.can_be_false), false_op);

        rpn_stack.back() = std::move(frame);
    };

    for (const auto & element : rpn)
    {
        if (element.function == RPNElement::FUNCTION_UNKNOWN)
        {
            /// Nothing is known about the element: it may be true and it may be false.
            push_constant_frame(Node::Type::True, Node::Type::True);
        }
        else if (
               element.function == RPNElement::FUNCTION_IN_RANGE
            || element.function == RPNElement::FUNCTION_NOT_IN_RANGE
            || element.function == RPNElement::FUNCTION_IN_SET
            || element.function == RPNElement::FUNCTION_NOT_IN_SET)
        {
            /// Both trees refer to the same RPN element; the "may be false" one is its negation.
            auto can_be_true = std::make_unique<Node>(Node{.type = Node::Type::Leaf, .element = &element, .negate = false});
            auto can_be_false = std::make_unique<Node>(Node{.type = Node::Type::Leaf, .element = &element, .negate = true});
            rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
        }
        else if (element.function == RPNElement::FUNCTION_NOT)
        {
            assert(!rpn_stack.empty());

            std::swap(rpn_stack.back().can_be_true, rpn_stack.back().can_be_false);
        }
        else if (element.function == RPNElement::FUNCTION_AND)
        {
            /// (a AND b) may be false when a may be false OR b may be false.
            apply_binary(Node::Type::And, Node::Type::Or);
        }
        else if (element.function == RPNElement::FUNCTION_OR)
        {
            /// (a OR b) may be false only when a may be false AND b may be false.
            apply_binary(Node::Type::Or, Node::Type::And);
        }
        else if (element.function == RPNElement::ALWAYS_FALSE)
        {
            push_constant_frame(Node::Type::False, Node::Type::True);
        }
        else if (element.function == RPNElement::ALWAYS_TRUE)
        {
            push_constant_frame(Node::Type::True, Node::Type::False);
        }
        else
            throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
    }

    if (rpn_stack.size() != 1)
        throw Exception("Unexpected stack size in KeyCondition::getDescription", ErrorCodes::LOGICAL_ERROR);

    /// Key names addressed by key index (`key.second`), plus a flag per index
    /// telling whether some printed leaf refers to that key.
    std::vector<std::string_view> key_names(key_columns.size());
    std::vector<bool> is_key_used(key_columns.size(), false);

    for (const auto & key : key_columns)
        key_names[key.second] = key.first;

    /// Recursively prints a tree; marks the key of every visited leaf as used.
    std::function<std::string(const Node *)> describe;
    describe = [&describe, &key_names, &is_key_used](const Node * node) -> std::string
    {
        switch (node->type)
        {
            case Node::Type::Leaf:
            {
                is_key_used[node->element->key_column] = true;

                /// Note: for condition with double negation, like `not(x not in set)`,
                /// we can replace it to `x in set` here.
                /// But I won't do it, because `cloneASTWithInversionPushDown` already push down `not`.
                /// So, this seem to be impossible for `can_be_true` tree.
                std::string res;
                if (node->negate)
                    res += "not(";
                res += node->element->toString(key_names[node->element->key_column], true);
                if (node->negate)
                    res += ")";
                return res;
            }
            case Node::Type::True:
                return "true";
            case Node::Type::False:
                return "false";
            case Node::Type::And:
                return "and(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")";
            case Node::Type::Or:
                return "or(" + describe(node->left.get()) + ", " + describe(node->right.get()) + ")";
        }

        __builtin_unreachable();
    };

    Description description;
    description.condition = describe(rpn_stack.front().can_be_true.get());

    for (size_t i = 0; i < key_names.size(); ++i)
        if (is_key_used[i])
            description.used_keys.emplace_back(key_names[i]);

    return description;
}
/** Index is the value of key every `index_granularity` rows.
* This value is called a "mark". That is, the index consists of marks.
@ -1732,18 +1932,38 @@ bool KeyCondition::mayBeTrueAfter(
return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true;
}
String KeyCondition::RPNElement::toString() const
/// Short form: the key column is shown by its index ("column N") and constants are not printed.
String KeyCondition::RPNElement::toString() const
{
    const std::string positional_name = "column " + std::to_string(key_column);
    return toString(positional_name, false);
}
String KeyCondition::RPNElement::toString(const std::string_view & column_name, bool print_constants) const
{
auto print_wrapped_column = [this](WriteBuffer & buf)
auto print_wrapped_column = [this, &column_name, print_constants](WriteBuffer & buf)
{
for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it)
{
buf << (*it)->getName() << "(";
if (print_constants)
{
if (const auto * func = typeid_cast<const FunctionWithOptionalConstArg *>(it->get()))
{
if (func->getKind() == FunctionWithOptionalConstArg::Kind::LEFT_CONST)
buf << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]) << ", ";
}
}
}
buf << "column " << key_column;
buf << column_name;
/// Close the calls innermost-first. The opening loop above walks the chain in reverse,
/// i.e. the last element of the chain is printed as the outermost call. The right-hand
/// constant and ")" of that outermost call therefore have to be printed last, which means
/// walking the chain forward here; otherwise right-hand constants of nested functions
/// would be attached to the wrong call.
for (const auto & monotonic_function : monotonic_functions_chain)
{
    if (print_constants)
    {
        if (const auto * func = typeid_cast<const FunctionWithOptionalConstArg *>(monotonic_function.get()))
        {
            if (func->getKind() == FunctionWithOptionalConstArg::Kind::RIGHT_CONST)
                buf << ", " << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]);
        }
    }
    buf << ")";
}
};
WriteBufferFromOwnString buf;

View File

@ -293,6 +293,15 @@ public:
String toString() const;
/// Condition description for EXPLAIN query.
struct Description
{
/// Names of the key columns referenced by the described condition.
std::vector<std::string> used_keys;
/// Textual form of the condition, composed of and(...), or(...), not(...), true / false
/// and the string representations of the individual RPN elements.
std::string condition;
};
/// Builds the Description (used key names and condition text) for this key condition.
Description getDescription() const;
/** A chain of possibly monotone functions.
* If the key column is wrapped in functions that can be monotonous in some value ranges
@ -345,6 +354,7 @@ private:
: function(function_), range(range_), key_column(key_column_) {}
String toString() const;
String toString(const std::string_view & column_name, bool print_constants) const;
Function function = FUNCTION_UNKNOWN;

View File

@ -774,9 +774,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
if (metadata_snapshot->hasPrimaryKey())
{
auto description = key_condition.getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::PrimaryKey,
.description = key_condition.toString(),
.description = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = sum_parts_pk.load(std::memory_order_relaxed),
.num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)});
}