Merge pull request #67395 from ClickHouse/anoter-case-of-non-deterministic-func-in-group-by-key

Fix another case of a non-deterministic PK
This commit is contained in:
Nikolai Kochetov 2024-08-06 08:56:54 +00:00 committed by GitHub
commit 614b78495a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 90 additions and 46 deletions

View File

@ -100,6 +100,13 @@ bool isConstantFromScalarSubquery(const ActionsDAG::Node * node)
}
bool ActionsDAG::Node::isDeterministic() const
{
bool deterministic_if_func = type != ActionType::FUNCTION || function_base->isDeterministic();
bool deterministic_if_const = type != ActionType::COLUMN || is_deterministic_constant;
return deterministic_if_func && deterministic_if_const;
}
void ActionsDAG::Node::toTree(JSONBuilder::JSONMap & map) const
{
map.add("Node Type", magic_enum::enum_name(type));
@ -318,7 +325,6 @@ const ActionsDAG::Node & ActionsDAG::addFunctionImpl(
node.function_base = function_base;
node.result_type = result_type;
node.function = node.function_base->prepare(arguments);
node.is_deterministic = node.function_base->isDeterministic();
/// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function.
if (node.function_base->isSuitableForConstantFolding())
@ -536,64 +542,99 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta
/// Removes from `nodes` and `inputs` everything that is not needed by the DAG roots.
///
/// NOTE(review): this span is a rendered diff hunk. Lines of the removed implementation
/// (the `visited_nodes` / `std::stack<Node *>` traversal) and lines of the added implementation
/// (`roots` / `Frame` / `VisitStage`) appear together without +/- markers, so it does not
/// compile as shown. The description below covers the added implementation only.
///
/// Roots are the DAG outputs, every ARRAY_JOIN node, and every INPUT node contained in `used_inputs`.
/// The roots are traversed twice with an explicit stack of frames; `required_nodes` is cleared
/// at the start of each traversal:
///  - VisitStage::NonDeterministic: once all children of a node have been visited, the node is put
///    into `non_deterministic_nodes` if its own isDeterministic() is false or if any child is already
///    in that set. Then, if `allow_constant_folding` is set and the node has children and a constant
///    column, it is turned into a COLUMN node, its children are dropped, and
///    `is_deterministic_constant` records whether the node was marked non-deterministic.
///  - VisitStage::Required: only collects the nodes reachable from the roots
///    (presumably needed because folding in the first stage removes edges -- TODO confirm).
/// Finally, nodes and inputs that are absent from `required_nodes` are erased.
void ActionsDAG::removeUnusedActions(const std::unordered_set<const Node *> & used_inputs, bool allow_constant_folding)
{
std::unordered_set<const Node *> visited_nodes;
std::stack<Node *> stack;
for (const auto * node : outputs)
{
visited_nodes.insert(node);
stack.push(const_cast<Node *>(node));
}
NodeRawConstPtrs roots;
roots.reserve(outputs.size() + used_inputs.size());
roots = outputs;
for (auto & node : nodes)
{
/// We cannot remove arrayJoin because it changes the number of rows.
bool is_array_join = node.type == ActionType::ARRAY_JOIN;
if (is_array_join && !visited_nodes.contains(&node))
{
visited_nodes.insert(&node);
stack.push(&node);
}
if (node.type == ActionType::ARRAY_JOIN)
roots.push_back(&node);
if (node.type == ActionType::INPUT && used_inputs.contains(&node))
visited_nodes.insert(&node);
roots.push_back(&node);
}
while (!stack.empty())
std::unordered_set<const Node *> required_nodes;
std::unordered_set<const Node *> non_deterministic_nodes;
struct Frame
{
auto * node = stack.top();
stack.pop();
const ActionsDAG::Node * node;
size_t next_child_to_visit = 0;
};
/// Constant folding.
if (allow_constant_folding && !node->children.empty() && node->column && isColumnConst(*node->column))
std::stack<Frame> stack;
enum class VisitStage { NonDeterministic, Required };
for (auto stage : {VisitStage::NonDeterministic, VisitStage::Required})
{
required_nodes.clear();
for (const auto * root : roots)
{
node->type = ActionsDAG::ActionType::COLUMN;
for (const auto & child : node->children)
if (!required_nodes.contains(root))
{
if (!child->is_deterministic)
required_nodes.insert(root);
stack.push({.node = root});
}
while (!stack.empty())
{
auto & frame = stack.top();
auto * node = const_cast<Node *>(frame.node);
while (frame.next_child_to_visit < node->children.size())
{
node->is_deterministic = false;
break;
const auto * child = node->children[frame.next_child_to_visit];
++frame.next_child_to_visit;
if (!required_nodes.contains(child))
{
required_nodes.insert(child);
stack.push({.node = child});
break;
}
}
if (stack.top().node != node)
continue;
stack.pop();
if (stage == VisitStage::Required)
continue;
if (!node->isDeterministic())
non_deterministic_nodes.insert(node);
else
{
for (const auto * child : node->children)
{
if (non_deterministic_nodes.contains(child))
{
non_deterministic_nodes.insert(node);
break;
}
}
}
/// Constant folding.
if (allow_constant_folding && !node->children.empty()
&& node->column && isColumnConst(*node->column))
{
node->type = ActionsDAG::ActionType::COLUMN;
node->children.clear();
node->is_deterministic_constant = !non_deterministic_nodes.contains(node);
}
}
node->children.clear();
}
for (const auto * child : node->children)
{
if (!visited_nodes.contains(child))
{
stack.push(const_cast<Node *>(child));
visited_nodes.insert(child);
}
}
}
std::erase_if(nodes, [&](const Node & node) { return !visited_nodes.contains(&node); });
std::erase_if(inputs, [&](const Node * node) { return !visited_nodes.contains(node); });
std::erase_if(nodes, [&](const Node & node) { return !required_nodes.contains(&node); });
std::erase_if(inputs, [&](const Node * node) { return !required_nodes.contains(node); });
}
@ -1379,7 +1420,7 @@ bool ActionsDAG::trivial() const
/// Scans every node of the DAG and throws a BAD_ARGUMENTS exception, naming the node's
/// result_name, for a node that is not deterministic.
/// NOTE(review): rendered diff hunk -- the `is_deterministic` field check is the removed line and
/// the `isDeterministic()` call is its replacement; both are shown here without +/- markers.
void ActionsDAG::assertDeterministic() const
{
for (const auto & node : nodes)
if (!node.is_deterministic)
if (!node.isDeterministic())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name);
}
@ -1387,7 +1428,7 @@ void ActionsDAG::assertDeterministic() const
/// Returns true as soon as a node of the DAG is found to be non-deterministic, false otherwise.
/// NOTE(review): rendered diff hunk -- the `is_deterministic` field check is the removed line and
/// the `isDeterministic()` call is its replacement; both are shown here without +/- markers.
bool ActionsDAG::hasNonDeterministic() const
{
for (const auto & node : nodes)
if (!node.is_deterministic)
if (!node.isDeterministic())
return true;
return false;
}

View File

@ -80,13 +80,15 @@ public:
ExecutableFunctionPtr function;
/// If function is a compiled statement.
bool is_function_compiled = false;
/// It is deterministic (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic = true;
/// It is a constant calculated from deterministic functions (See IFunction::isDeterministic).
/// This property is kept after constant folding of non-deterministic functions like 'now', 'today'.
bool is_deterministic_constant = true;
/// For COLUMN node and propagated constants.
ColumnPtr column;
/// Whether the result of this node is deterministic. Checks only this node, not a subtree.
bool isDeterministic() const;
void toTree(JSONBuilder::JSONMap & map) const;
};

View File

@ -1,3 +1,4 @@
-- Each statement uses now() inside the MergeTree ORDER BY expression and is expected
-- to be rejected by the server with BAD_ARGUMENTS.
CREATE TABLE a (number UInt64) ENGINE = MergeTree ORDER BY if(now() > toDateTime('2020-06-01 13:31:40'), toInt64(number), -number); -- { serverError BAD_ARGUMENTS }
CREATE TABLE b (number UInt64) ENGINE = MergeTree ORDER BY now() > toDateTime(number); -- { serverError BAD_ARGUMENTS }
CREATE TABLE c (number UInt64) ENGINE = MergeTree ORDER BY now(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE d (number UInt64) ENGINE = MergeTree ORDER BY now() + 1 + 1 + number; -- { serverError BAD_ARGUMENTS }