mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-19 21:03:51 +00:00
Merge pull request #66303 from ClickHouse/backport/24.6/65835
Backport #65835 to 24.6: Better handling of join conditions involving IS NULL checks
This commit is contained in:
commit
1881dc3b6c
@ -10,6 +10,7 @@
|
|||||||
#include <Analyzer/Utils.h>
|
#include <Analyzer/Utils.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -26,12 +27,103 @@ static constexpr std::array boolean_functions{
|
|||||||
"like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv,
|
"like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv,
|
||||||
"or"sv};
|
"or"sv};
|
||||||
|
|
||||||
static bool isBooleanFunction(const String & func_name)
|
|
||||||
|
bool isBooleanFunction(const String & func_name)
|
||||||
{
|
{
|
||||||
return std::any_of(
|
return std::any_of(
|
||||||
boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; });
|
boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isNodeFunction(const QueryTreeNodePtr & node, const String & func_name)
|
||||||
|
{
|
||||||
|
if (const auto * function_node = node->as<FunctionNode>())
|
||||||
|
return function_node->getFunctionName() == func_name;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the argument at position `idx` of the function node `node`.
/// Throws LOGICAL_ERROR if `node` is not a function node or it has no argument at that position.
QueryTreeNodePtr getFunctionArgument(const QueryTreeNodePtr & node, size_t idx)
{
    const auto * as_function = node->as<FunctionNode>();
    const bool has_requested_argument = as_function != nullptr && idx < as_function->getArguments().getNodes().size();

    /// Both failure modes (not a function, too few arguments) are reported with the same message.
    if (!has_requested_argument)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected '{}' to be a function with at least {} arguments", node->formatASTForErrorMessage(), idx + 1);

    return as_function->getArguments().getNodes()[idx];
}
|
||||||
|
|
||||||
|
/// Returns the first node in `nodes` that is an `equals` function with exactly two arguments,
/// or nullptr if there is no such node.
QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
{
    const auto is_binary_equals = [](const QueryTreeNodePtr & candidate)
    {
        const auto * as_function = candidate->as<FunctionNode>();
        if (as_function == nullptr || as_function->getFunctionName() != "equals")
            return false;
        return as_function->getArguments().getNodes().size() == 2;
    };

    const auto found = std::find_if(nodes.begin(), nodes.end(), is_binary_equals);
    if (found == nodes.end())
        return nullptr;
    return *found;
}
|
||||||
|
|
||||||
|
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
|
||||||
|
{
|
||||||
|
const auto * constant_node = node->as<ConstantNode>();
|
||||||
|
if (!constant_node || !constant_node->getResultType()->equals(DataTypeUInt8()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
UInt64 constant_value;
|
||||||
|
return (constant_node->getValue().tryGet<UInt64>(constant_value) && constant_value == expected_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if expression consists of only conjunctions of functions with the specified name or true constants
|
||||||
|
bool isOnlyConjunctionOfFunctions(
|
||||||
|
const QueryTreeNodePtr & node,
|
||||||
|
const String & func_name,
|
||||||
|
const QueryTreeNodePtrWithHashSet & allowed_arguments)
|
||||||
|
{
|
||||||
|
if (isBooleanConstant(node, true))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const auto * node_function = node->as<FunctionNode>();
|
||||||
|
if (!node_function)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (node_function->getFunctionName() == func_name
|
||||||
|
&& allowed_arguments.contains(node_function->getArgumentsNode()))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (node_function->getFunctionName() == "and")
|
||||||
|
{
|
||||||
|
for (const auto & and_argument : node_function->getArguments().getNodes())
|
||||||
|
{
|
||||||
|
if (!isOnlyConjunctionOfFunctions(and_argument, func_name, allowed_arguments))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We can rewrite to a <=> b only if we are joining on a and b,
|
||||||
|
/// because the function is not yet implemented for other cases.
|
||||||
|
bool isTwoArgumentsFromDifferentSides(const FunctionNode & node_function, const JoinNode & join_node)
|
||||||
|
{
|
||||||
|
const auto & argument_nodes = node_function.getArguments().getNodes();
|
||||||
|
if (argument_nodes.size() != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto first_src = getExpressionSource(argument_nodes[0]);
|
||||||
|
auto second_src = getExpressionSource(argument_nodes[1]);
|
||||||
|
if (!first_src || !second_src)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const auto & lhs_join = *join_node.getLeftTableExpression();
|
||||||
|
const auto & rhs_join = *join_node.getRightTableExpression();
|
||||||
|
return (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) ||
|
||||||
|
(first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join));
|
||||||
|
}
|
||||||
|
|
||||||
/// Visitor that optimizes logical expressions _only_ in JOIN ON section
|
/// Visitor that optimizes logical expressions _only_ in JOIN ON section
|
||||||
class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext<JoinOnLogicalExpressionOptimizerVisitor>
|
class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext<JoinOnLogicalExpressionOptimizerVisitor>
|
||||||
{
|
{
|
||||||
@ -47,15 +139,16 @@ public:
|
|||||||
{
|
{
|
||||||
auto * function_node = node->as<FunctionNode>();
|
auto * function_node = node->as<FunctionNode>();
|
||||||
|
|
||||||
if (!function_node)
|
QueryTreeNodePtr new_node = nullptr;
|
||||||
return;
|
if (function_node && function_node->getFunctionName() == "or")
|
||||||
|
new_node = tryOptimizeJoinOnNulls(function_node->getArguments().getNodes(), getContext());
|
||||||
|
else
|
||||||
|
new_node = tryOptimizeJoinOnNulls({node}, getContext());
|
||||||
|
|
||||||
if (function_node->getFunctionName() == "or")
|
if (new_node)
|
||||||
{
|
{
|
||||||
bool is_argument_type_changed = tryOptimizeIsNotDistinctOrIsNull(node, getContext());
|
need_rerun_resolve |= !new_node->getResultType()->equals(*node->getResultType());
|
||||||
if (is_argument_type_changed)
|
node = new_node;
|
||||||
need_rerun_resolve = true;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,15 +165,11 @@ private:
|
|||||||
const JoinNode * join_node;
|
const JoinNode * join_node;
|
||||||
bool need_rerun_resolve = false;
|
bool need_rerun_resolve = false;
|
||||||
|
|
||||||
/// Returns true if type of some operand is changed and parent function needs to be re-resolved
|
/// Returns the optimized node, or nullptr if nothing has been changed
|
||||||
bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context)
|
QueryTreeNodePtr tryOptimizeJoinOnNulls(const QueryTreeNodes & nodes, const ContextPtr & context)
|
||||||
{
|
{
|
||||||
auto & function_node = node->as<FunctionNode &>();
|
|
||||||
chassert(function_node.getFunctionName() == "or");
|
|
||||||
|
|
||||||
|
|
||||||
QueryTreeNodes or_operands;
|
QueryTreeNodes or_operands;
|
||||||
or_operands.reserve(function_node.getArguments().getNodes().size());
|
or_operands.reserve(nodes.size());
|
||||||
|
|
||||||
/// Indices of `equals` or `isNotDistinctFrom` functions in the vector above
|
/// Indices of `equals` or `isNotDistinctFrom` functions in the vector above
|
||||||
std::vector<size_t> equals_functions_indices;
|
std::vector<size_t> equals_functions_indices;
|
||||||
@ -93,47 +182,73 @@ private:
|
|||||||
* b => [(a IS NULL AND b IS NULL)]
|
* b => [(a IS NULL AND b IS NULL)]
|
||||||
* c => [(a IS NULL AND c IS NULL)]
|
* c => [(a IS NULL AND c IS NULL)]
|
||||||
* }
|
* }
|
||||||
* Then for each a <=> b we can find all operands that contains both a IS NULL and b IS NULL
|
* Then for each equality a = b we can check if we have operand (a IS NULL AND b IS NULL)
|
||||||
*/
|
*/
|
||||||
QueryTreeNodePtrWithHashMap<std::vector<size_t>> is_null_argument_to_indices;
|
QueryTreeNodePtrWithHashMap<std::vector<size_t>> is_null_argument_to_indices;
|
||||||
|
|
||||||
for (const auto & argument : function_node.getArguments())
|
bool is_anything_changed = false;
|
||||||
{
|
|
||||||
or_operands.push_back(argument);
|
|
||||||
|
|
||||||
auto * argument_function = argument->as<FunctionNode>();
|
for (const auto & node : nodes)
|
||||||
|
{
|
||||||
|
if (isBooleanConstant(node, false))
|
||||||
|
{
|
||||||
|
/// Remove false constants from OR
|
||||||
|
is_anything_changed = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
or_operands.push_back(node);
|
||||||
|
auto * argument_function = node->as<FunctionNode>();
|
||||||
if (!argument_function)
|
if (!argument_function)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const auto & func_name = argument_function->getFunctionName();
|
const auto & func_name = argument_function->getFunctionName();
|
||||||
if (func_name == "equals" || func_name == "isNotDistinctFrom")
|
if (func_name == "equals" || func_name == "isNotDistinctFrom")
|
||||||
{
|
{
|
||||||
const auto & argument_nodes = argument_function->getArguments().getNodes();
|
if (isTwoArgumentsFromDifferentSides(*argument_function, *join_node))
|
||||||
if (argument_nodes.size() != 2)
|
|
||||||
continue;
|
|
||||||
/// We can rewrite to a <=> b only if we are joining on a and b,
|
|
||||||
/// because the function is not yet implemented for other cases.
|
|
||||||
auto first_src = getExpressionSource(argument_nodes[0]);
|
|
||||||
auto second_src = getExpressionSource(argument_nodes[1]);
|
|
||||||
if (!first_src || !second_src)
|
|
||||||
continue;
|
|
||||||
const auto & lhs_join = *join_node->getLeftTableExpression();
|
|
||||||
const auto & rhs_join = *join_node->getRightTableExpression();
|
|
||||||
bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) ||
|
|
||||||
(first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join));
|
|
||||||
if (!arguments_from_both_sides)
|
|
||||||
continue;
|
|
||||||
equals_functions_indices.push_back(or_operands.size() - 1);
|
equals_functions_indices.push_back(or_operands.size() - 1);
|
||||||
}
|
}
|
||||||
else if (func_name == "and")
|
else if (func_name == "and")
|
||||||
{
|
{
|
||||||
for (const auto & and_argument : argument_function->getArguments().getNodes())
|
const auto & and_arguments = argument_function->getArguments().getNodes();
|
||||||
|
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
|
||||||
|
if (all_are_is_null)
|
||||||
{
|
{
|
||||||
auto * and_argument_function = and_argument->as<FunctionNode>();
|
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
|
||||||
if (and_argument_function && and_argument_function->getFunctionName() == "isNull")
|
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` can be replaced with `a = b`.
|
||||||
|
/// Even though these expressions are not equivalent (the first is NULL on NULLs, while the second is FALSE),
|
||||||
|
/// it is still correct since for JOIN ON condition NULL is treated as FALSE
|
||||||
|
if (const auto & equals_function = findEqualsFunction(and_arguments))
|
||||||
{
|
{
|
||||||
const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0];
|
const auto & equals_arguments = equals_function->as<FunctionNode>()->getArguments().getNodes();
|
||||||
is_null_argument_to_indices[is_null_argument].push_back(or_operands.size() - 1);
|
/// Expected isNotNull arguments
|
||||||
|
QueryTreeNodePtrWithHashSet allowed_arguments;
|
||||||
|
allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared<ListNode>(QueryTreeNodes{equals_arguments[0]})));
|
||||||
|
allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared<ListNode>(QueryTreeNodes{equals_arguments[1]})));
|
||||||
|
|
||||||
|
bool can_be_optimized = true;
|
||||||
|
for (const auto & and_argument : and_arguments)
|
||||||
|
{
|
||||||
|
if (and_argument.get() == equals_function.get())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (isOnlyConjunctionOfFunctions(and_argument, "isNotNull", allowed_arguments))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
can_be_optimized = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (can_be_optimized)
|
||||||
|
{
|
||||||
|
is_anything_changed = true;
|
||||||
|
or_operands.pop_back();
|
||||||
|
or_operands.push_back(equals_function);
|
||||||
|
if (isTwoArgumentsFromDifferentSides(equals_function->as<FunctionNode &>(), *join_node))
|
||||||
|
equals_functions_indices.push_back(or_operands.size() - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,9 +259,9 @@ private:
|
|||||||
|
|
||||||
for (size_t equals_function_idx : equals_functions_indices)
|
for (size_t equals_function_idx : equals_functions_indices)
|
||||||
{
|
{
|
||||||
auto * equals_function = or_operands[equals_function_idx]->as<FunctionNode>();
|
const auto * equals_function = or_operands[equals_function_idx]->as<FunctionNode>();
|
||||||
|
|
||||||
/// For a <=> b we are looking for expressions containing both `a IS NULL` and `b IS NULL` combined with AND
|
/// For a = b we are looking for all expressions `a IS NULL AND b IS NULL`
|
||||||
const auto & argument_nodes = equals_function->getArguments().getNodes();
|
const auto & argument_nodes = equals_function->getArguments().getNodes();
|
||||||
const auto & lhs_is_null_parents = is_null_argument_to_indices[argument_nodes[0]];
|
const auto & lhs_is_null_parents = is_null_argument_to_indices[argument_nodes[0]];
|
||||||
const auto & rhs_is_null_parents = is_null_argument_to_indices[argument_nodes[1]];
|
const auto & rhs_is_null_parents = is_null_argument_to_indices[argument_nodes[1]];
|
||||||
@ -161,60 +276,40 @@ private:
|
|||||||
|
|
||||||
for (size_t to_optimize_idx : operands_to_optimize)
|
for (size_t to_optimize_idx : operands_to_optimize)
|
||||||
{
|
{
|
||||||
/// We are looking for operand `a IS NULL AND b IS NULL AND ...`
|
/// Remove `a IS NULL AND b IS NULL`
|
||||||
auto * operand_to_optimize = or_operands[to_optimize_idx]->as<FunctionNode>();
|
or_operands[to_optimize_idx] = nullptr;
|
||||||
|
is_anything_changed = true;
|
||||||
/// Remove `a IS NULL` and `b IS NULL` arguments from AND
|
|
||||||
QueryTreeNodes new_arguments;
|
|
||||||
for (const auto & and_argument : operand_to_optimize->getArguments().getNodes())
|
|
||||||
{
|
|
||||||
bool to_eliminate = false;
|
|
||||||
|
|
||||||
const auto * and_argument_function = and_argument->as<FunctionNode>();
|
|
||||||
if (and_argument_function && and_argument_function->getFunctionName() == "isNull")
|
|
||||||
{
|
|
||||||
const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0];
|
|
||||||
to_eliminate = (is_null_argument->isEqual(*argument_nodes[0]) || is_null_argument->isEqual(*argument_nodes[1]));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (to_eliminate)
|
|
||||||
arguments_to_reresolve.insert(to_optimize_idx);
|
|
||||||
else
|
|
||||||
new_arguments.emplace_back(and_argument);
|
|
||||||
}
|
|
||||||
/// If less than two arguments left, we will remove or replace the whole AND below
|
|
||||||
operand_to_optimize->getArguments().getNodes() = std::move(new_arguments);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arguments_to_reresolve.empty())
|
if (arguments_to_reresolve.empty() && !is_anything_changed)
|
||||||
/// Nothing has been changed
|
/// Nothing has been changed
|
||||||
return false;
|
return nullptr;
|
||||||
|
|
||||||
auto and_function_resolver = FunctionFactory::instance().get("and", context);
|
auto and_function_resolver = FunctionFactory::instance().get("and", context);
|
||||||
auto strict_equals_function_resolver = FunctionFactory::instance().get("isNotDistinctFrom", context);
|
auto strict_equals_function_resolver = FunctionFactory::instance().get("isNotDistinctFrom", context);
|
||||||
|
|
||||||
bool need_reresolve = false;
|
|
||||||
QueryTreeNodes new_or_operands;
|
QueryTreeNodes new_or_operands;
|
||||||
for (size_t i = 0; i < or_operands.size(); ++i)
|
for (size_t i = 0; i < or_operands.size(); ++i)
|
||||||
{
|
{
|
||||||
if (arguments_to_reresolve.contains(i))
|
if (arguments_to_reresolve.contains(i))
|
||||||
{
|
{
|
||||||
auto * function = or_operands[i]->as<FunctionNode>();
|
const auto * function = or_operands[i]->as<FunctionNode>();
|
||||||
if (function->getFunctionName() == "equals")
|
if (function->getFunctionName() == "equals")
|
||||||
{
|
{
|
||||||
/// We should replace `a = b` with `a <=> b` because we removed checks for IS NULL
|
/// We should replace `a = b` with `a <=> b` because we removed checks for IS NULL
|
||||||
need_reresolve |= function->getResultType()->isNullable();
|
auto new_function = or_operands[i]->clone();
|
||||||
function->resolveAsFunction(strict_equals_function_resolver);
|
new_function->as<FunctionNode>()->resolveAsFunction(strict_equals_function_resolver);
|
||||||
new_or_operands.emplace_back(std::move(or_operands[i]));
|
new_or_operands.emplace_back(std::move(new_function));
|
||||||
}
|
}
|
||||||
else if (function->getFunctionName() == "and")
|
else if (function->getFunctionName() == "and")
|
||||||
{
|
{
|
||||||
const auto & and_arguments = function->getArguments().getNodes();
|
const auto & and_arguments = function->getArguments().getNodes();
|
||||||
if (and_arguments.size() > 1)
|
if (and_arguments.size() > 1)
|
||||||
{
|
{
|
||||||
function->resolveAsFunction(and_function_resolver);
|
auto new_function = or_operands[i]->clone();
|
||||||
new_or_operands.emplace_back(std::move(or_operands[i]));
|
new_function->as<FunctionNode>()->resolveAsFunction(and_function_resolver);
|
||||||
|
new_or_operands.emplace_back(std::move(new_function));
|
||||||
}
|
}
|
||||||
else if (and_arguments.size() == 1)
|
else if (and_arguments.size() == 1)
|
||||||
{
|
{
|
||||||
@ -223,25 +318,26 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name: '{}'", function->getFunctionName());
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function '{}'", function->getFunctionName());
|
||||||
}
|
}
|
||||||
else
|
else if (or_operands[i])
|
||||||
{
|
{
|
||||||
new_or_operands.emplace_back(std::move(or_operands[i]));
|
new_or_operands.emplace_back(std::move(or_operands[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_or_operands.empty())
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
if (new_or_operands.size() == 1)
|
if (new_or_operands.size() == 1)
|
||||||
{
|
return new_or_operands[0];
|
||||||
node = std::move(new_or_operands[0]);
|
|
||||||
return need_reresolve;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Rebuild OR function
|
/// Rebuild OR function
|
||||||
auto or_function_resolver = FunctionFactory::instance().get("or", context);
|
auto or_function_resolver = FunctionFactory::instance().get("or", context);
|
||||||
function_node.getArguments().getNodes() = std::move(new_or_operands);
|
auto function_node = std::make_shared<FunctionNode>("or");
|
||||||
function_node.resolveAsFunction(or_function_resolver);
|
function_node->getArguments().getNodes() = std::move(new_or_operands);
|
||||||
return need_reresolve;
|
function_node->resolveAsFunction(or_function_resolver);
|
||||||
|
return function_node;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -940,6 +940,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
|
|||||||
table_join.locality,
|
table_join.locality,
|
||||||
result_join_strictness,
|
result_join_strictness,
|
||||||
result_join_kind);
|
result_join_kind);
|
||||||
|
join_node->setOriginalAST(table_element.table_join);
|
||||||
|
|
||||||
/** Original AST is not set because it will contain only join part and does
|
/** Original AST is not set because it will contain only join part and does
|
||||||
* not include left table expression.
|
* not include left table expression.
|
||||||
|
@ -419,6 +419,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
|||||||
"less", " < ",
|
"less", " < ",
|
||||||
"greater", " > ",
|
"greater", " > ",
|
||||||
"equals", " = ",
|
"equals", " = ",
|
||||||
|
"isNotDistinctFrom", " <=> ",
|
||||||
"like", " LIKE ",
|
"like", " LIKE ",
|
||||||
"ilike", " ILIKE ",
|
"ilike", " ILIKE ",
|
||||||
"notLike", " NOT LIKE ",
|
"notLike", " NOT LIKE ",
|
||||||
|
@ -528,7 +528,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
|||||||
size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size();
|
size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size();
|
||||||
|
|
||||||
if (join_clause_key_nodes_size == 0)
|
if (join_clause_key_nodes_size == 0)
|
||||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys",
|
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}",
|
||||||
join_node.formatASTForErrorMessage());
|
join_node.formatASTForErrorMessage());
|
||||||
|
|
||||||
for (size_t i = 0; i < join_clause_key_nodes_size; ++i)
|
for (size_t i = 0; i < join_clause_key_nodes_size; ++i)
|
||||||
|
@ -72,7 +72,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE
|
|||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 }
|
||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 }
|
||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id;
|
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id;
|
||||||
|
@ -70,7 +70,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE
|
|||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 }
|
||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 }
|
||||||
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 }
|
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 }
|
SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 }
|
||||||
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; -- { serverError 48 }
|
SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; -- { serverError 48 }
|
||||||
|
@ -3,7 +3,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O
|
|||||||
2 2 2 2
|
2 2 2 2
|
||||||
3 3 3 33
|
3 3 3 33
|
||||||
\N \N \N \N
|
\N \N \N \N
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
||||||
1 42 4 42
|
1 42 4 42
|
||||||
2 2 2 2
|
2 2 2 2
|
||||||
3 3 3 33
|
3 3 3 33
|
||||||
@ -12,14 +12,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER
|
|||||||
2 2 2 2
|
2 2 2 2
|
||||||
3 3 3 33
|
3 3 3 33
|
||||||
\N \N \N \N
|
\N \N \N \N
|
||||||
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x;
|
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x;
|
||||||
2 2 2 2
|
2 2 2 2
|
||||||
3 3 3 33
|
3 3 3 33
|
||||||
\N \N \N \N
|
\N \N \N \N
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
||||||
2 2 2 2
|
2 2 2 2
|
||||||
\N \N \N \N
|
\N \N \N \N
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST;
|
||||||
1 42 4 42
|
1 42 4 42
|
||||||
2 2 2 2
|
2 2 2 2
|
||||||
3 3 3 33
|
3 3 3 33
|
||||||
@ -31,3 +31,30 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
|
|||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
|
||||||
|
2 2 2 2
|
||||||
|
3 3 3 33
|
||||||
|
\N \N \N \N
|
||||||
|
-- aliases defined in the join condition are valid
|
||||||
|
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
|
||||||
|
1 42 \N \N \N 0
|
||||||
|
2 2 2 2 1 1
|
||||||
|
3 3 3 33 1 1
|
||||||
|
\N \N 4 42 \N 0
|
||||||
|
\N \N \N \N \N 1
|
||||||
|
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
|
||||||
|
1 42 \N \N \N 0
|
||||||
|
2 2 2 2 1 1
|
||||||
|
3 3 3 33 1 1
|
||||||
|
\N \N 4 42 \N 0
|
||||||
|
\N \N \N \N \N 0
|
||||||
|
\N \N \N \N \N 0
|
||||||
|
-- check for non-nullable columns for which `is null` is replaced with constant
|
||||||
|
SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
|
||||||
|
2 2 2 2
|
||||||
|
3 3 3 33
|
||||||
|
--
|
||||||
|
0
|
||||||
|
0
|
||||||
|
2
|
||||||
|
2
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
DROP TABLE IF EXISTS t1;
|
DROP TABLE IF EXISTS t1;
|
||||||
DROP TABLE IF EXISTS t2;
|
DROP TABLE IF EXISTS t2;
|
||||||
|
DROP TABLE IF EXISTS t1n;
|
||||||
|
DROP TABLE IF EXISTS t2n;
|
||||||
|
|
||||||
CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog;
|
CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog;
|
||||||
CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog;
|
CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog;
|
||||||
@ -7,24 +9,62 @@ CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog;
|
|||||||
INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL);
|
INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL);
|
||||||
INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42);
|
INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42);
|
||||||
|
|
||||||
|
CREATE TABLE t1n (x Int64, y UInt64) ENGINE = TinyLog;
|
||||||
|
CREATE TABLE t2n (x Int64, y UInt64) ENGINE = TinyLog;
|
||||||
|
|
||||||
|
INSERT INTO t1n VALUES (1,42), (2,2), (3,3);
|
||||||
|
INSERT INTO t2n VALUES (2,2), (3,33), (4,42);
|
||||||
|
|
||||||
SET allow_experimental_analyzer = 1;
|
SET allow_experimental_analyzer = 1;
|
||||||
|
|
||||||
-- { echoOn }
|
-- { echoOn }
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x;
|
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x;
|
||||||
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x;
|
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x;
|
||||||
|
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST;
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
|
SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
|
||||||
|
|
||||||
|
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
|
-- aliases defined in the join condition are valid
|
||||||
|
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
|
||||||
|
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
|
||||||
|
|
||||||
|
-- check for non-nullable columns for which `is null` is replaced with constant
|
||||||
|
SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
|
||||||
|
|
||||||
-- { echoOff }
|
-- { echoOff }
|
||||||
|
|
||||||
|
SELECT '--';
|
||||||
|
|
||||||
|
-- IS NOT NULL and constants are optimized out
|
||||||
|
SELECT count() FROM ( EXPLAIN QUERY TREE
|
||||||
|
SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND (t1.x IS NOT NULL) AND true AND (t2.x IS NOT NULL) )
|
||||||
|
) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%';
|
||||||
|
|
||||||
|
SELECT count() FROM ( EXPLAIN QUERY TREE
|
||||||
|
SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND true )
|
||||||
|
) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%';
|
||||||
|
|
||||||
|
-- this is not optimized out
|
||||||
|
SELECT count() FROM ( EXPLAIN QUERY TREE
|
||||||
|
SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL)
|
||||||
|
) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%';
|
||||||
|
|
||||||
|
SELECT count() FROM ( EXPLAIN QUERY TREE
|
||||||
|
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL)
|
||||||
|
) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%';
|
||||||
|
|
||||||
DROP TABLE IF EXISTS t1;
|
DROP TABLE IF EXISTS t1;
|
||||||
DROP TABLE IF EXISTS t2;
|
DROP TABLE IF EXISTS t2;
|
||||||
|
DROP TABLE IF EXISTS t1n;
|
||||||
|
DROP TABLE IF EXISTS t2n;
|
||||||
|
Loading…
Reference in New Issue
Block a user