2019-07-23 19:49:15 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
|
|
|
|
#include <Interpreters/InDepthNodeVisitor.h>
|
2019-07-24 15:37:37 +00:00
|
|
|
#include <Interpreters/Aliases.h>
|
2019-07-23 19:49:15 +00:00
|
|
|
#include <Interpreters/SyntaxAnalyzer.h>
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
|
2019-07-23 19:49:15 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used when reporting invalid JOIN ON sections.
/// Declared here only; `extern` means the values are defined in another translation unit.
namespace ErrorCodes
{
    extern const int AMBIGUOUS_COLUMN_NAME;
    extern const int INVALID_JOIN_ON_EXPRESSION;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
|
|
|
|
|
|
|
class CollectJoinOnKeysMatcher
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Visitor = ConstInDepthNodeVisitor<CollectJoinOnKeysMatcher, true>;
|
|
|
|
|
|
|
|
struct Data
|
|
|
|
{
|
|
|
|
AnalyzedJoin & analyzed_join;
|
2019-07-24 15:37:37 +00:00
|
|
|
const NameSet & source_columns;
|
|
|
|
const NameSet & joined_columns;
|
|
|
|
const Aliases & aliases;
|
2019-07-23 19:49:15 +00:00
|
|
|
bool has_some = false;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void visit(const ASTPtr & ast, Data & data)
|
|
|
|
{
|
|
|
|
if (auto * func = ast->as<ASTFunction>())
|
|
|
|
visit(*func, ast, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
|
|
|
|
{
|
|
|
|
if (auto * func = node->as<ASTFunction>())
|
|
|
|
if (func->name == "equals")
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data)
|
|
|
|
{
|
|
|
|
if (func.name == "and")
|
|
|
|
return; /// go into children
|
|
|
|
|
|
|
|
if (func.name == "equals")
|
|
|
|
{
|
|
|
|
ASTPtr left = func.arguments->children.at(0)->clone();
|
|
|
|
ASTPtr right = func.arguments->children.at(1)->clone();
|
2019-07-24 15:37:37 +00:00
|
|
|
addJoinKeys(ast, left, right, data);
|
2019-07-23 19:49:15 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
throwSyntaxException("Expected equals expression, got " + queryToString(ast) + ".");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void getIdentifiers(const ASTPtr & ast, std::vector<const ASTIdentifier *> & out)
|
|
|
|
{
|
|
|
|
if (const auto * ident = ast->as<ASTIdentifier>())
|
|
|
|
{
|
|
|
|
if (IdentifierSemantic::getColumnName(*ident))
|
|
|
|
out.push_back(ident);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto & child : ast->children)
|
|
|
|
getIdentifiers(child, out);
|
|
|
|
}
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
static void addJoinKeys(const ASTPtr & expr, ASTPtr left_ast, ASTPtr right_ast, Data & data)
|
2019-07-23 19:49:15 +00:00
|
|
|
{
|
|
|
|
std::vector<const ASTIdentifier *> left_identifiers;
|
|
|
|
std::vector<const ASTIdentifier *> right_identifiers;
|
|
|
|
|
|
|
|
getIdentifiers(left_ast, left_identifiers);
|
|
|
|
getIdentifiers(right_ast, right_identifiers);
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
size_t left_idents_table = getTableForIdentifiers(left_identifiers, data);
|
|
|
|
size_t right_idents_table = getTableForIdentifiers(right_identifiers, data);
|
2019-07-23 19:49:15 +00:00
|
|
|
|
|
|
|
if (left_idents_table && left_idents_table == right_idents_table)
|
|
|
|
{
|
|
|
|
auto left_name = queryToString(*left_identifiers[0]);
|
|
|
|
auto right_name = queryToString(*right_identifiers[0]);
|
|
|
|
|
|
|
|
throwSyntaxException("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name
|
|
|
|
+ " are from the same table but from different arguments of equal function.");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (left_idents_table == 1 || right_idents_table == 2)
|
|
|
|
data.analyzed_join.addOnKeys(left_ast, right_ast);
|
|
|
|
else if (left_idents_table == 2 || right_idents_table == 1)
|
|
|
|
data.analyzed_join.addOnKeys(right_ast, left_ast);
|
|
|
|
else
|
2019-07-24 15:37:37 +00:00
|
|
|
throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.",
|
|
|
|
ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
|
|
|
|
|
|
|
data.has_some = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases)
|
|
|
|
{
|
2019-07-25 19:25:51 +00:00
|
|
|
if (identifier->compound())
|
|
|
|
return identifier;
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
UInt32 max_attempts = 100;
|
|
|
|
for (auto it = aliases.find(identifier->name); it != aliases.end();)
|
2019-07-23 19:49:15 +00:00
|
|
|
{
|
2019-07-24 15:37:37 +00:00
|
|
|
const ASTIdentifier * parent = identifier;
|
|
|
|
identifier = it->second->as<ASTIdentifier>();
|
|
|
|
if (!identifier)
|
|
|
|
break; /// not a column alias
|
|
|
|
if (identifier == parent)
|
|
|
|
break; /// alias to itself with the same name: 'a as a'
|
|
|
|
if (identifier->compound())
|
|
|
|
break; /// not an alias. Break to prevent cycle through short names: 'a as b, t1.b as a'
|
|
|
|
|
|
|
|
it = aliases.find(identifier->name);
|
|
|
|
if (!max_attempts--)
|
|
|
|
throw Exception("Cannot unroll aliases for '" + identifier->name + "'", ErrorCodes::LOGICAL_ERROR);
|
2019-07-23 19:49:15 +00:00
|
|
|
}
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
return identifier;
|
2019-07-23 19:49:15 +00:00
|
|
|
}
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
/// @returns 1 if identifiers belongs to left table, 2 for right table and 0 if unknown. Throws on table mix.
|
|
|
|
/// Place detected identifier into identifiers[0] if any.
|
|
|
|
static size_t getTableForIdentifiers(std::vector<const ASTIdentifier *> & identifiers, const Data & data)
|
2019-07-23 19:49:15 +00:00
|
|
|
{
|
|
|
|
size_t table_number = 0;
|
|
|
|
|
2019-07-24 15:37:37 +00:00
|
|
|
for (auto & ident : identifiers)
|
2019-07-23 19:49:15 +00:00
|
|
|
{
|
2019-07-24 15:37:37 +00:00
|
|
|
const ASTIdentifier * identifier = unrollAliases(ident, data.aliases);
|
|
|
|
if (!identifier)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/// Column name could be cropped to a short form in TranslateQualifiedNamesVisitor.
|
|
|
|
/// In this case it saves membership in IdentifierSemantic.
|
2019-07-23 19:49:15 +00:00
|
|
|
size_t membership = IdentifierSemantic::getMembership(*identifier);
|
2019-07-24 15:37:37 +00:00
|
|
|
|
|
|
|
if (!membership)
|
|
|
|
{
|
|
|
|
const String & name = identifier->name;
|
|
|
|
bool in_left_table = data.source_columns.count(name);
|
|
|
|
bool in_right_table = data.joined_columns.count(name);
|
|
|
|
|
|
|
|
if (in_left_table && in_right_table)
|
|
|
|
throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
|
|
|
|
|
|
|
if (in_left_table)
|
|
|
|
membership = 1;
|
|
|
|
if (in_right_table)
|
|
|
|
membership = 2;
|
|
|
|
}
|
|
|
|
|
2019-07-23 19:49:15 +00:00
|
|
|
if (membership && table_number == 0)
|
|
|
|
{
|
|
|
|
table_number = membership;
|
2019-07-24 15:37:37 +00:00
|
|
|
std::swap(ident, identifiers[0]); /// move first detected identifier to the first position
|
2019-07-23 19:49:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (membership && membership != table_number)
|
|
|
|
{
|
|
|
|
throw Exception("Invalid columns in JOIN ON section. Columns "
|
2019-07-24 15:37:37 +00:00
|
|
|
+ identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName()
|
2019-07-23 19:49:15 +00:00
|
|
|
+ " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return table_number;
|
|
|
|
}
|
|
|
|
|
|
|
|
[[noreturn]] static void throwSyntaxException(const String & msg)
|
|
|
|
{
|
|
|
|
throw Exception("Invalid expression for JOIN ON. " + msg +
|
|
|
|
" Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
|
|
|
|
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]",
|
|
|
|
ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Parses a JOIN ON expression and collects the ASTs of the left and right key expressions of each equality.
|
|
|
|
using CollectJoinOnKeysVisitor = CollectJoinOnKeysMatcher::Visitor;
|
|
|
|
|
|
|
|
}
|