2019-02-01 16:36:40 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2021-09-29 10:58:14 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
2019-03-12 13:17:17 +00:00
|
|
|
#include <Functions/FunctionsComparison.h>
|
|
|
|
#include <Functions/FunctionsLogical.h>
|
2021-02-12 14:50:09 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2019-02-01 16:36:40 +00:00
|
|
|
#include <Interpreters/CrossToInnerJoinVisitor.h>
|
|
|
|
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
2019-10-23 13:59:03 +00:00
|
|
|
#include <Interpreters/misc.h>
|
2021-02-12 14:50:09 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
2019-02-01 16:36:40 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2020-03-08 11:07:05 +00:00
|
|
|
#include <Parsers/ASTSubquery.h>
|
2019-02-01 16:36:40 +00:00
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
#include <Parsers/ExpressionListParsers.h>
|
2021-02-12 14:50:09 +00:00
|
|
|
#include <Parsers/ParserTablesInSelectQuery.h>
|
2019-02-01 16:36:40 +00:00
|
|
|
#include <Parsers/parseQuery.h>
|
|
|
|
|
2022-05-10 15:12:17 +00:00
|
|
|
#include <Common/logger_useful.h>
|
|
|
|
|
2019-02-01 16:36:40 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2022-05-10 15:12:17 +00:00
|
|
|
extern const int INCORRECT_QUERY;
|
2019-02-01 16:36:40 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2021-09-29 10:23:29 +00:00
|
|
|
extern const int NOT_IMPLEMENTED;
|
2019-02-01 16:36:40 +00:00
|
|
|
}
|
|
|
|
|
2019-03-11 19:45:04 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
struct JoinedElement
|
2019-03-11 19:45:04 +00:00
|
|
|
{
|
2020-03-18 03:27:32 +00:00
|
|
|
explicit JoinedElement(const ASTTablesInSelectQueryElement & table_element)
|
2020-03-08 11:07:05 +00:00
|
|
|
: element(table_element)
|
2019-03-11 19:45:04 +00:00
|
|
|
{
|
2020-03-08 11:07:05 +00:00
|
|
|
if (element.table_join)
|
|
|
|
join = element.table_join->as<ASTTableJoin>();
|
|
|
|
}
|
2019-03-11 19:45:04 +00:00
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const
|
|
|
|
{
|
|
|
|
if (!element.table_expression)
|
|
|
|
throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR);
|
2019-03-11 19:45:04 +00:00
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
ASTTableExpression * table_expression = element.table_expression->as<ASTTableExpression>();
|
|
|
|
if (!table_expression)
|
|
|
|
throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR);
|
2019-03-11 19:45:04 +00:00
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database)))
|
|
|
|
throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR);
|
2019-03-11 19:45:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void rewriteCommaToCross()
|
|
|
|
{
|
2020-04-16 20:28:23 +00:00
|
|
|
if (join && join->kind == ASTTableJoin::Kind::Comma)
|
2019-03-11 19:45:04 +00:00
|
|
|
join->kind = ASTTableJoin::Kind::Cross;
|
|
|
|
}
|
|
|
|
|
2020-04-16 20:28:23 +00:00
|
|
|
bool rewriteCrossToInner(ASTPtr on_expression)
|
2020-03-08 11:07:05 +00:00
|
|
|
{
|
2020-04-16 20:28:23 +00:00
|
|
|
if (join->kind != ASTTableJoin::Kind::Cross)
|
|
|
|
return false;
|
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
join->kind = ASTTableJoin::Kind::Inner;
|
|
|
|
join->strictness = ASTTableJoin::Strictness::All;
|
|
|
|
|
|
|
|
join->on_expression = on_expression;
|
|
|
|
join->children.push_back(join->on_expression);
|
2020-04-16 20:28:23 +00:00
|
|
|
return true;
|
2020-03-08 11:07:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ASTPtr arrayJoin() const { return element.array_join; }
|
|
|
|
const ASTTableJoin * tableJoin() const { return join; }
|
|
|
|
|
2019-03-11 19:45:04 +00:00
|
|
|
bool canAttachOnExpression() const { return join && !join->on_expression; }
|
2020-03-08 11:07:05 +00:00
|
|
|
bool hasUsing() const { return join && join->using_expression_list; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
const ASTTablesInSelectQueryElement & element;
|
|
|
|
ASTTableJoin * join = nullptr;
|
2019-03-11 19:45:04 +00:00
|
|
|
};
|
|
|
|
|
2021-02-15 12:00:08 +00:00
|
|
|
bool isAllowedToRewriteCrossJoin(const ASTPtr & node, const Aliases & aliases)
|
|
|
|
{
|
2021-02-16 08:21:54 +00:00
|
|
|
if (node->as<ASTFunction>())
|
2021-02-15 12:00:08 +00:00
|
|
|
{
|
|
|
|
auto idents = IdentifiersCollector::collect(node);
|
|
|
|
for (const auto * ident : idents)
|
|
|
|
{
|
2022-04-18 10:18:43 +00:00
|
|
|
if (ident->isShort() && aliases.contains(ident->shortName()))
|
2021-02-15 12:00:08 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
2019-02-04 18:45:31 +00:00
|
|
|
}
|
2021-02-15 12:00:08 +00:00
|
|
|
return node->as<ASTIdentifier>() || node->as<ASTLiteral>();
|
|
|
|
}
|
2019-02-04 18:45:31 +00:00
|
|
|
|
2021-02-19 14:06:57 +00:00
|
|
|
/// Return mapping table_no -> expression with expression that can be moved into JOIN ON section
|
|
|
|
std::map<size_t, std::vector<ASTPtr>> moveExpressionToJoinOn(
|
|
|
|
const ASTPtr & ast,
|
|
|
|
const std::vector<JoinedElement> & joined_tables,
|
|
|
|
const std::vector<TableWithColumnNamesAndTypes> & tables,
|
|
|
|
const Aliases & aliases)
|
2021-02-15 12:00:08 +00:00
|
|
|
{
|
2021-02-19 14:06:57 +00:00
|
|
|
std::map<size_t, std::vector<ASTPtr>> asts_to_join_on;
|
|
|
|
for (const auto & node : collectConjunctions(ast))
|
2019-02-01 16:36:40 +00:00
|
|
|
{
|
2021-02-15 12:00:08 +00:00
|
|
|
if (const auto * func = node->as<ASTFunction>(); func && func->name == NameEquals::name)
|
|
|
|
{
|
|
|
|
if (!func->arguments || func->arguments->children.size() != 2)
|
2021-02-19 14:06:57 +00:00
|
|
|
return {};
|
2019-02-04 18:45:31 +00:00
|
|
|
|
2021-02-15 12:00:08 +00:00
|
|
|
/// Check if the identifiers are from different joined tables.
|
|
|
|
/// If it's a self joint, tables should have aliases.
|
2021-04-01 11:21:36 +00:00
|
|
|
auto left_table_pos = IdentifierSemantic::getIdentsMembership(func->arguments->children[0], tables, aliases);
|
|
|
|
auto right_table_pos = IdentifierSemantic::getIdentsMembership(func->arguments->children[1], tables, aliases);
|
2021-02-19 12:14:24 +00:00
|
|
|
|
|
|
|
/// Identifiers from different table move to JOIN ON
|
|
|
|
if (left_table_pos && right_table_pos && *left_table_pos != *right_table_pos)
|
2021-02-15 12:00:08 +00:00
|
|
|
{
|
2021-02-19 12:14:24 +00:00
|
|
|
size_t table_pos = std::max(*left_table_pos, *right_table_pos);
|
|
|
|
if (joined_tables[table_pos].canAttachOnExpression())
|
|
|
|
asts_to_join_on[table_pos].push_back(node);
|
2021-02-15 12:00:08 +00:00
|
|
|
else
|
2021-02-19 14:06:57 +00:00
|
|
|
return {};
|
2021-02-15 12:00:08 +00:00
|
|
|
}
|
2019-03-11 19:45:04 +00:00
|
|
|
}
|
2021-02-15 12:00:08 +00:00
|
|
|
|
|
|
|
if (!isAllowedToRewriteCrossJoin(node, aliases))
|
2021-02-19 14:06:57 +00:00
|
|
|
return {};
|
2019-02-01 16:36:40 +00:00
|
|
|
}
|
2021-02-19 14:06:57 +00:00
|
|
|
return asts_to_join_on;
|
2021-02-15 12:00:08 +00:00
|
|
|
}
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2021-02-15 12:00:08 +00:00
|
|
|
/// Builds the JOIN ON expression from the collected conjuncts.
///
/// A single expression is returned as its clone; otherwise the clones of all
/// expressions become the arguments of one `and` function.
ASTPtr makeOnExpression(const std::vector<ASTPtr> & expressions)
{
    const size_t count = expressions.size();
    if (count == 1)
        return expressions.front()->clone();

    /// Clone every expression so that the new function does not share nodes with the source ASTs.
    std::vector<ASTPtr> cloned;
    cloned.reserve(count);
    for (size_t i = 0; i < count; ++i)
        cloned.emplace_back(expressions[i]->clone());

    return makeASTFunction(NameAnd::name, std::move(cloned));
}
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2021-09-29 10:21:06 +00:00
|
|
|
std::vector<JoinedElement> getTables(const ASTSelectQuery & select)
|
2019-02-01 16:36:40 +00:00
|
|
|
{
|
2019-04-09 14:22:35 +00:00
|
|
|
if (!select.tables())
|
2021-09-29 10:21:06 +00:00
|
|
|
return {};
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2019-04-09 14:22:35 +00:00
|
|
|
const auto * tables = select.tables()->as<ASTTablesInSelectQuery>();
|
2019-02-01 16:36:40 +00:00
|
|
|
if (!tables)
|
2021-09-29 10:21:06 +00:00
|
|
|
return {};
|
2019-02-01 16:36:40 +00:00
|
|
|
|
|
|
|
size_t num_tables = tables->children.size();
|
2019-03-11 19:45:04 +00:00
|
|
|
if (num_tables < 2)
|
2021-09-29 10:21:06 +00:00
|
|
|
return {};
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2021-09-29 10:21:06 +00:00
|
|
|
std::vector<JoinedElement> joined_tables;
|
2019-03-11 19:45:04 +00:00
|
|
|
joined_tables.reserve(num_tables);
|
2021-09-29 10:58:14 +00:00
|
|
|
bool has_using = false;
|
2019-04-08 12:35:26 +00:00
|
|
|
|
2020-04-22 05:39:31 +00:00
|
|
|
for (const auto & child : tables->children)
|
2019-02-01 16:36:40 +00:00
|
|
|
{
|
2021-09-29 10:58:14 +00:00
|
|
|
const auto * table_element = child->as<ASTTablesInSelectQueryElement>();
|
2020-03-08 11:07:05 +00:00
|
|
|
if (!table_element)
|
|
|
|
throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
2021-09-29 10:58:14 +00:00
|
|
|
JoinedElement & t = joined_tables.emplace_back(*table_element);
|
|
|
|
t.rewriteCommaToCross();
|
2020-03-08 11:07:05 +00:00
|
|
|
|
|
|
|
if (t.arrayJoin())
|
2021-09-29 10:58:14 +00:00
|
|
|
return {};
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
if (t.hasUsing())
|
2019-04-08 12:35:26 +00:00
|
|
|
{
|
2021-09-29 10:58:14 +00:00
|
|
|
if (has_using)
|
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Multuple USING statements are not supported");
|
|
|
|
has_using = true;
|
2019-04-08 12:35:26 +00:00
|
|
|
}
|
2019-02-01 16:36:40 +00:00
|
|
|
|
2021-09-29 10:58:14 +00:00
|
|
|
if (const auto * join = t.tableJoin(); join && isCrossOrComma(join->kind))
|
2020-03-08 11:07:05 +00:00
|
|
|
{
|
2021-09-29 10:58:14 +00:00
|
|
|
if (!join->children.empty())
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::LOGICAL_ERROR, "CROSS JOIN has {} expressions: [{}, ...]",
|
|
|
|
join->children.size(), queryToString(join->children[0]));
|
2020-03-08 11:07:05 +00:00
|
|
|
}
|
2019-02-01 16:36:40 +00:00
|
|
|
}
|
2019-04-08 12:35:26 +00:00
|
|
|
|
2021-09-29 10:58:14 +00:00
|
|
|
return joined_tables;
|
2019-03-11 19:45:04 +00:00
|
|
|
}
|
2019-02-01 16:36:40 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
/// Children are visited for every node except subqueries.
bool CrossToInnerJoinMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
{
    const bool is_subquery = node->as<ASTSubquery>() != nullptr;
    return !is_subquery;
}
|
|
|
|
|
2019-02-22 13:33:56 +00:00
|
|
|
/// Entry point of the matcher: only SELECT queries are processed, any other node is ignored.
void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * select_query = ast->as<ASTSelectQuery>();
    if (!select_query)
        return;

    visit(*select_query, ast, data);
}
|
|
|
|
|
2019-03-11 19:45:04 +00:00
|
|
|
/// Rewrites the CROSS joins of `select` into INNER joins using suitable conjuncts of its WHERE clause.
///
/// Steps:
///  1. Collect the joined tables; an empty result means there is nothing to do.
///  2. Check that they agree (count and names) with `data.tables_with_columns`;
///     a mismatch is reported as LOGICAL_ERROR.
///  3. If `data.cross_to_inner_join_rewrite` is non-zero and the query has WHERE, attach the
///     collected expressions as JOIN ON to every CROSS join that has some.
///
/// When `data.cross_to_inner_join_rewrite` is greater than 1, a CROSS join that could not be
/// rewritten raises INCORRECT_QUERY instead of being left as is.
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data)
{
    std::vector<JoinedElement> joined_tables = getTables(select);
    if (joined_tables.empty())
        return;

    /// Check if joined_tables are consistent with known tables_with_columns
    {
        if (joined_tables.size() != data.tables_with_columns.size())
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                            "Logical error: inconsistent number of tables: {} != {}",
                            joined_tables.size(), data.tables_with_columns.size());

        for (size_t i = 0; i < joined_tables.size(); ++i)
            joined_tables[i].checkTableName(data.tables_with_columns[i].table, data.current_database);
    }

    /// CROSS to INNER
    if (data.cross_to_inner_join_rewrite && select.where())
    {
        auto asts_to_join_on = moveExpressionToJoinOn(select.where(), joined_tables, data.tables_with_columns, data.aliases);

        /// The loop starts at 1, so the first element of the tables list is never inspected here.
        for (size_t i = 1; i < joined_tables.size(); ++i)
        {
            auto & joined = joined_tables[i];

            /// tableJoin() may be nullptr, so check it before looking at the kind.
            const auto * table_join = joined.tableJoin();
            if (!table_join || table_join->kind != ASTTableJoin::Kind::Cross)
                continue;

            /// Remember the original text: it is needed both for the log and for the error message.
            String query_before = queryToString(*table_join);
            bool rewritten = false;

            if (auto expr_it = asts_to_join_on.find(i); expr_it != asts_to_join_on.end())
            {
                ASTPtr on_expr = makeOnExpression(expr_it->second);
                rewritten = joined.rewriteCrossToInner(on_expr);
                if (rewritten)
                {
                    /// `table_join` points to the node that has just been modified in place.
                    LOG_DEBUG(&Poco::Logger::get("CrossToInnerJoin"), "Rewritten '{}' to '{}'", query_before, queryToString(*table_join));
                }
            }

            if (data.cross_to_inner_join_rewrite > 1 && !rewritten)
            {
                throw Exception(ErrorCodes::INCORRECT_QUERY, "Failed to rewrite '{} WHERE {}' to INNER JOIN",
                                query_before, queryToString(select.where()));
            }
        }
    }
}
|
|
|
|
|
|
|
|
}
|