fix name clash for multi join rewrit

This commit is contained in:
wangtao.2077 2023-09-04 16:46:11 +08:00
parent 64227bc1bd
commit d80de4ff3a
3 changed files with 183 additions and 20 deletions

View File

@ -37,10 +37,13 @@ namespace
/// @note we use `--` prefix for unique short names and `--.` for subqueries.
/// It expects that user do not use names starting with `--` and column names starting with dot.
ASTPtr makeSubqueryTemplate()
ASTPtr makeSubqueryTemplate(const String & table_alias)
{
ParserTablesInSelectQueryElement parser(true);
ASTPtr subquery_template = parseQuery(parser, "(select * from _t) as `--.s`", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
String query_template = "(select * from _t)";
if (!table_alias.empty())
query_template += " as " + table_alias;
ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
if (!subquery_template)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template");
return subquery_template;
@ -232,8 +235,7 @@ struct RewriteTablesVisitorData
{
using TypeToVisit = ASTTablesInSelectQuery;
ASTPtr left;
ASTPtr right;
ASTs new_tables;
bool done = false;
/// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables.
@ -241,7 +243,6 @@ struct RewriteTablesVisitorData
{
if (done)
return;
ASTs new_tables{left, right};
ast->children.swap(new_tables);
done = true;
}
@ -483,7 +484,11 @@ struct TableNeededColumns
/// t.x as `some`
static void addAliasedName(const String & table, const String & column, const String & alias, ASTExpressionList & expression_list)
{
auto ident = std::make_shared<ASTIdentifier>(std::vector<String>{table, column});
std::vector<String> name_parts;
if (!table.empty())
name_parts.push_back(table);
name_parts.push_back(column);
auto ident = std::make_shared<ASTIdentifier>(std::move(name_parts));
ident->setAlias(alias);
expression_list.children.emplace_back(std::move(ident));
}
@ -494,7 +499,8 @@ class UniqueShortNames
public:
/// We know that long names are unique (do not clashes with others).
/// So we could make unique names base on this knolage by adding some unused prefix.
static constexpr const char * pattern = "--";
/// Add a heading underscore to make unique names valid for `isValidIdentifierBegin`
static constexpr const char * pattern = "_--";
String longToShort(const String & long_name)
{
@ -558,8 +564,6 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
const std::unordered_set<ASTIdentifier *> & public_identifiers,
UniqueShortNames & unique_names)
{
size_t last_table_pos = tables.size() - 1;
std::vector<TableNeededColumns> needed_columns;
needed_columns.reserve(tables.size());
for (const auto & table : tables)
@ -602,12 +606,9 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
const auto & unique_long_name = ident->name();
/// For tables moved into subselects we need unique short names for clashed names
if (*table_pos != last_table_pos)
{
String unique_short_name = unique_names.longToShort(unique_long_name);
ident->setShortName(unique_short_name);
needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
}
String unique_short_name = unique_names.longToShort(unique_long_name);
ident->setShortName(unique_short_name);
needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
}
else
{
@ -625,9 +626,47 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
restoreName(*ident, original_long_name, restored_names);
}
else if (got_alias)
needed_columns[*table_pos].alias_clashes.emplace(ident->shortName());
{
String short_name = ident->shortName();
if (!isValidIdentifierBegin(short_name.at(0)))
{
String original_long_name;
if (public_identifiers.contains(ident))
original_long_name = ident->name();
const auto & table = tables[*table_pos];
IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column
const auto & unique_long_name = ident->name();
String unique_short_name = unique_names.longToShort(unique_long_name);
ident->setShortName(unique_short_name);
needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
restoreName(*ident, original_long_name, restored_names);
}
else
needed_columns[*table_pos].alias_clashes.emplace(ident->shortName());
}
else
needed_columns[*table_pos].no_clashes.emplace(ident->shortName());
{
String short_name = ident->shortName();
if (!isValidIdentifierBegin(short_name.at(0)))
{
String original_long_name;
if (public_identifiers.contains(ident))
original_long_name = ident->name();
const auto & table = tables[*table_pos];
IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column
const auto & unique_long_name = ident->name();
String unique_short_name = unique_names.longToShort(unique_long_name);
ident->setShortName(unique_short_name);
needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
restoreName(*ident, original_long_name, restored_names);
}
else
needed_columns[*table_pos].no_clashes.emplace(ident->shortName());
}
}
}
@ -785,7 +824,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
ASTPtr left_table = src_tables[0];
static ASTPtr subquery_template = makeSubqueryTemplate();
static ASTPtr subquery_template = makeSubqueryTemplate("`--.s`");
for (size_t i = 1; i < src_tables.size() - 1; ++i)
{
@ -798,7 +837,36 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
left_table = replaceJoin(left_table, src_tables[i], subquery);
}
RewriteVisitor::Data visitor_data{left_table, src_tables.back()};
// expand the last table into a subselect, to resolve alias clashes inside it
static ASTPtr last_select_template = makeSubqueryTemplate("`--.t`");
auto last_select = last_select_template->clone();
{
auto expression_list = std::make_shared<ASTExpressionList>();
needed_columns[src_tables.size() - 1].fillExpressionList(*expression_list);
SubqueryExpressionsRewriteVisitor::Data expr_rewrite_data{std::move(expression_list)};
SubqueryExpressionsRewriteVisitor(expr_rewrite_data).visit(last_select);
// move ASTTableJoin out of subquery
auto * last_table_elem = src_tables.back()->as<ASTTablesInSelectQueryElement>();
auto * last_select_elem = last_select->as<ASTTablesInSelectQueryElement>();
if (!last_table_elem || !last_select_elem)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Two TablesInSelectQueryElements expected");
if (!last_table_elem->table_join)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected");
last_select_elem->table_join = std::move(last_table_elem->table_join);
last_select_elem->children.emplace_back(last_select_elem->table_join);
last_table_elem->children.erase(
std::remove(last_table_elem->children.begin(), last_table_elem->children.end(), last_select_elem->table_join),
last_table_elem->children.end());
RewriteVisitor::Data visitor_data{{src_tables.back()}};
RewriteVisitor(visitor_data).visit(last_select);
}
RewriteVisitor::Data visitor_data{{left_table, last_select}};
RewriteVisitor(visitor_data).visit(select.refTables());
data.done = true;
@ -815,7 +883,7 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_r
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected");
/// replace '_t' with pair of joined tables
RewriteVisitor::Data visitor_data{ast_left, ast_right};
RewriteVisitor::Data visitor_data{{ast_left, ast_right}};
RewriteVisitor(visitor_data).visit(subquery_template);
return subquery_template;
}

View File

@ -0,0 +1,48 @@
-- { echo }
-- no clash name
SELECT
c + 1,
Z.c + 1
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
31 31
-- alias clash
SELECT
(a + 1) AS c,
Z.c + 1
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
11 31
-- column clash
SELECT
(X.c + 1) AS c,
Z.c + 1
FROM
(SELECT 10 c) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
11 31
SELECT
(X.a + 1) AS a,
(Y.a + 1) AS Y_a,
(Z.a + 1) AS Z_a,
(Y.b + 1) AS b,
(Z.b + 1) AS Z_b
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 a, 21 as b) Y
CROSS JOIN
(SELECT 30 a, 31 as b, 32 as c) Z;
11 21 31 22 32

View File

@ -0,0 +1,47 @@
-- { echo }
-- no clash name
SELECT
c + 1,
Z.c + 1
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
-- alias clash
SELECT
(a + 1) AS c,
Z.c + 1
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
-- column clash
SELECT
(X.c + 1) AS c,
Z.c + 1
FROM
(SELECT 10 c) X
CROSS JOIN
(SELECT 20 b) Y
CROSS JOIN
(SELECT 30 c) Z;
SELECT
(X.a + 1) AS a,
(Y.a + 1) AS Y_a,
(Z.a + 1) AS Z_a,
(Y.b + 1) AS b,
(Z.b + 1) AS Z_b
FROM
(SELECT 10 a) X
CROSS JOIN
(SELECT 20 a, 21 as b) Y
CROSS JOIN
(SELECT 30 a, 31 as b, 32 as c) Z;