join on allow aliases for right table columns

This commit is contained in:
chertus 2019-02-15 18:34:30 +03:00
parent dc1a0ed3f4
commit b72d12d12e
7 changed files with 125 additions and 37 deletions

View File

@ -67,7 +67,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
ASTPtr query = expression_list;
auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns);
ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns_set);
return analyzer.getActions(false);
return analyzer.getActions(true, false);
}
Names AnalyzedJoin::getOriginalColumnNames(const NameSet & required_columns_from_joined_table) const

View File

@ -15,7 +15,7 @@ bool ColumnNamesContext::addTableAliasIfAny(const IAST & ast)
return true;
}
bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public)
bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast)
{
String alias = ast.tryGetAlias();
if (alias.empty())
@ -24,21 +24,22 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public)
if (required_names.count(alias))
masked_columns.insert(alias);
if (is_public)
public_columns.insert(alias);
column_aliases.insert(alias);
complex_aliases.insert(alias);
return true;
}
void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public)
void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node)
{
if (!IdentifierSemantic::getColumnName(node))
return;
required_names.insert(node.name);
/// There should be no complex cases after query normalization. Names to aliases: one-to-many.
String alias = node.tryGetAlias();
if (!alias.empty())
required_names[node.name].insert(alias);
if (!addColumnAliasIfAny(node, is_public) && is_public)
public_columns.insert(node.name);
if (!required_names.count(node.name))
required_names[node.name] = {};
}
bool ColumnNamesContext::addArrayJoinAliasIfAny(const IAST & ast)
@ -56,19 +57,24 @@ void ColumnNamesContext::addArrayJoinIdentifier(const ASTIdentifier & node)
array_join_columns.insert(node.name);
}
NameSet ColumnNamesContext::requiredColumns() const
NameSet ColumnNamesContext::requiredColumns(std::unordered_map<String, String> & alias_map) const
{
NameSet required;
for (const auto & name : required_names)
for (const auto & pr : required_names)
{
const auto & name = pr.first;
String table_name = Nested::extractTableName(name);
/// Tech debt. There's its own logic for ARRAY JOIN columns.
if (array_join_columns.count(name) || array_join_columns.count(table_name))
continue;
if (!column_aliases.count(name) || masked_columns.count(name))
if (!complex_aliases.count(name) || masked_columns.count(name))
required.insert(name);
/// It's possible that column is masked but alias is available
for (const String & alias : pr.second)
alias_map[alias] = name;
}
return required;
}
@ -76,9 +82,13 @@ NameSet ColumnNamesContext::requiredColumns() const
std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols)
{
os << "required_names: ";
for (const auto & x : cols.required_names)
os << "'" << x << "' ";
os << "source_tables: ";
for (const auto & pr : cols.required_names)
{
os << "'" << pr.first << "'";
for (auto & alias : pr.second)
os << "/'" << alias << "'";
}
os << " source_tables: ";
for (const auto & x : cols.tables)
{
auto alias = x.alias();
@ -93,14 +103,8 @@ std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols)
os << "table_aliases: ";
for (const auto & x : cols.table_aliases)
os << "'" << x << "' ";
os << "private_aliases: ";
for (const auto & x : cols.private_aliases)
os << "'" << x << "' ";
os << "column_aliases: ";
for (const auto & x : cols.column_aliases)
os << "'" << x << "' ";
os << "public_columns: ";
for (const auto & x : cols.public_columns)
os << "complex_aliases: ";
for (const auto & x : cols.complex_aliases)
os << "'" << x << "' ";
os << "masked_columns: ";
for (const auto & x : cols.masked_columns)

View File

@ -51,24 +51,24 @@ struct ColumnNamesContext
}
};
NameSet required_names;
std::unordered_map<String, std::set<String>> required_names; /// names with aliases
NameSet table_aliases;
NameSet private_aliases;
NameSet column_aliases;
NameSet complex_aliases;
NameSet masked_columns;
NameSet public_columns;
NameSet array_join_columns;
std::vector<JoinedTable> tables; /// ordered list of visited tables in FROM section with joins
bool has_table_join = false;
bool has_array_join = false;
bool addTableAliasIfAny(const IAST & ast);
bool addColumnAliasIfAny(const IAST & ast, bool is_public = false);
void addColumnIdentifier(const ASTIdentifier & node, bool is_public = false);
bool addColumnAliasIfAny(const IAST & ast);
void addColumnIdentifier(const ASTIdentifier & node);
bool addArrayJoinAliasIfAny(const IAST & ast);
void addArrayJoinIdentifier(const ASTIdentifier & node);
NameSet requiredColumns() const;
/// Returns the set of required columns and fills alias_map with their aliases (alias -> column).
NameSet requiredColumns(std::unordered_map<String, String> & alias_map) const;
};
std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols);

View File

@ -948,7 +948,8 @@ void ExpressionAnalyzer::collectUsedColumns()
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(query);
NameSet required = columns_context.requiredColumns();
std::unordered_map<String, String> required_alias_map;
NameSet required = columns_context.requiredColumns(required_alias_map);
#if 0
std::cerr << "Query: " << query << std::endl;
@ -957,8 +958,8 @@ void ExpressionAnalyzer::collectUsedColumns()
for (const auto & name : source_columns)
std::cerr << "'" << name.name << "' ";
std::cerr << "required: ";
for (const auto & name : required)
std::cerr << "'" << name << "' ";
for (const auto & pr : required)
std::cerr << "'" << pr.first << "' ";
std::cerr << std::endl;
#endif
@ -991,11 +992,19 @@ void ExpressionAnalyzer::collectUsedColumns()
for (const auto & joined_column : analyzed_join.available_joined_columns)
{
auto & name = joined_column.name_and_type.name;
if (required.count(name) && !avaliable_columns.count(name))
if (avaliable_columns.count(name))
continue;
if (required.count(name))
{
columns_added_by_join.push_back(joined_column);
required.erase(name);
}
else if (required_alias_map.count(name) && required.count(required_alias_map[name]))
{
columns_added_by_join.push_back(joined_column);
required.erase(required_alias_map[name]);
}
}
}
@ -1022,7 +1031,11 @@ void ExpressionAnalyzer::collectUsedColumns()
const String & column_name = it->name;
unknown_required_source_columns.erase(column_name);
if (!required.count(column_name))
bool is_required_alias = required_alias_map.count(column_name);
if (is_required_alias)
unknown_required_source_columns.erase(required_alias_map[column_name]);
if (!required.count(column_name) && !is_required_alias)
source_columns.erase(it++);
else
++it;

View File

@ -68,7 +68,6 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & d
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
{
data.addColumnAliasIfAny(*ast);
visit(*t, ast, data);
return {};
}
@ -121,9 +120,9 @@ std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTSelectQuery & selec
for (auto & node : select.select_expression_list->children)
{
if (auto * identifier = typeid_cast<ASTIdentifier *>(node.get()))
data.addColumnIdentifier(*identifier, true);
data.addColumnIdentifier(*identifier);
else
data.addColumnAliasIfAny(*node, true);
data.addColumnAliasIfAny(*node);
}
std::vector<ASTPtr *> out;

View File

@ -0,0 +1,29 @@
0 0
2 2
4 4
6 6
8 8
0 0
2 2
4 4
6 6
8 8
0 0
2 2
4 4
6 6
8 8
0 0
2 2
4 4
6 6
8 8
0 0
2 2
4 4
6 6
8 8
6 6 6 60
8 8 8 80
6 6 6 60
8 8 8 80

View File

@ -0,0 +1,43 @@
-- Test script: JOIN ... ON conditions that refer to the joined columns through
-- original table names, table aliases (t1, t2) and column aliases (t1_a, t2_a),
-- including aliases of right-table columns.
USE test;
-- Start from a clean state in case the tables already exist.
DROP TABLE IF EXISTS table1;
DROP TABLE IF EXISTS table2;
-- Both tables share the same column names (a, b), so every reference in the
-- queries below has to be resolved to the correct side of the join.
CREATE TABLE table1 (a UInt32, b UInt32) ENGINE = Memory;
CREATE TABLE table2 (a UInt32, b UInt32) ENGINE = Memory;
-- table1: a = b = number, taken from numbers(10).
INSERT INTO table1 SELECT number, number FROM numbers(10);
-- table2: a = number * 2, b = number * 20, taken from numbers(6).
INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(6);
-- Left column written as table1.a, t1.a and the SELECT alias t1_a;
-- right column always written with the original table name (table2.a).
select t1.a t1_a, t2.a
from table1 as t1
join table2 as t2 on table1.a = table2.a and t1.a = table2.a and t1_a = table2.a;
-- Same left-side spellings; right column always written with the table alias (t2.a).
select t1.a t1_a, t2.a
from table1 as t1
join table2 as t2 on table1.a = t2.a and t1.a = t2.a and t1_a = t2.a;
-- Right column referenced in ON only through its SELECT alias t2_a.
select t1.a as t1_a, t2.a t2_a
from table1 as t1
join table2 as t2 on table1.a = t2_a and t1.a = t2_a and t1_a = t2_a;
-- Right column written both as table2.a and as t2.a within one ON clause.
select t1.a t1_a, t2.a
from table1 as t1
join table2 as t2 on table1.a = table2.a and t1.a = t2.a and t1_a = t2.a;
-- Right column written as table2.a, t2.a and the alias t2_a within one ON clause.
select t1.a t1_a, t2.a as t2_a
from table1 as t1
join table2 as t2 on table1.a = table2.a and t1.a = t2.a and t1_a = t2_a;
-- Aliases t1_a / t2_a are introduced inside WHERE (via original table names)
-- and used in ON; the result is additionally filtered by both WHERE conditions.
select *
from table1 as t1
join table2 as t2 on t1_a = t2_a
where (table1.a as t1_a) > 4 and (table2.a as t2_a) > 2;
-- Same as the previous query, but the aliases are defined via table aliases
-- (t1.a, t2.a), the thresholds are swapped, and the select list uses
-- qualified asterisks.
select t1.*, t2.*
from table1 as t1
join table2 as t2 on t1_a = t2_a
where (t1.a as t1_a) > 2 and (t2.a as t2_a) > 4;
-- Clean up the tables created by this test.
DROP TABLE table1;
DROP TABLE table2;