mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
JOIN with USING: do not duplicate columns
This commit is contained in:
parent
f13d094a4e
commit
cbbf04204b
@ -1,13 +1,16 @@
|
||||
#include <Poco/String.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/AnalyzedJoin.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Poco/String.h>
|
||||
#include <Parsers/ASTQualifiedAsterisk.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -16,10 +19,14 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TOO_DEEP_AST;
|
||||
extern const int CYCLIC_ALIASES;
|
||||
}
|
||||
|
||||
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
|
||||
|
||||
|
||||
class CheckASTDepth
|
||||
{
|
||||
public:
|
||||
@ -135,7 +142,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
|
||||
/// Replace *, alias.*, database.table.* with a list of columns.
|
||||
void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
auto & tables_with_columns = data.tables_with_columns;
|
||||
const auto & tables_with_columns = data.tables_with_columns;
|
||||
const auto & source_columns_set = data.source_columns_set;
|
||||
|
||||
ASTs old_children;
|
||||
if (data.processAsterisks())
|
||||
@ -162,22 +170,43 @@ void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & dat
|
||||
{
|
||||
if (typeid_cast<const ASTAsterisk *>(child.get()))
|
||||
{
|
||||
for (const auto & pr : tables_with_columns)
|
||||
for (const auto & column_name : pr.second)
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
bool first_table = true;
|
||||
for (const auto & [table_name, table_columns] : tables_with_columns)
|
||||
{
|
||||
for (const auto & column_name : table_columns)
|
||||
if (first_table || !data.join_using_columns.count(column_name))
|
||||
{
|
||||
/// qualified names for duplicates
|
||||
if (!first_table && source_columns_set && source_columns_set->count(column_name))
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(table_name.getQualifiedNamePrefix() + column_name));
|
||||
else
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
}
|
||||
|
||||
first_table = false;
|
||||
}
|
||||
}
|
||||
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
|
||||
{
|
||||
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
|
||||
|
||||
bool first_table = true;
|
||||
for (const auto & [table_name, table_columns] : tables_with_columns)
|
||||
{
|
||||
if (ident_db_and_name.satisfies(table_name, true))
|
||||
{
|
||||
for (const auto & column_name : table_columns)
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
{
|
||||
/// qualified names for duplicates
|
||||
if (!first_table && source_columns_set && source_columns_set->count(column_name))
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(table_name.getQualifiedNamePrefix() + column_name));
|
||||
else
|
||||
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
first_table = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -198,6 +227,11 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
|
||||
/// special visitChildren() for ASTSelectQuery
|
||||
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
extractTablesWithColumns(select, data);
|
||||
|
||||
if (auto join = select.join())
|
||||
extractJoinUsingColumns(join->table_join, data);
|
||||
|
||||
for (auto & child : ast->children)
|
||||
{
|
||||
if (typeid_cast<const ASTSelectQuery *>(child.get()) ||
|
||||
@ -312,4 +346,46 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
|
||||
}
|
||||
}
|
||||
|
||||
void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data)
|
||||
{
|
||||
if (data.context && select_query.tables && !select_query.tables->children.empty())
|
||||
{
|
||||
data.tables_with_columns.clear();
|
||||
String current_database = data.context->getCurrentDatabase();
|
||||
|
||||
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
|
||||
{
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
|
||||
|
||||
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context);
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 'select * from a join b using id' should result one 'id' column
|
||||
void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data)
|
||||
{
|
||||
const auto & table_join = typeid_cast<const ASTTableJoin &>(*ast);
|
||||
|
||||
if (table_join.using_expression_list)
|
||||
{
|
||||
auto & keys = typeid_cast<ASTExpressionList &>(*table_join.using_expression_list);
|
||||
for (const auto & key : keys.children)
|
||||
if (auto opt_column = getIdentifierName(key))
|
||||
data.join_using_columns.insert(*opt_column);
|
||||
else if (auto * literal = typeid_cast<const ASTLiteral *>(key.get()))
|
||||
data.join_using_columns.insert(key->getColumnName());
|
||||
else
|
||||
{
|
||||
String alias = key->tryGetAlias();
|
||||
if (alias.empty())
|
||||
throw Exception("Logical error: expected identifier or alias, got: " + key->getID(), ErrorCodes::LOGICAL_ERROR);
|
||||
data.join_using_columns.insert(alias);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <Core/Names.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
@ -17,11 +19,11 @@ inline bool functionIsInOrGlobalInOperator(const String & name)
|
||||
return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn";
|
||||
}
|
||||
|
||||
|
||||
class ASTFunction;
|
||||
class ASTIdentifier;
|
||||
class ASTExpressionList;
|
||||
struct ASTTablesInSelectQueryElement;
|
||||
class Context;
|
||||
|
||||
|
||||
class QueryNormalizer
|
||||
@ -52,7 +54,10 @@ public:
|
||||
|
||||
const Aliases & aliases;
|
||||
const ExtractedSettings settings;
|
||||
const std::vector<TableWithColumnNames> tables_with_columns;
|
||||
const Context * context;
|
||||
const NameSet * source_columns_set;
|
||||
std::vector<TableWithColumnNames> tables_with_columns;
|
||||
std::unordered_set<String> join_using_columns;
|
||||
|
||||
/// tmp data
|
||||
size_t level;
|
||||
@ -60,10 +65,22 @@ public:
|
||||
SetOfASTs current_asts; /// vertices in the current call stack of this method
|
||||
std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
|
||||
|
||||
Data(const Aliases & aliases_, ExtractedSettings && settings_, std::vector<TableWithColumnNames> && tables_with_columns_ = {})
|
||||
Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_,
|
||||
const NameSet & source_columns_set, Names && all_columns)
|
||||
: aliases(aliases_)
|
||||
, settings(settings_)
|
||||
, tables_with_columns(tables_with_columns_)
|
||||
, context(&context_)
|
||||
, source_columns_set(&source_columns_set)
|
||||
, level(0)
|
||||
{
|
||||
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns));
|
||||
}
|
||||
|
||||
/// Constructor without a Context and without a set of source columns: both pointers are set to nullptr
/// and the level starts at 0. tables_with_columns and join_using_columns are not mentioned
/// in the initializer list, so they start default-constructed.
Data(const Aliases & aliases_, ExtractedSettings && settings_)
|
||||
: aliases(aliases_)
|
||||
, settings(settings_)
|
||||
, context(nullptr)
|
||||
, source_columns_set(nullptr)
|
||||
, level(0)
|
||||
{}
|
||||
|
||||
@ -91,6 +108,9 @@ private:
|
||||
static void visit(ASTSelectQuery &, const ASTPtr &, Data &);
|
||||
|
||||
static void visitChildren(const ASTPtr &, Data & data);
|
||||
|
||||
static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data);
|
||||
static void extractJoinUsingColumns(const ASTPtr ast, Data & data);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -100,12 +100,13 @@ void normalizeTree(
|
||||
const Names & source_columns,
|
||||
const NameSet & source_columns_set,
|
||||
const Context & context,
|
||||
const ASTSelectQuery * select_query,
|
||||
bool asterisk_left_columns_only)
|
||||
const ASTSelectQuery * select_query)
|
||||
{
|
||||
const auto & settings = context.getSettingsRef();
|
||||
|
||||
Names all_columns_name = source_columns;
|
||||
|
||||
if (!asterisk_left_columns_only)
|
||||
if (!settings.asterisk_left_columns_only)
|
||||
{
|
||||
auto columns_from_joined_table = result.analyzed_join.getColumnsFromJoinedTable(source_columns_set, context, select_query);
|
||||
for (auto & column : columns_from_joined_table)
|
||||
@ -115,37 +116,7 @@ void normalizeTree(
|
||||
if (all_columns_name.empty())
|
||||
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
std::vector<QueryNormalizer::TableWithColumnNames> table_with_columns;
|
||||
if (select_query && select_query->tables && !select_query->tables->children.empty())
|
||||
{
|
||||
std::vector<const ASTTableExpression *> tables_expression = getSelectTablesExpression(*select_query);
|
||||
|
||||
bool first = true;
|
||||
String current_database = context.getCurrentDatabase();
|
||||
for (const auto * table_expression : tables_expression)
|
||||
{
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
|
||||
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
|
||||
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
if (!first)
|
||||
{
|
||||
/// For joined tables qualify duplicating names.
|
||||
for (auto & name_and_type : names_and_types)
|
||||
if (source_columns_set.count(name_and_type.name))
|
||||
name_and_type.name = table_name.getQualifiedNamePrefix() + name_and_type.name;
|
||||
}
|
||||
|
||||
first = false;
|
||||
|
||||
table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
|
||||
}
|
||||
}
|
||||
else
|
||||
table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
|
||||
|
||||
QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(table_with_columns));
|
||||
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name));
|
||||
QueryNormalizer(normalizer_data).visit(query);
|
||||
}
|
||||
|
||||
@ -754,7 +725,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
|
||||
/// Common subexpression elimination. Rewrite rules.
|
||||
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
|
||||
context, select_query, settings.asterisk_left_columns_only != 0);
|
||||
context, select_query);
|
||||
|
||||
/// Remove unneeded columns according to 'required_result_columns'.
|
||||
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
||||
|
@ -1,5 +1,5 @@
|
||||
0 0 0
|
||||
2 2 1
|
||||
4 4 2
|
||||
6 6 3
|
||||
8 8 4
|
||||
0 0
|
||||
2 1
|
||||
4 2
|
||||
6 3
|
||||
8 4
|
||||
|
@ -1,15 +1,15 @@
|
||||
0 0 0
|
||||
0 0 1
|
||||
1 1 2
|
||||
1 1 3
|
||||
2 2 4
|
||||
2 2 5
|
||||
3 3 6
|
||||
3 3 7
|
||||
4 4 8
|
||||
4 4 9
|
||||
5 5 0
|
||||
6 6 0
|
||||
7 7 0
|
||||
8 8 0
|
||||
9 9 0
|
||||
0 0
|
||||
0 1
|
||||
1 2
|
||||
1 3
|
||||
2 4
|
||||
2 5
|
||||
3 6
|
||||
3 7
|
||||
4 8
|
||||
4 9
|
||||
5 0
|
||||
6 0
|
||||
7 0
|
||||
8 0
|
||||
9 0
|
||||
|
@ -1,10 +1,10 @@
|
||||
0 0 0
|
||||
0 0 1
|
||||
1 1 2
|
||||
1 1 3
|
||||
2 2 4
|
||||
2 2 5
|
||||
3 3 6
|
||||
3 3 7
|
||||
4 4 8
|
||||
4 4 9
|
||||
0 0
|
||||
0 1
|
||||
1 2
|
||||
1 3
|
||||
2 4
|
||||
2 5
|
||||
3 6
|
||||
3 7
|
||||
4 8
|
||||
4 9
|
||||
|
@ -1,15 +1,15 @@
|
||||
A A 0
|
||||
A A 1
|
||||
B B 2
|
||||
B B 3
|
||||
C C 4
|
||||
C C 5
|
||||
D D 6
|
||||
D D 7
|
||||
E E 8
|
||||
E E 9
|
||||
F F 0
|
||||
G G 0
|
||||
H H 0
|
||||
I I 0
|
||||
J J 0
|
||||
A 0
|
||||
A 1
|
||||
B 2
|
||||
B 3
|
||||
C 4
|
||||
C 5
|
||||
D 6
|
||||
D 7
|
||||
E 8
|
||||
E 9
|
||||
F 0
|
||||
G 0
|
||||
H 0
|
||||
I 0
|
||||
J 0
|
||||
|
@ -1,2 +1,2 @@
|
||||
SET max_block_size = 10;
|
||||
SELECT * FROM (select toUInt64(1) s limit 1) any right join (select number s from numbers(11)) using (s) ORDER BY s;
|
||||
SELECT * FROM (select toUInt64(1) s limit 1) any right join (select number s, s as x from numbers(11)) using (s) ORDER BY s;
|
||||
|
@ -14,10 +14,10 @@
|
||||
2 facebook.com
|
||||
1 google.com
|
||||
2 yandex.ru
|
||||
1 baidu.com 1 baidu.com
|
||||
1 google.com 1 google.com
|
||||
2 facebook.com 2 facebook.com
|
||||
2 yandex.ru 2 yandex.ru
|
||||
1 baidu.com
|
||||
1 google.com
|
||||
2 facebook.com
|
||||
2 yandex.ru
|
||||
1
|
||||
1
|
||||
2
|
||||
|
@ -3,7 +3,7 @@
|
||||
1
|
||||
1
|
||||
-------Need push down-------
|
||||
0 0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
@ -23,13 +23,13 @@
|
||||
1 2000-01-01 1
|
||||
2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1 2000-01-01 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
1 2000-01-01 2000-01-01 1 test string 1 1
|
||||
1 2000-01-01 1 test string 1 1
|
||||
2000-01-01 1 test string 1 1
|
||||
2000-01-01 2 test string 2 2
|
||||
1
|
||||
1
|
||||
-------Push to having expression, need check.-------
|
||||
-------Compatibility test-------
|
||||
1 2000-01-01 2000-01-01 1 test string 1 1
|
||||
1 2000-01-01 test string 1 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
1 2
|
||||
1 2 3 1 4 5
|
||||
1 2 1 3 1 3
|
||||
1 2 1 3 1 3 3
|
||||
1 2 3 4 5
|
||||
1 2 3 1 3
|
||||
1 2 3 1 3 3
|
||||
|
@ -1,3 +1,39 @@
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
1 John Robert
|
||||
1 John Susan
|
||||
3 Daniel Sarah
|
||||
4 James David
|
||||
4 James Joseph
|
||||
5 Amanda Robert
|
||||
|
@ -11,3 +11,29 @@ SELECT * FROM test.using1 ALL LEFT JOIN (SELECT * FROM test.using2) USING (a, a,
|
||||
|
||||
DROP TABLE test.using1;
|
||||
DROP TABLE test.using2;
|
||||
|
||||
--
|
||||
|
||||
use test;
|
||||
drop table if exists persons;
|
||||
drop table if exists children;
|
||||
|
||||
create table persons (id String, name String) engine MergeTree order by id;
|
||||
create table children (id String, childName String) engine MergeTree order by id;
|
||||
|
||||
insert into persons (id, name)
|
||||
values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda');
|
||||
|
||||
insert into children (id, childName)
|
||||
values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), ('4', 'David'), ('4', 'Joseph'), ('5', 'Robert');
|
||||
|
||||
select * from persons all inner join children using id;
|
||||
select * from persons all inner join (select * from children) as j using id;
|
||||
select * from (select * from persons) as s all inner join (select * from children ) as j using id;
|
||||
--
|
||||
select * from persons all inner join (select * from children) using id;
|
||||
select * from (select * from persons) all inner join (select * from children) using id;
|
||||
select * from (select * from persons) as s all inner join (select * from children) using id;
|
||||
|
||||
drop table persons;
|
||||
drop table children;
|
||||
|
@ -9,7 +9,7 @@ insert into test.s values(1,1);
|
||||
|
||||
select a, b, s_a, s_b from test.t all left join (select a,b,a s_a, b s_b from test.s) using (a,b);
|
||||
select '-';
|
||||
select * from test.t all left join test.s using (a,b);
|
||||
select t.*, s.* from test.t all left join test.s using (a,b);
|
||||
select '-';
|
||||
select a,b,s_a,s_b from test.t all left join (select a, b, a s_a, b s_b from test.s) s on (s.a = t.a and s.b = t.b);
|
||||
select '-';
|
||||
|
@ -7,7 +7,7 @@ USE test;
|
||||
CREATE VIEW test AS SELECT 1 AS N;
|
||||
CREATE VIEW test_view AS SELECT * FROM test;
|
||||
CREATE VIEW test_nested_view AS SELECT * FROM (SELECT * FROM test);
|
||||
CREATE VIEW test_joined_view AS SELECT * FROM test ANY LEFT JOIN test USING N;
|
||||
CREATE VIEW test_joined_view AS SELECT *, N AS x FROM test ANY LEFT JOIN test USING N;
|
||||
|
||||
SELECT * FROM test_view;
|
||||
SELECT * FROM test_nested_view;
|
||||
|
@ -1,12 +1,12 @@
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0 0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
-
|
||||
0 0
|
||||
0 0
|
||||
|
Loading…
Reference in New Issue
Block a user