JOIN with USING: do not duplicate columns

This commit is contained in:
chertus 2019-01-24 17:22:58 +03:00
parent f13d094a4e
commit cbbf04204b
16 changed files with 245 additions and 116 deletions

View File

@ -1,13 +1,16 @@
#include <Poco/String.h>
#include <Core/Names.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/Context.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Poco/String.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <IO/WriteHelpers.h>
@ -16,10 +19,14 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int TOO_DEEP_AST;
extern const int CYCLIC_ALIASES;
}
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
class CheckASTDepth
{
public:
@ -135,7 +142,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
/// Replace *, alias.*, database.table.* with a list of columns.
void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data)
{
auto & tables_with_columns = data.tables_with_columns;
const auto & tables_with_columns = data.tables_with_columns;
const auto & source_columns_set = data.source_columns_set;
ASTs old_children;
if (data.processAsterisks())
@ -162,22 +170,43 @@ void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & dat
{
if (typeid_cast<const ASTAsterisk *>(child.get()))
{
for (const auto & pr : tables_with_columns)
for (const auto & column_name : pr.second)
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
bool first_table = true;
for (const auto & [table_name, table_columns] : tables_with_columns)
{
for (const auto & column_name : table_columns)
if (first_table || !data.join_using_columns.count(column_name))
{
/// qualified names for duplicates
if (!first_table && source_columns_set && source_columns_set->count(column_name))
node.children.emplace_back(std::make_shared<ASTIdentifier>(table_name.getQualifiedNamePrefix() + column_name));
else
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
}
first_table = false;
}
}
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
{
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
bool first_table = true;
for (const auto & [table_name, table_columns] : tables_with_columns)
{
if (ident_db_and_name.satisfies(table_name, true))
{
for (const auto & column_name : table_columns)
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
{
/// qualified names for duplicates
if (!first_table && source_columns_set && source_columns_set->count(column_name))
node.children.emplace_back(std::make_shared<ASTIdentifier>(table_name.getQualifiedNamePrefix() + column_name));
else
node.children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
}
break;
}
first_table = false;
}
}
else
@ -198,6 +227,11 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
/// special visitChildren() for ASTSelectQuery
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data)
{
extractTablesWithColumns(select, data);
if (auto join = select.join())
extractJoinUsingColumns(join->table_join, data);
for (auto & child : ast->children)
{
if (typeid_cast<const ASTSelectQuery *>(child.get()) ||
@ -312,4 +346,46 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
}
}
/// Rebuilds data.tables_with_columns from the table expressions of the given SELECT query.
/// Leaves data untouched when there is no context, or when the query has no (or an empty) tables list.
void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data)
{
    if (!data.context)
        return;

    if (!select_query.tables || select_query.tables->children.empty())
        return;

    /// The previously stored list is replaced by the tables of this SELECT.
    data.tables_with_columns.clear();

    String database = data.context->getCurrentDatabase();
    for (const ASTTableExpression * expression : getSelectTablesExpression(select_query))
    {
        DatabaseAndTableWithAlias table(*expression, database);

        NamesAndTypesList columns = getNamesAndTypeListFromTableExpression(*expression, *data.context);
        /// NOTE(review): presumably drops repeated column names in place; the returned NameSet is not needed here.
        removeDuplicateColumns(columns);

        data.tables_with_columns.emplace_back(std::move(table), columns.getNames());
    }
}
/// 'select * from a join b using id' should result in one 'id' column.
///
/// Collects the names of the JOIN ... USING keys into data.join_using_columns.
/// Each key of the USING expression list contributes one name:
///  - an identifier contributes the name returned by getIdentifierName();
///  - a literal contributes its getColumnName();
///  - any other expression must have an alias, which is used as the name.
/// Throws LOGICAL_ERROR for a key that is neither an identifier nor a literal and has no alias.
/// typeid_cast to a reference is expected to throw if `ast` is not an ASTTableJoin.
void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data)
{
    /// Nothing to extract without a join node; this also avoids dereferencing a null pointer below.
    if (!ast)
        return;

    const auto & table_join = typeid_cast<const ASTTableJoin &>(*ast);
    if (!table_join.using_expression_list)
        return;

    const auto & keys = typeid_cast<const ASTExpressionList &>(*table_join.using_expression_list);
    for (const auto & key : keys.children)
    {
        if (auto opt_column = getIdentifierName(key))
        {
            data.join_using_columns.insert(*opt_column);
        }
        else if (typeid_cast<const ASTLiteral *>(key.get()))
        {
            /// Only the node type matters here, so the cast result is not bound to a variable.
            data.join_using_columns.insert(key->getColumnName());
        }
        else
        {
            String alias = key->tryGetAlias();
            if (alias.empty())
                throw Exception("Logical error: expected identifier or alias, got: " + key->getID(), ErrorCodes::LOGICAL_ERROR);
            data.join_using_columns.insert(alias);
        }
    }
}
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <unordered_set>
#include <Core/Names.h>
#include <Parsers/IAST.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
@ -17,11 +19,11 @@ inline bool functionIsInOrGlobalInOperator(const String & name)
return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn";
}
class ASTFunction;
class ASTIdentifier;
class ASTExpressionList;
struct ASTTablesInSelectQueryElement;
class Context;
class QueryNormalizer
@ -52,7 +54,10 @@ public:
const Aliases & aliases;
const ExtractedSettings settings;
const std::vector<TableWithColumnNames> tables_with_columns;
const Context * context;
const NameSet * source_columns_set;
std::vector<TableWithColumnNames> tables_with_columns;
std::unordered_set<String> join_using_columns;
/// tmp data
size_t level;
@ -60,10 +65,22 @@ public:
SetOfASTs current_asts; /// vertices in the current call stack of this method
std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
Data(const Aliases & aliases_, ExtractedSettings && settings_, std::vector<TableWithColumnNames> && tables_with_columns_ = {})
Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_,
const NameSet & source_columns_set, Names && all_columns)
: aliases(aliases_)
, settings(settings_)
, tables_with_columns(tables_with_columns_)
, context(&context_)
, source_columns_set(&source_columns_set)
, level(0)
{
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns));
}
Data(const Aliases & aliases_, ExtractedSettings && settings_)
: aliases(aliases_)
, settings(settings_)
, context(nullptr)
, source_columns_set(nullptr)
, level(0)
{}
@ -91,6 +108,9 @@ private:
static void visit(ASTSelectQuery &, const ASTPtr &, Data &);
static void visitChildren(const ASTPtr &, Data & data);
static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data);
static void extractJoinUsingColumns(const ASTPtr ast, Data & data);
};
}

View File

@ -100,12 +100,13 @@ void normalizeTree(
const Names & source_columns,
const NameSet & source_columns_set,
const Context & context,
const ASTSelectQuery * select_query,
bool asterisk_left_columns_only)
const ASTSelectQuery * select_query)
{
const auto & settings = context.getSettingsRef();
Names all_columns_name = source_columns;
if (!asterisk_left_columns_only)
if (!settings.asterisk_left_columns_only)
{
auto columns_from_joined_table = result.analyzed_join.getColumnsFromJoinedTable(source_columns_set, context, select_query);
for (auto & column : columns_from_joined_table)
@ -115,37 +116,7 @@ void normalizeTree(
if (all_columns_name.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
std::vector<QueryNormalizer::TableWithColumnNames> table_with_columns;
if (select_query && select_query->tables && !select_query->tables->children.empty())
{
std::vector<const ASTTableExpression *> tables_expression = getSelectTablesExpression(*select_query);
bool first = true;
String current_database = context.getCurrentDatabase();
for (const auto * table_expression : tables_expression)
{
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
removeDuplicateColumns(names_and_types);
if (!first)
{
/// For joined tables qualify duplicating names.
for (auto & name_and_type : names_and_types)
if (source_columns_set.count(name_and_type.name))
name_and_type.name = table_name.getQualifiedNamePrefix() + name_and_type.name;
}
first = false;
table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
else
table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(table_with_columns));
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name));
QueryNormalizer(normalizer_data).visit(query);
}
@ -754,7 +725,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
context, select_query, settings.asterisk_left_columns_only != 0);
context, select_query);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.

View File

@ -1,5 +1,5 @@
0 0 0
2 2 1
4 4 2
6 6 3
8 8 4
0 0
2 1
4 2
6 3
8 4

View File

@ -1,15 +1,15 @@
0 0 0
0 0 1
1 1 2
1 1 3
2 2 4
2 2 5
3 3 6
3 3 7
4 4 8
4 4 9
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
0 0
0 1
1 2
1 3
2 4
2 5
3 6
3 7
4 8
4 9
5 0
6 0
7 0
8 0
9 0

View File

@ -1,10 +1,10 @@
0 0 0
0 0 1
1 1 2
1 1 3
2 2 4
2 2 5
3 3 6
3 3 7
4 4 8
4 4 9
0 0
0 1
1 2
1 3
2 4
2 5
3 6
3 7
4 8
4 9

View File

@ -1,15 +1,15 @@
A A 0
A A 1
B B 2
B B 3
C C 4
C C 5
D D 6
D D 7
E E 8
E E 9
F F 0
G G 0
H H 0
I I 0
J J 0
A 0
A 1
B 2
B 3
C 4
C 5
D 6
D 7
E 8
E 9
F 0
G 0
H 0
I 0
J 0

View File

@ -1,2 +1,2 @@
SET max_block_size = 10;
SELECT * FROM (select toUInt64(1) s limit 1) any right join (select number s from numbers(11)) using (s) ORDER BY s;
SELECT * FROM (select toUInt64(1) s limit 1) any right join (select number s, s as x from numbers(11)) using (s) ORDER BY s;

View File

@ -14,10 +14,10 @@
2 facebook.com
1 google.com
2 yandex.ru
1 baidu.com 1 baidu.com
1 google.com 1 google.com
2 facebook.com 2 facebook.com
2 yandex.ru 2 yandex.ru
1 baidu.com
1 google.com
2 facebook.com
2 yandex.ru
1
1
2

View File

@ -3,7 +3,7 @@
1
1
-------Need push down-------
0 0
0
1
1
1
@ -23,13 +23,13 @@
1 2000-01-01 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1 2000-01-01 test string 1 1
2000-01-01 1 test string 1 1
1 2000-01-01 2000-01-01 1 test string 1 1
1 2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 2 test string 2 2
1
1
-------Push to having expression, need check.-------
-------Compatibility test-------
1 2000-01-01 2000-01-01 1 test string 1 1
1 2000-01-01 test string 1 1

View File

@ -1,4 +1,4 @@
1 2
1 2 3 1 4 5
1 2 1 3 1 3
1 2 1 3 1 3 3
1 2 3 4 5
1 2 3 1 3
1 2 3 1 3 3

View File

@ -1,3 +1,39 @@
1 1 1 1
2 2 2 2
3 3 3 3
1 1
2 2
3 3
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert
1 John Robert
1 John Susan
3 Daniel Sarah
4 James David
4 James Joseph
5 Amanda Robert

View File

@ -11,3 +11,29 @@ SELECT * FROM test.using1 ALL LEFT JOIN (SELECT * FROM test.using2) USING (a, a,
DROP TABLE test.using1;
DROP TABLE test.using2;
--
use test;
drop table if exists persons;
drop table if exists children;
create table persons (id String, name String) engine MergeTree order by id;
create table children (id String, childName String) engine MergeTree order by id;
insert into persons (id, name)
values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda');
insert into children (id, childName)
values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), ('4', 'David'), ('4', 'Joseph'), ('5', 'Robert');
select * from persons all inner join children using id;
select * from persons all inner join (select * from children) as j using id;
select * from (select * from persons) as s all inner join (select * from children ) as j using id;
--
select * from persons all inner join (select * from children) using id;
select * from (select * from persons) all inner join (select * from children) using id;
select * from (select * from persons) as s all inner join (select * from children) using id;
drop table persons;
drop table children;

View File

@ -9,7 +9,7 @@ insert into test.s values(1,1);
select a, b, s_a, s_b from test.t all left join (select a,b,a s_a, b s_b from test.s) using (a,b);
select '-';
select * from test.t all left join test.s using (a,b);
select t.*, s.* from test.t all left join test.s using (a,b);
select '-';
select a,b,s_a,s_b from test.t all left join (select a, b, a s_a, b s_b from test.s) s on (s.a = t.a and s.b = t.b);
select '-';

View File

@ -7,7 +7,7 @@ USE test;
CREATE VIEW test AS SELECT 1 AS N;
CREATE VIEW test_view AS SELECT * FROM test;
CREATE VIEW test_nested_view AS SELECT * FROM (SELECT * FROM test);
CREATE VIEW test_joined_view AS SELECT * FROM test ANY LEFT JOIN test USING N;
CREATE VIEW test_joined_view AS SELECT *, N AS x FROM test ANY LEFT JOIN test USING N;
SELECT * FROM test_view;
SELECT * FROM test_nested_view;

View File

@ -1,12 +1,12 @@
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0 0
0
0
0
0
0
0
0
0
0
-
0 0
0 0