Merge pull request #4307 from 4ertus2/joins

New portion of SyntaxAnalyzer refactoring
This commit is contained in:
alexey-milovidov 2019-02-09 01:04:46 +03:00 committed by GitHub
commit bb333643f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 216 additions and 218 deletions

View File

@ -9,6 +9,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Storages/IStorage.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
@ -51,24 +52,13 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
return analyzer.getActions(false);
}
NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join,
const ExpressionActionsPtr & joined_block_actions) const
/// Returns `original_name` of every column of columns_from_joined_table whose current name
/// (name_and_type.name) is listed in `required_columns_from_joined_table`.
/// The result keeps the order of columns_from_joined_table.
Names AnalyzedJoin::getOriginalColumnNames(const NameSet & required_columns_from_joined_table) const
{
    Names original_columns;
    for (const auto & column : columns_from_joined_table)
        if (required_columns_from_joined_table.count(column.name_and_type.name))
            original_columns.emplace_back(column.original_name);
    return original_columns;
}
const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable(
@ -104,6 +94,30 @@ const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable(
return columns_from_joined_table;
}
void AnalyzedJoin::calculateAvailableJoinedColumns(
const NameSet & source_columns, const Context & context, const ASTSelectQuery * select_query_with_join, bool make_nullable)
{
const auto & columns = getColumnsFromJoinedTable(source_columns, context, select_query_with_join);
NameSet joined_columns;
for (auto & column : columns)
{
auto & column_name = column.name_and_type.name;
auto & column_type = column.name_and_type.type;
auto & original_name = column.original_name;
{
if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense.
continue;
joined_columns.insert(column_name);
auto type = make_nullable ? makeNullable(column_type) : column_type;
available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name);
}
}
}
NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context)
{

View File

@ -61,18 +61,29 @@ struct AnalyzedJoin
/// It's columns_from_joined_table without duplicate columns and possibly modified types.
JoinedColumnsList available_joined_columns;
/// Registers the same AST as a join key for both sides.
/// The left key is named by getColumnName(), the right key by getAliasOrColumnName().
void addSimpleKey(const ASTPtr & ast)
{
    auto left_name = ast->getColumnName();
    auto right_name = ast->getAliasOrColumnName();

    key_names_left.push_back(left_name);
    key_names_right.push_back(right_name);

    key_asts_left.push_back(ast);
    key_asts_right.push_back(ast);
}
ExpressionActionsPtr createJoinedBlockActions(
const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns.
const ASTSelectQuery * select_query_with_join,
const Context & context) const;
/// Columns which will be used in query from joined table.
NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join,
const ExpressionActionsPtr & joined_block_actions) const;
Names getOriginalColumnNames(const NameSet & required_columns) const;
const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns,
const Context & context,
const ASTSelectQuery * select_query_with_join);
void calculateAvailableJoinedColumns(const NameSet & source_columns,
const Context & context,
const ASTSelectQuery * select_query_with_join,
bool make_nullable);
};
struct ASTTableExpression;

View File

@ -1,5 +1,6 @@
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/AnalyzedJoin.h> /// for getNamesAndTypeListFromTableExpression
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
@ -12,6 +13,9 @@
namespace DB
{
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
{
alias = identifier.tryGetAlias();
@ -144,4 +148,26 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number
return nullptr;
}
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context)
{
std::vector<TableWithColumnNames> tables_with_columns;
if (select_query.tables && !select_query.tables->children.empty())
{
String current_database = context.getCurrentDatabase();
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
{
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
removeDuplicateColumns(names_and_types);
tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
return tables_with_columns;
}
}

View File

@ -4,6 +4,7 @@
#include <optional>
#include <Core/Types.h>
#include <Core/Names.h>
namespace DB
@ -15,6 +16,7 @@ using ASTPtr = std::shared_ptr<IAST>;
class ASTSelectQuery;
class ASTIdentifier;
struct ASTTableExpression;
class Context;
/// Extracts database name (and/or alias) from table expression or identifier
@ -36,9 +38,13 @@ struct DatabaseAndTableWithAlias
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
};
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context);
std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);

View File

@ -510,6 +510,17 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only
columns_added_by_join_list));
}
/// Adds to `required_columns` every right-side join key name and every name from
/// columns_from_joined_table that the `sample` block does not contain.
static void appendRequiredColumns(NameSet & required_columns, const Block & sample, const AnalyzedJoin & analyzed_join)
{
    auto require_if_missing = [&](const String & name)
    {
        if (!sample.has(name))
            required_columns.insert(name);
    };

    for (const auto & key_name : analyzed_join.key_names_right)
        require_if_missing(key_name);

    for (const auto & joined_column : analyzed_join.columns_from_joined_table)
        require_if_missing(joined_column.name_and_type.name);
}
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{
assertSelect();
@ -566,6 +577,11 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
if (!subquery_for_set.join)
{
auto & analyzed_join = analyzedJoin();
/// Actions which need to be calculated on joined block.
ExpressionActionsPtr joined_block_actions =
analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context);
/** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
* - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
* in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
@ -582,15 +598,15 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
else if (table_to_join.database_and_table_name)
table = table_to_join.database_and_table_name;
const JoinedColumnsList & columns_from_joined_table = analyzedJoin().columns_from_joined_table;
Names action_columns = joined_block_actions->getRequiredColumns();
NameSet required_columns(action_columns.begin(), action_columns.end());
Names original_columns;
for (const auto & column : columns_from_joined_table)
if (required_columns_from_joined_table.count(column.name_and_type.name))
original_columns.emplace_back(column.original_name);
appendRequiredColumns(required_columns, joined_block_actions->getSampleBlock(), analyzed_join);
Names original_columns = analyzed_join.getOriginalColumnNames(required_columns);
auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns);
subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table);
subquery_for_set.makeSource(interpreter, analyzed_join.columns_from_joined_table, required_columns);
}
Block sample_block = subquery_for_set.renamedSampleBlock();
@ -925,15 +941,6 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio
aggregates = aggregate_descriptions;
}
/// Drops everything up to and including the first dot:
/// db.table.column -> table.column, table.column -> column.
/// A name without a dot is returned unchanged.
static String cropDatabaseOrTableName(const String & name)
{
    const size_t dot_pos = name.find('.');
    if (dot_pos == std::string::npos)
        return name;
    return name.substr(dot_pos + 1);
}
void ExpressionAnalyzer::collectUsedColumns()
{
/** Calculate which columns are required to execute the expression.
@ -993,54 +1000,6 @@ void ExpressionAnalyzer::collectUsedColumns()
required.erase(name);
}
}
/// @fix filter required columns according to misqualified names in JOIN ON
if (columns_context.has_table_join &&
columns_context.tables.size() >= 2 &&
columns_context.tables[1].join &&
columns_context.tables[1].join->on_expression)
{
NameSet fixed_required;
for (const auto & req_name : required)
{
bool collated = false;
String cropped_name = req_name;
static const constexpr size_t max_column_prefix = 2;
for (size_t i = 0; i < max_column_prefix && !collated; ++i)
{
cropped_name = cropDatabaseOrTableName(cropped_name);
if (avaliable_columns.count(cropped_name))
{
fixed_required.insert(cropped_name);
collated = true;
break;
}
for (const auto & joined_column : analyzed_join.available_joined_columns)
{
auto & name = joined_column.name_and_type.name;
if (cropped_name == name)
{
columns_added_by_join.push_back(joined_column);
collated = true;
break;
}
}
}
if (!collated)
fixed_required.insert(req_name);
}
required.swap(fixed_required);
}
joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context);
required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions);
}
if (columns_context.has_array_join)
@ -1090,6 +1049,7 @@ void ExpressionAnalyzer::collectUsedColumns()
if (!unknown_required_source_columns.empty())
{
std::stringstream ss;
ss << "query: '" << query << "' ";
ss << columns_context;
ss << "source_columns: ";
for (const auto & name : source_columns)

View File

@ -67,12 +67,6 @@ struct ExpressionAnalyzerData
/// Columns will be added to block by join.
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
/// Actions which need to be calculated on joined block.
ExpressionActionsPtr joined_block_actions;
/// Columns which will be used in query from joined table. Duplicate names are qualified.
NameSet required_columns_from_joined_table;
protected:
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
const NameSet & required_result_columns_,

View File

@ -10,8 +10,10 @@ namespace DB
struct FindIdentifierBestTableData
{
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;
const std::vector<DatabaseAndTableWithAlias> & tables;
std::vector<std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>> identifier_table;
std::vector<IdentifierWithTable> identifier_table;
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);

View File

@ -37,6 +37,12 @@ std::optional<String> IdentifierSemantic::getTableName(const ASTPtr & ast)
return {};
}
/// Stores `value` in the need_long_name flag of the identifier's semantic data.
/// setColumnNormalName() reads this flag to decide whether to produce a qualified name.
void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value)
{
    auto & semantic = *identifier.semantic;
    semantic.need_long_name = value;
}
std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
{
if (identifier.name_parts.size() > 2)
@ -97,10 +103,30 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t
identifier.name.swap(new_name);
}
void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
/// Gives the identifier its "normal" name relative to `db_and_table`:
/// first the short name (via setColumnShortName with the match returned by canReferColumnToTable),
/// then, only if need_long_name is set and the table has a non-empty qualified prefix,
/// the name becomes prefix + short name and name_parts becomes {prefix without trailing dot, short name}.
void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
    size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);
    setColumnShortName(identifier, match);

    if (!identifier.semantic->need_long_name)
        return;

    String prefix = db_and_table.getQualifiedNamePrefix();
    if (prefix.empty())
        return;

    /// The prefix is applied exactly once, to the short name. Prepending it to identifier.name
    /// beforehand would double it when name_parts is empty, because shortName() then returns name itself.
    String short_name = identifier.shortName();
    identifier.name = prefix + short_name;
    prefix.resize(prefix.size() - 1); /// crop dot
    identifier.name_parts = {prefix, short_name};
}
/// Returns the name that setColumnNormalName() would produce for `identifier` relative to `db_and_table`.
/// The identifier itself is not modified: the work is done on a clone,
/// and the clone's getAliasOrColumnName() is returned.
String IdentifierSemantic::columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
    ASTPtr normalized = identifier.clone();
    auto & normalized_identifier = typeid_cast<ASTIdentifier &>(*normalized);
    setColumnNormalName(normalized_identifier, db_and_table);
    return normalized->getAliasOrColumnName();
}
}

View File

@ -9,6 +9,7 @@ namespace DB
/// Semantic flags attached to an identifier; both default to false.
struct IdentifierSemanticImpl
{
/// NOTE(review): the meaning of this flag is not visible here - confirm against its users.
bool special = false;
/// When set, IdentifierSemantic::setColumnNormalName produces a table-qualified name instead of the short one.
bool need_long_name = false;
};
/// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier
@ -24,12 +25,14 @@ struct IdentifierSemantic
static std::pair<String, String> extractDatabaseAndTable(const ASTIdentifier & identifier);
static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name
private:
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table);
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
};
}

View File

@ -236,10 +236,22 @@ void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate(
{
if (alias == qualified_name)
{
if (!isIdentifier(ast) && ast->tryGetAlias().empty())
String name;
if (auto * id = typeid_cast<const ASTIdentifier *>(ast.get()))
{
name = id->tryGetAlias();
if (name.empty())
name = id->shortName();
}
else
{
if (ast->tryGetAlias().empty())
ast->setAlias(ast->getColumnName());
name = ast->getAliasOrColumnName();
}
identifier->resetWithAlias(ast->getAliasOrColumnName());
IdentifierSemantic::setNeedLongName(*identifier, false);
identifier->setShortName(name);
}
}
}
@ -326,7 +338,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
std::unordered_map<String, ASTPtr> aliases;
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables};
std::vector<TableWithColumnNames> tables_with_columns;
TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns);
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns};
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
QueryAliasesVisitor::Data query_aliases_data{aliases};

View File

@ -25,8 +25,6 @@ namespace ErrorCodes
extern const int CYCLIC_ALIASES;
}
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
class CheckASTDepth
{
@ -143,7 +141,10 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
/// Replace *, alias.*, database.table.* with a list of columns.
void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data)
{
const auto & tables_with_columns = data.tables_with_columns;
if (!data.tables_with_columns)
return;
const auto & tables_with_columns = *data.tables_with_columns;
const auto & source_columns_set = data.source_columns_set;
ASTs old_children;
@ -228,8 +229,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
/// special visitChildren() for ASTSelectQuery
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data)
{
extractTablesWithColumns(select, data);
if (auto join = select.join())
extractJoinUsingColumns(join->table_join, data);
@ -253,7 +252,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data &
}
/// Don't go into subqueries.
/// Don't go into components of compound identifiers.
/// Don't go into select query. It processes children itself.
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
@ -346,25 +344,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
}
}
void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data)
{
if (data.context && select_query.tables && !select_query.tables->children.empty())
{
data.tables_with_columns.clear();
String current_database = data.context->getCurrentDatabase();
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
{
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context);
removeDuplicateColumns(names_and_types);
data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
}
/// 'select * from a join b using id' should result one 'id' column
void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data)
{

View File

@ -46,8 +46,6 @@ class QueryNormalizer
};
public:
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
struct Data
{
using SetOfASTs = std::set<const IAST *>;
@ -57,7 +55,7 @@ public:
const ExtractedSettings settings;
const Context * context;
const NameSet * source_columns_set;
std::vector<TableWithColumnNames> tables_with_columns;
const std::vector<TableWithColumnNames> * tables_with_columns;
std::unordered_set<String> join_using_columns;
/// tmp data
@ -67,25 +65,25 @@ public:
std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_,
const NameSet & source_columns_set, Names && all_columns)
const NameSet & source_columns_set, const std::vector<TableWithColumnNames> & tables_with_columns_)
: aliases(aliases_)
, settings(settings_)
, context(&context_)
, source_columns_set(&source_columns_set)
, tables_with_columns(&tables_with_columns_)
, level(0)
{
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns));
}
{}
/// Constructs normalizer data that has only aliases and settings.
/// context, source_columns_set and tables_with_columns are left null; with a null
/// tables_with_columns, visit(ASTExpressionList &) returns early, so asterisks are not expanded.
Data(const Aliases & aliases_, ExtractedSettings && settings_)
: aliases(aliases_)
, settings(settings_)
, context(nullptr)
, source_columns_set(nullptr)
, tables_with_columns(nullptr)
, level(0)
{}
bool processAsterisks() const { return !tables_with_columns.empty(); }
/// True when a table list has been provided and it is not empty.
bool processAsterisks() const
{
    if (!tables_with_columns)
        return false;
    return !tables_with_columns->empty();
}
};
QueryNormalizer(Data & data)
@ -110,7 +108,6 @@ private:
static void visitChildren(const ASTPtr &, Data & data);
static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data);
static void extractJoinUsingColumns(const ASTPtr ast, Data & data);
};

View File

@ -22,7 +22,6 @@
#include <Parsers/queryToString.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <Common/typeid_cast.h>
#include <Core/NamesAndTypes.h>
@ -80,16 +79,14 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
/// Runs TranslateQualifiedNamesVisitor over `query`, using `source_columns` and the
/// already collected `tables_with_columns`. Does nothing when the SELECT query has no tables.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns,
                             const std::vector<TableWithColumnNames> & tables_with_columns)
{
    if (!select_query->tables || select_query->tables->children.empty())
        return;

    LogAST log;
    /// The table list comes from the caller; it is not recomputed here (this function has no Context).
    TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns};
    TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream());
    visitor.visit(query);
}
@ -101,7 +98,8 @@ void normalizeTree(
const Names & source_columns,
const NameSet & source_columns_set,
const Context & context,
const ASTSelectQuery * select_query)
const ASTSelectQuery * select_query,
std::vector<TableWithColumnNames> & tables_with_columns)
{
const auto & settings = context.getSettingsRef();
@ -117,10 +115,12 @@ void normalizeTree(
if (all_columns_name.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name));
if (tables_with_columns.empty())
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns);
QueryNormalizer(normalizer_data).visit(query);
}
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@ -447,7 +447,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
const Context & context)
{
const auto & tables = static_cast<const ASTTablesInSelectQuery &>(*select_query->tables);
const auto * left_tables_element = static_cast<const ASTTablesInSelectQueryElement *>(tables.children.at(0).get());
@ -512,24 +512,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
return table_belonging;
};
std::function<void(ASTPtr &, const DatabaseAndTableWithAlias &, bool)> translate_qualified_names;
translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table)
{
if (IdentifierSemantic::getColumnName(ast))
{
auto * identifier = typeid_cast<ASTIdentifier *>(ast.get());
size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names);
IdentifierSemantic::setColumnShortName(*identifier, match);
if (right_table && source_columns.count(ast->getColumnName()))
IdentifierSemantic::setColumnQualifiedName(*identifier, source_names);
}
for (auto & child : ast->children)
translate_qualified_names(child, source_names, right_table);
};
const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]";
auto throwSyntaxException = [&](const String & msg)
@ -557,9 +539,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table)
{
translate_qualified_names(ast_to_left_table, left_source_names, false);
translate_qualified_names(ast_to_right_table, right_source_names, true);
analyzed_join.key_asts_left.push_back(ast_to_left_table);
analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName());
analyzed_join.key_asts_right.push_back(ast_to_right_table);
@ -613,57 +592,25 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s
const auto & table_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
DatabaseAndTableWithAlias joined_table_name(table_expression, context.getCurrentDatabase());
auto add_name_to_join_keys = [&](Names & join_keys, ASTs & join_asts, const ASTPtr & ast, bool right_table)
{
String name;
if (right_table)
{
name = ast->getAliasOrColumnName();
if (source_columns.count(name))
name = joined_table_name.getQualifiedNamePrefix() + name;
}
else
name = ast->getColumnName();
join_keys.push_back(name);
join_asts.push_back(ast);
};
if (table_join.using_expression_list)
{
auto & keys = typeid_cast<ASTExpressionList &>(*table_join.using_expression_list);
for (const auto & key : keys.children)
{
add_name_to_join_keys(analyzed_join.key_names_left, analyzed_join.key_asts_left, key, false);
add_name_to_join_keys(analyzed_join.key_names_right, analyzed_join.key_asts_right, key, true);
}
analyzed_join.addSimpleKey(key);
/// @warning wrong qualification if the right key is an alias
for (auto & name : analyzed_join.key_names_right)
if (source_columns.count(name))
name = joined_table_name.getQualifiedNamePrefix() + name;
}
else if (table_join.on_expression)
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context);
auto & columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(source_columns, context, select_query);
NameSet joined_columns;
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, context);
auto & settings = context.getSettingsRef();
for (auto & column : columns_from_joined_table)
{
auto & column_name = column.name_and_type.name;
auto & column_type = column.name_and_type.type;
auto & original_name = column.original_name;
{
if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense.
continue;
joined_columns.insert(column_name);
bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left ||
table_join.kind == ASTTableJoin::Kind::Full);
auto type = make_nullable ? makeNullable(column_type) : column_type;
analyzed_join.available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name);
}
}
analyzed_join.calculateAvailableJoinedColumns(source_columns, context, select_query, make_nullable);
}
}
@ -699,9 +646,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
if (source_columns_set.size() != source_columns_list.size())
throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);
std::vector<TableWithColumnNames> tables_with_columns;
if (select_query)
{
translateQualifiedNames(query, select_query, source_columns_set, context);
tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context);
translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns);
/// Depending on the user's profile, check for the execution rights
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
@ -720,7 +670,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
context, select_query);
context, select_query, tables_with_columns);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.

View File

@ -50,25 +50,24 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data &
return {};
}
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data)
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data)
{
if (IdentifierSemantic::getColumnName(identifier))
{
size_t best_table_pos = 0;
size_t best_match = 0;
for (size_t i = 0; i < data.tables.size(); ++i)
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i]))
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first))
if (match > best_match)
{
best_match = match;
best_table_pos = i;
}
IdentifierSemantic::setColumnShortName(identifier, best_match);
/// If a column from the joined table is also present in the source columns, change its name to the qualified one.
if (best_table_pos && data.source_columns.count(ast->getColumnName()))
IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]);
if (best_table_pos && data.source_columns.count(identifier.shortName()))
IdentifierSemantic::setNeedLongName(identifier, true);
IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first);
}
return {};
@ -85,7 +84,7 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
DatabaseAndTableWithAlias db_and_table(ident);
for (const auto & known_table : data.tables)
if (db_and_table.satisfies(known_table, true))
if (db_and_table.satisfies(known_table.first, true))
return {};
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
@ -93,10 +92,11 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
/// Selects which part of the JOIN is handed back to the caller: a pointer to the USING
/// expression list when it is set, otherwise a pointer to the ON expression when that is set.
/// Returns an empty vector when the join has neither.
/// NOTE(review): presumably the visitor continues traversal into the returned nodes - confirm.
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &)
{
/// Both USING and ON expressions are returned from here; USING takes precedence.
std::vector<ASTPtr *> out;
if (join.using_expression_list)
out.push_back(&join.using_expression_list);
else if (join.on_expression)
out.push_back(&join.on_expression);
return out;
}

View File

@ -21,7 +21,16 @@ public:
struct Data
{
const NameSet & source_columns;
const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
static void setTablesOnly(const std::vector<DatabaseAndTableWithAlias> & tables,
std::vector<TableWithColumnNames> & tables_with_columns)
{
tables_with_columns.clear();
tables_with_columns.reserve(tables.size());
for (const auto & table : tables)
tables_with_columns.emplace_back(TableWithColumnNames{table, {}});
}
};
static constexpr const char * label = "TranslateQualifiedNames";

View File

@ -36,12 +36,19 @@ public:
bool compound() const { return !name_parts.empty(); }
bool isShort() const { return name_parts.empty() || name == name_parts.back(); }
void resetWithAlias(const String & new_name)
/// Makes the identifier a plain (non-compound) one named `new_name`:
/// afterwards name_parts is empty, so compound() is false and isShort() is true.
void setShortName(const String & new_name)
{
/// Assign before clearing: new_name may refer to an element of name_parts
/// (shortName() returns a reference to name_parts.back()), which clear() would destroy.
name = new_name;
name_parts.clear();
}
/// Last component of a compound name, or the whole name when there are no name parts.
/// Returns a reference to a member, valid until name or name_parts is modified.
const String & shortName() const
{
    return name_parts.empty() ? name : name_parts.back();
}
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;