mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #4307 from 4ertus2/joins
New portion of SyntaxAnalyzer refactoring
This commit is contained in:
commit
bb333643f0
@ -9,6 +9,7 @@
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
#include <Storages/IStorage.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -51,24 +52,13 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
|
||||
return analyzer.getActions(false);
|
||||
}
|
||||
|
||||
NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join,
|
||||
const ExpressionActionsPtr & joined_block_actions) const
|
||||
Names AnalyzedJoin::getOriginalColumnNames(const NameSet & required_columns_from_joined_table) const
|
||||
{
|
||||
NameSet required_columns_from_joined_table;
|
||||
|
||||
auto required_action_columns = joined_block_actions->getRequiredColumns();
|
||||
required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end());
|
||||
auto sample = joined_block_actions->getSampleBlock();
|
||||
|
||||
for (auto & column : key_names_right)
|
||||
if (!sample.has(column))
|
||||
required_columns_from_joined_table.insert(column);
|
||||
|
||||
for (auto & column : columns_added_by_join)
|
||||
if (!sample.has(column.name_and_type.name))
|
||||
required_columns_from_joined_table.insert(column.name_and_type.name);
|
||||
|
||||
return required_columns_from_joined_table;
|
||||
Names original_columns;
|
||||
for (const auto & column : columns_from_joined_table)
|
||||
if (required_columns_from_joined_table.count(column.name_and_type.name))
|
||||
original_columns.emplace_back(column.original_name);
|
||||
return original_columns;
|
||||
}
|
||||
|
||||
const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable(
|
||||
@ -104,6 +94,30 @@ const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable(
|
||||
return columns_from_joined_table;
|
||||
}
|
||||
|
||||
void AnalyzedJoin::calculateAvailableJoinedColumns(
|
||||
const NameSet & source_columns, const Context & context, const ASTSelectQuery * select_query_with_join, bool make_nullable)
|
||||
{
|
||||
const auto & columns = getColumnsFromJoinedTable(source_columns, context, select_query_with_join);
|
||||
|
||||
NameSet joined_columns;
|
||||
|
||||
for (auto & column : columns)
|
||||
{
|
||||
auto & column_name = column.name_and_type.name;
|
||||
auto & column_type = column.name_and_type.type;
|
||||
auto & original_name = column.original_name;
|
||||
{
|
||||
if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense.
|
||||
continue;
|
||||
|
||||
joined_columns.insert(column_name);
|
||||
|
||||
auto type = make_nullable ? makeNullable(column_type) : column_type;
|
||||
available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context)
|
||||
{
|
||||
|
@ -61,18 +61,29 @@ struct AnalyzedJoin
|
||||
/// It's columns_from_joined_table without duplicate columns and possibly modified types.
|
||||
JoinedColumnsList available_joined_columns;
|
||||
|
||||
void addSimpleKey(const ASTPtr & ast)
|
||||
{
|
||||
key_names_left.push_back(ast->getColumnName());
|
||||
key_names_right.push_back(ast->getAliasOrColumnName());
|
||||
|
||||
key_asts_left.push_back(ast);
|
||||
key_asts_right.push_back(ast);
|
||||
}
|
||||
|
||||
ExpressionActionsPtr createJoinedBlockActions(
|
||||
const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns.
|
||||
const ASTSelectQuery * select_query_with_join,
|
||||
const Context & context) const;
|
||||
|
||||
/// Columns which will be used in query from joined table.
|
||||
NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join,
|
||||
const ExpressionActionsPtr & joined_block_actions) const;
|
||||
Names getOriginalColumnNames(const NameSet & required_columns) const;
|
||||
|
||||
const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns,
|
||||
const Context & context,
|
||||
const ASTSelectQuery * select_query_with_join);
|
||||
void calculateAvailableJoinedColumns(const NameSet & source_columns,
|
||||
const Context & context,
|
||||
const ASTSelectQuery * select_query_with_join,
|
||||
bool make_nullable);
|
||||
};
|
||||
|
||||
struct ASTTableExpression;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Interpreters/AnalyzedJoin.h> /// for getNamesAndTypeListFromTableExpression
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
@ -12,6 +13,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
|
||||
|
||||
|
||||
DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
|
||||
{
|
||||
alias = identifier.tryGetAlias();
|
||||
@ -144,4 +148,26 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context)
|
||||
{
|
||||
std::vector<TableWithColumnNames> tables_with_columns;
|
||||
|
||||
if (select_query.tables && !select_query.tables->children.empty())
|
||||
{
|
||||
String current_database = context.getCurrentDatabase();
|
||||
|
||||
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
|
||||
{
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
|
||||
|
||||
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
|
||||
}
|
||||
}
|
||||
|
||||
return tables_with_columns;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <optional>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Core/Names.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -15,6 +16,7 @@ using ASTPtr = std::shared_ptr<IAST>;
|
||||
class ASTSelectQuery;
|
||||
class ASTIdentifier;
|
||||
struct ASTTableExpression;
|
||||
class Context;
|
||||
|
||||
|
||||
/// Extracts database name (and/or alias) from table expression or identifier
|
||||
@ -36,9 +38,13 @@ struct DatabaseAndTableWithAlias
|
||||
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
|
||||
};
|
||||
|
||||
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
|
||||
|
||||
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
|
||||
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context);
|
||||
|
||||
std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
|
||||
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);
|
||||
|
||||
|
@ -510,6 +510,17 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only
|
||||
columns_added_by_join_list));
|
||||
}
|
||||
|
||||
/// Adds to required_columns every right-side join key and every column of the joined table
/// whose name is not present in the sample block.
static void appendRequiredColumns(NameSet & required_columns, const Block & sample, const AnalyzedJoin & analyzed_join)
{
    const auto add_if_missing = [&](const auto & name)
    {
        if (!sample.has(name))
            required_columns.insert(name);
    };

    for (const auto & key_name : analyzed_join.key_names_right)
        add_if_missing(key_name);

    for (const auto & joined_column : analyzed_join.columns_from_joined_table)
        add_if_missing(joined_column.name_and_type.name);
}
|
||||
|
||||
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
|
||||
{
|
||||
assertSelect();
|
||||
@ -566,6 +577,11 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
|
||||
if (!subquery_for_set.join)
|
||||
{
|
||||
auto & analyzed_join = analyzedJoin();
|
||||
/// Actions which need to be calculated on joined block.
|
||||
ExpressionActionsPtr joined_block_actions =
|
||||
analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context);
|
||||
|
||||
/** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
|
||||
* - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
|
||||
* in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
|
||||
@ -582,15 +598,15 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
else if (table_to_join.database_and_table_name)
|
||||
table = table_to_join.database_and_table_name;
|
||||
|
||||
const JoinedColumnsList & columns_from_joined_table = analyzedJoin().columns_from_joined_table;
|
||||
Names action_columns = joined_block_actions->getRequiredColumns();
|
||||
NameSet required_columns(action_columns.begin(), action_columns.end());
|
||||
|
||||
Names original_columns;
|
||||
for (const auto & column : columns_from_joined_table)
|
||||
if (required_columns_from_joined_table.count(column.name_and_type.name))
|
||||
original_columns.emplace_back(column.original_name);
|
||||
appendRequiredColumns(required_columns, joined_block_actions->getSampleBlock(), analyzed_join);
|
||||
|
||||
Names original_columns = analyzed_join.getOriginalColumnNames(required_columns);
|
||||
|
||||
auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns);
|
||||
subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table);
|
||||
subquery_for_set.makeSource(interpreter, analyzed_join.columns_from_joined_table, required_columns);
|
||||
}
|
||||
|
||||
Block sample_block = subquery_for_set.renamedSampleBlock();
|
||||
@ -925,15 +941,6 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio
|
||||
aggregates = aggregate_descriptions;
|
||||
}
|
||||
|
||||
/// Drops the leading dot-separated component of a name:
/// db.table.column -> table.column, table.column -> column.
/// A name that contains no dot is returned unchanged.
static String cropDatabaseOrTableName(const String & name)
{
    const auto first_dot = name.find('.');
    return first_dot == std::string::npos ? name : name.substr(first_dot + 1);
}
|
||||
|
||||
void ExpressionAnalyzer::collectUsedColumns()
|
||||
{
|
||||
/** Calculate which columns are required to execute the expression.
|
||||
@ -993,54 +1000,6 @@ void ExpressionAnalyzer::collectUsedColumns()
|
||||
required.erase(name);
|
||||
}
|
||||
}
|
||||
|
||||
/// @fix filter required columns according to misqualified names in JOIN ON
|
||||
if (columns_context.has_table_join &&
|
||||
columns_context.tables.size() >= 2 &&
|
||||
columns_context.tables[1].join &&
|
||||
columns_context.tables[1].join->on_expression)
|
||||
{
|
||||
NameSet fixed_required;
|
||||
|
||||
for (const auto & req_name : required)
|
||||
{
|
||||
bool collated = false;
|
||||
String cropped_name = req_name;
|
||||
static const constexpr size_t max_column_prefix = 2;
|
||||
|
||||
for (size_t i = 0; i < max_column_prefix && !collated; ++i)
|
||||
{
|
||||
cropped_name = cropDatabaseOrTableName(cropped_name);
|
||||
|
||||
if (avaliable_columns.count(cropped_name))
|
||||
{
|
||||
fixed_required.insert(cropped_name);
|
||||
collated = true;
|
||||
break;
|
||||
}
|
||||
|
||||
for (const auto & joined_column : analyzed_join.available_joined_columns)
|
||||
{
|
||||
auto & name = joined_column.name_and_type.name;
|
||||
|
||||
if (cropped_name == name)
|
||||
{
|
||||
columns_added_by_join.push_back(joined_column);
|
||||
collated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!collated)
|
||||
fixed_required.insert(req_name);
|
||||
}
|
||||
|
||||
required.swap(fixed_required);
|
||||
}
|
||||
|
||||
joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context);
|
||||
required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions);
|
||||
}
|
||||
|
||||
if (columns_context.has_array_join)
|
||||
@ -1090,6 +1049,7 @@ void ExpressionAnalyzer::collectUsedColumns()
|
||||
if (!unknown_required_source_columns.empty())
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "query: '" << query << "' ";
|
||||
ss << columns_context;
|
||||
ss << "source_columns: ";
|
||||
for (const auto & name : source_columns)
|
||||
|
@ -67,12 +67,6 @@ struct ExpressionAnalyzerData
|
||||
/// Columns will be added to block by join.
|
||||
JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns
|
||||
|
||||
/// Actions which need to be calculated on joined block.
|
||||
ExpressionActionsPtr joined_block_actions;
|
||||
|
||||
/// Columns which will be used in query from joined table. Duplicate names are qualified.
|
||||
NameSet required_columns_from_joined_table;
|
||||
|
||||
protected:
|
||||
ExpressionAnalyzerData(const NamesAndTypesList & source_columns_,
|
||||
const NameSet & required_result_columns_,
|
||||
|
@ -10,8 +10,10 @@ namespace DB
|
||||
struct FindIdentifierBestTableData
|
||||
{
|
||||
using TypeToVisit = ASTIdentifier;
|
||||
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;
|
||||
|
||||
const std::vector<DatabaseAndTableWithAlias> & tables;
|
||||
std::vector<std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>> identifier_table;
|
||||
std::vector<IdentifierWithTable> identifier_table;
|
||||
|
||||
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
|
||||
|
||||
|
@ -37,6 +37,12 @@ std::optional<String> IdentifierSemantic::getTableName(const ASTPtr & ast)
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
/// Stores the need_long_name flag in the semantic data attached to the identifier.
void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value)
{
    auto & semantic = *identifier.semantic;
    semantic.need_long_name = value;
}
|
||||
|
||||
std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
|
||||
{
|
||||
if (identifier.name_parts.size() > 2)
|
||||
@ -97,10 +103,30 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t
|
||||
identifier.name.swap(new_name);
|
||||
}
|
||||
|
||||
void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
|
||||
void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
|
||||
{
|
||||
String prefix = db_and_table.getQualifiedNamePrefix();
|
||||
identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end());
|
||||
size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);
|
||||
|
||||
setColumnShortName(identifier, match);
|
||||
|
||||
if (identifier.semantic->need_long_name)
|
||||
{
|
||||
String prefix = db_and_table.getQualifiedNamePrefix();
|
||||
if (!prefix.empty())
|
||||
{
|
||||
String short_name = identifier.shortName();
|
||||
identifier.name = prefix + short_name;
|
||||
prefix.resize(prefix.size() - 1); /// crop dot
|
||||
identifier.name_parts = {prefix, short_name};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies setColumnNormalName() to a clone of the identifier for the given table
/// and returns getAliasOrColumnName() of that clone.
String IdentifierSemantic::columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
    ASTPtr cloned = identifier.clone();
    auto & cloned_identifier = typeid_cast<ASTIdentifier &>(*cloned);

    setColumnNormalName(cloned_identifier, db_and_table);
    return cloned->getAliasOrColumnName();
}
|
||||
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
struct IdentifierSemanticImpl
|
||||
{
|
||||
bool special = false;
|
||||
bool need_long_name = false;
|
||||
};
|
||||
|
||||
/// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier
|
||||
@ -24,12 +25,14 @@ struct IdentifierSemantic
|
||||
static std::pair<String, String> extractDatabaseAndTable(const ASTIdentifier & identifier);
|
||||
|
||||
static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
|
||||
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
|
||||
static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
|
||||
static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
|
||||
static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
|
||||
static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name
|
||||
|
||||
private:
|
||||
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
|
||||
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table);
|
||||
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -236,10 +236,22 @@ void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate(
|
||||
{
|
||||
if (alias == qualified_name)
|
||||
{
|
||||
if (!isIdentifier(ast) && ast->tryGetAlias().empty())
|
||||
ast->setAlias(ast->getColumnName());
|
||||
String name;
|
||||
if (auto * id = typeid_cast<const ASTIdentifier *>(ast.get()))
|
||||
{
|
||||
name = id->tryGetAlias();
|
||||
if (name.empty())
|
||||
name = id->shortName();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ast->tryGetAlias().empty())
|
||||
ast->setAlias(ast->getColumnName());
|
||||
name = ast->getAliasOrColumnName();
|
||||
}
|
||||
|
||||
identifier->resetWithAlias(ast->getAliasOrColumnName());
|
||||
IdentifierSemantic::setNeedLongName(*identifier, false);
|
||||
identifier->setShortName(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -326,7 +338,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
|
||||
std::unordered_map<String, ASTPtr> aliases;
|
||||
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
|
||||
|
||||
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables};
|
||||
std::vector<TableWithColumnNames> tables_with_columns;
|
||||
TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns);
|
||||
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns};
|
||||
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
|
||||
|
||||
QueryAliasesVisitor::Data query_aliases_data{aliases};
|
||||
|
@ -25,8 +25,6 @@ namespace ErrorCodes
|
||||
extern const int CYCLIC_ALIASES;
|
||||
}
|
||||
|
||||
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
|
||||
|
||||
|
||||
class CheckASTDepth
|
||||
{
|
||||
@ -143,7 +141,10 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
|
||||
/// Replace *, alias.*, database.table.* with a list of columns.
|
||||
void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
const auto & tables_with_columns = data.tables_with_columns;
|
||||
if (!data.tables_with_columns)
|
||||
return;
|
||||
|
||||
const auto & tables_with_columns = *data.tables_with_columns;
|
||||
const auto & source_columns_set = data.source_columns_set;
|
||||
|
||||
ASTs old_children;
|
||||
@ -228,8 +229,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
|
||||
/// special visitChildren() for ASTSelectQuery
|
||||
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data)
|
||||
{
|
||||
extractTablesWithColumns(select, data);
|
||||
|
||||
if (auto join = select.join())
|
||||
extractJoinUsingColumns(join->table_join, data);
|
||||
|
||||
@ -253,7 +252,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data &
|
||||
}
|
||||
|
||||
/// Don't go into subqueries.
|
||||
/// Don't go into components of compound identifiers.
|
||||
/// Don't go into select query. It processes children itself.
|
||||
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
|
||||
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
|
||||
@ -346,25 +344,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
|
||||
}
|
||||
}
|
||||
|
||||
void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data)
|
||||
{
|
||||
if (data.context && select_query.tables && !select_query.tables->children.empty())
|
||||
{
|
||||
data.tables_with_columns.clear();
|
||||
String current_database = data.context->getCurrentDatabase();
|
||||
|
||||
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
|
||||
{
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
|
||||
|
||||
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context);
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 'select * from a join b using id' should result one 'id' column
|
||||
void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data)
|
||||
{
|
||||
|
@ -46,8 +46,6 @@ class QueryNormalizer
|
||||
};
|
||||
|
||||
public:
|
||||
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
using SetOfASTs = std::set<const IAST *>;
|
||||
@ -57,7 +55,7 @@ public:
|
||||
const ExtractedSettings settings;
|
||||
const Context * context;
|
||||
const NameSet * source_columns_set;
|
||||
std::vector<TableWithColumnNames> tables_with_columns;
|
||||
const std::vector<TableWithColumnNames> * tables_with_columns;
|
||||
std::unordered_set<String> join_using_columns;
|
||||
|
||||
/// tmp data
|
||||
@ -67,25 +65,25 @@ public:
|
||||
std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
|
||||
|
||||
Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_,
|
||||
const NameSet & source_columns_set, Names && all_columns)
|
||||
const NameSet & source_columns_set, const std::vector<TableWithColumnNames> & tables_with_columns_)
|
||||
: aliases(aliases_)
|
||||
, settings(settings_)
|
||||
, context(&context_)
|
||||
, source_columns_set(&source_columns_set)
|
||||
, tables_with_columns(&tables_with_columns_)
|
||||
, level(0)
|
||||
{
|
||||
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns));
|
||||
}
|
||||
{}
|
||||
|
||||
Data(const Aliases & aliases_, ExtractedSettings && settings_)
|
||||
: aliases(aliases_)
|
||||
, settings(settings_)
|
||||
, context(nullptr)
|
||||
, source_columns_set(nullptr)
|
||||
, tables_with_columns(nullptr)
|
||||
, level(0)
|
||||
{}
|
||||
|
||||
bool processAsterisks() const { return !tables_with_columns.empty(); }
|
||||
bool processAsterisks() const { return tables_with_columns && !tables_with_columns->empty(); }
|
||||
};
|
||||
|
||||
QueryNormalizer(Data & data)
|
||||
@ -110,7 +108,6 @@ private:
|
||||
|
||||
static void visitChildren(const ASTPtr &, Data & data);
|
||||
|
||||
static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data);
|
||||
static void extractJoinUsingColumns(const ASTPtr ast, Data & data);
|
||||
};
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
@ -80,16 +79,14 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam
|
||||
}
|
||||
|
||||
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
|
||||
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
|
||||
const NameSet & source_columns, const Context & context)
|
||||
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns,
|
||||
const std::vector<TableWithColumnNames> & tables_with_columns)
|
||||
{
|
||||
if (!select_query->tables || select_query->tables->children.empty())
|
||||
return;
|
||||
|
||||
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
|
||||
|
||||
LogAST log;
|
||||
TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables};
|
||||
TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns};
|
||||
TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream());
|
||||
visitor.visit(query);
|
||||
}
|
||||
@ -101,7 +98,8 @@ void normalizeTree(
|
||||
const Names & source_columns,
|
||||
const NameSet & source_columns_set,
|
||||
const Context & context,
|
||||
const ASTSelectQuery * select_query)
|
||||
const ASTSelectQuery * select_query,
|
||||
std::vector<TableWithColumnNames> & tables_with_columns)
|
||||
{
|
||||
const auto & settings = context.getSettingsRef();
|
||||
|
||||
@ -117,10 +115,12 @@ void normalizeTree(
|
||||
if (all_columns_name.empty())
|
||||
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name));
|
||||
if (tables_with_columns.empty())
|
||||
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
|
||||
|
||||
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns);
|
||||
QueryNormalizer(normalizer_data).visit(query);
|
||||
}
|
||||
|
||||
bool hasArrayJoin(const ASTPtr & ast)
|
||||
{
|
||||
if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
|
||||
@ -447,7 +447,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
|
||||
|
||||
/// Parse JOIN ON expression and collect ASTs for joined columns.
|
||||
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
|
||||
const NameSet & source_columns, const Context & context)
|
||||
const Context & context)
|
||||
{
|
||||
const auto & tables = static_cast<const ASTTablesInSelectQuery &>(*select_query->tables);
|
||||
const auto * left_tables_element = static_cast<const ASTTablesInSelectQueryElement *>(tables.children.at(0).get());
|
||||
@ -512,24 +512,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
|
||||
return table_belonging;
|
||||
};
|
||||
|
||||
std::function<void(ASTPtr &, const DatabaseAndTableWithAlias &, bool)> translate_qualified_names;
|
||||
translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table)
|
||||
{
|
||||
if (IdentifierSemantic::getColumnName(ast))
|
||||
{
|
||||
auto * identifier = typeid_cast<ASTIdentifier *>(ast.get());
|
||||
|
||||
size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names);
|
||||
IdentifierSemantic::setColumnShortName(*identifier, match);
|
||||
|
||||
if (right_table && source_columns.count(ast->getColumnName()))
|
||||
IdentifierSemantic::setColumnQualifiedName(*identifier, source_names);
|
||||
}
|
||||
|
||||
for (auto & child : ast->children)
|
||||
translate_qualified_names(child, source_names, right_table);
|
||||
};
|
||||
|
||||
const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
|
||||
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]";
|
||||
auto throwSyntaxException = [&](const String & msg)
|
||||
@ -557,9 +539,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
|
||||
|
||||
auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table)
|
||||
{
|
||||
translate_qualified_names(ast_to_left_table, left_source_names, false);
|
||||
translate_qualified_names(ast_to_right_table, right_source_names, true);
|
||||
|
||||
analyzed_join.key_asts_left.push_back(ast_to_left_table);
|
||||
analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName());
|
||||
analyzed_join.key_asts_right.push_back(ast_to_right_table);
|
||||
@ -613,57 +592,25 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s
|
||||
const auto & table_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
|
||||
DatabaseAndTableWithAlias joined_table_name(table_expression, context.getCurrentDatabase());
|
||||
|
||||
auto add_name_to_join_keys = [&](Names & join_keys, ASTs & join_asts, const ASTPtr & ast, bool right_table)
|
||||
{
|
||||
String name;
|
||||
if (right_table)
|
||||
{
|
||||
name = ast->getAliasOrColumnName();
|
||||
if (source_columns.count(name))
|
||||
name = joined_table_name.getQualifiedNamePrefix() + name;
|
||||
}
|
||||
else
|
||||
name = ast->getColumnName();
|
||||
|
||||
join_keys.push_back(name);
|
||||
join_asts.push_back(ast);
|
||||
};
|
||||
|
||||
if (table_join.using_expression_list)
|
||||
{
|
||||
auto & keys = typeid_cast<ASTExpressionList &>(*table_join.using_expression_list);
|
||||
for (const auto & key : keys.children)
|
||||
{
|
||||
add_name_to_join_keys(analyzed_join.key_names_left, analyzed_join.key_asts_left, key, false);
|
||||
add_name_to_join_keys(analyzed_join.key_names_right, analyzed_join.key_asts_right, key, true);
|
||||
}
|
||||
analyzed_join.addSimpleKey(key);
|
||||
|
||||
/// @warning wrong qualification if the right key is an alias
|
||||
for (auto & name : analyzed_join.key_names_right)
|
||||
if (source_columns.count(name))
|
||||
name = joined_table_name.getQualifiedNamePrefix() + name;
|
||||
}
|
||||
else if (table_join.on_expression)
|
||||
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context);
|
||||
|
||||
auto & columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(source_columns, context, select_query);
|
||||
|
||||
NameSet joined_columns;
|
||||
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, context);
|
||||
|
||||
auto & settings = context.getSettingsRef();
|
||||
bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left ||
|
||||
table_join.kind == ASTTableJoin::Kind::Full);
|
||||
|
||||
for (auto & column : columns_from_joined_table)
|
||||
{
|
||||
auto & column_name = column.name_and_type.name;
|
||||
auto & column_type = column.name_and_type.type;
|
||||
auto & original_name = column.original_name;
|
||||
{
|
||||
if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense.
|
||||
continue;
|
||||
|
||||
joined_columns.insert(column_name);
|
||||
|
||||
bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left ||
|
||||
table_join.kind == ASTTableJoin::Kind::Full);
|
||||
auto type = make_nullable ? makeNullable(column_type) : column_type;
|
||||
analyzed_join.available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name);
|
||||
}
|
||||
}
|
||||
analyzed_join.calculateAvailableJoinedColumns(source_columns, context, select_query, make_nullable);
|
||||
}
|
||||
|
||||
}
|
||||
@ -699,9 +646,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
if (source_columns_set.size() != source_columns_list.size())
|
||||
throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
std::vector<TableWithColumnNames> tables_with_columns;
|
||||
|
||||
if (select_query)
|
||||
{
|
||||
translateQualifiedNames(query, select_query, source_columns_set, context);
|
||||
tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context);
|
||||
translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns);
|
||||
|
||||
/// Depending on the user's profile, check for the rights to execute
|
||||
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
|
||||
@ -720,7 +670,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
|
||||
/// Common subexpression elimination. Rewrite rules.
|
||||
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
|
||||
context, select_query);
|
||||
context, select_query, tables_with_columns);
|
||||
|
||||
/// Remove unneeded columns according to 'required_result_columns'.
|
||||
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
||||
|
@ -50,25 +50,24 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data &
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data)
|
||||
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data)
|
||||
{
|
||||
if (IdentifierSemantic::getColumnName(identifier))
|
||||
{
|
||||
size_t best_table_pos = 0;
|
||||
size_t best_match = 0;
|
||||
for (size_t i = 0; i < data.tables.size(); ++i)
|
||||
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i]))
|
||||
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first))
|
||||
if (match > best_match)
|
||||
{
|
||||
best_match = match;
|
||||
best_table_pos = i;
|
||||
}
|
||||
|
||||
IdentifierSemantic::setColumnShortName(identifier, best_match);
|
||||
|
||||
/// If a column from the joined table is also present in the source columns, change its name to the qualified one.
|
||||
if (best_table_pos && data.source_columns.count(ast->getColumnName()))
|
||||
IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]);
|
||||
if (best_table_pos && data.source_columns.count(identifier.shortName()))
|
||||
IdentifierSemantic::setNeedLongName(identifier, true);
|
||||
IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first);
|
||||
}
|
||||
|
||||
return {};
|
||||
@ -85,7 +84,7 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
|
||||
DatabaseAndTableWithAlias db_and_table(ident);
|
||||
|
||||
for (const auto & known_table : data.tables)
|
||||
if (db_and_table.satisfies(known_table, true))
|
||||
if (db_and_table.satisfies(known_table.first, true))
|
||||
return {};
|
||||
|
||||
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
@ -93,10 +92,11 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
|
||||
|
||||
/// Chooses which child expression of a JOIN node is handed back to the caller:
/// a pointer to using_expression_list when it is set, otherwise a pointer to
/// on_expression when that is set, otherwise an empty list.
/// The ASTPtr and Data arguments are unnamed and unused.
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &)
|
||||
{
|
||||
/// Don't translate on_expression here in order to resolve equation parts later.
/// NOTE(review): the comment above says on_expression is not translated here, yet the
/// `else if` branch below still returns it. This text is a rendered diff, so that branch
/// may be a removed line - confirm against the repository before relying on it.
|
||||
std::vector<ASTPtr *> out;
|
||||
if (join.using_expression_list)
|
||||
out.push_back(&join.using_expression_list);
|
||||
else if (join.on_expression)
|
||||
out.push_back(&join.on_expression);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,16 @@ public:
|
||||
struct Data
|
||||
{
|
||||
const NameSet & source_columns;
|
||||
const std::vector<DatabaseAndTableWithAlias> & tables;
|
||||
const std::vector<TableWithColumnNames> & tables;
|
||||
|
||||
static void setTablesOnly(const std::vector<DatabaseAndTableWithAlias> & tables,
|
||||
std::vector<TableWithColumnNames> & tables_with_columns)
|
||||
{
|
||||
tables_with_columns.clear();
|
||||
tables_with_columns.reserve(tables.size());
|
||||
for (const auto & table : tables)
|
||||
tables_with_columns.emplace_back(TableWithColumnNames{table, {}});
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr const char * label = "TranslateQualifiedNames";
|
||||
|
@ -36,12 +36,19 @@ public:
|
||||
bool compound() const { return !name_parts.empty(); }
|
||||
/// True when there are no name parts, or when name equals the last name part.
bool isShort() const
{
    if (name_parts.empty())
        return true;
    return name == name_parts.back();
}
|
||||
|
||||
void resetWithAlias(const String & new_name)
|
||||
void setShortName(const String & new_name)
|
||||
{
|
||||
name = new_name;
|
||||
name_parts.clear();
|
||||
}
|
||||
|
||||
/// Returns the last element of name_parts when there is one, otherwise name itself.
/// The result is a reference to a member, not a copy.
const String & shortName() const
{
    return name_parts.empty() ? name : name_parts.back();
}
|
||||
|
||||
protected:
|
||||
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
void appendColumnNameImpl(WriteBuffer & ostr) const override;
|
||||
|
Loading…
Reference in New Issue
Block a user