get rid of custom JOIN ON names qualification

This commit is contained in:
chertus 2019-02-07 22:18:40 +03:00
parent 3fd3884b32
commit db9a2060bf
11 changed files with 106 additions and 82 deletions

View File

@ -1,5 +1,6 @@
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/AnalyzedJoin.h> /// for getNamesAndTypeListFromTableExpression
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
@ -12,6 +13,9 @@
namespace DB
{
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
{
alias = identifier.tryGetAlias();
@ -144,4 +148,26 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number
return nullptr;
}
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context)
{
std::vector<TableWithColumnNames> tables_with_columns;
if (select_query.tables && !select_query.tables->children.empty())
{
String current_database = context.getCurrentDatabase();
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
{
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
removeDuplicateColumns(names_and_types);
tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
return tables_with_columns;
}
}

View File

@ -4,6 +4,7 @@
#include <optional>
#include <Core/Types.h>
#include <Core/Names.h>
namespace DB
@ -15,6 +16,7 @@ using ASTPtr = std::shared_ptr<IAST>;
class ASTSelectQuery;
class ASTIdentifier;
struct ASTTableExpression;
class Context;
/// Extracts database name (and/or alias) from table expression or identifier
@ -36,9 +38,13 @@ struct DatabaseAndTableWithAlias
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
};
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);
std::optional<DatabaseAndTableWithAlias> getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number);
std::vector<TableWithColumnNames> getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context);
std::vector<const ASTTableExpression *> getSelectTablesExpression(const ASTSelectQuery & select_query);
ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number);

View File

@ -37,6 +37,12 @@ std::optional<String> IdentifierSemantic::getTableName(const ASTPtr & ast)
return {};
}
/// Stores the "need long name" flag in the semantic data of the identifier.
/// setColumnNormalName() reads this flag to decide whether to prepend the qualified table prefix.
void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value)
{
    auto & semantic = *identifier.semantic;
    semantic.need_long_name = value;
}
std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
{
if (identifier.name_parts.size() > 2)
@ -97,10 +103,17 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t
identifier.name.swap(new_name);
}
void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
String prefix = db_and_table.getQualifiedNamePrefix();
identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end());
size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);
setColumnShortName(identifier, match);
if (identifier.semantic->need_long_name)
{
String prefix = db_and_table.getQualifiedNamePrefix();
identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end());
}
}
}

View File

@ -9,6 +9,7 @@ namespace DB
/// Extra flags kept alongside an ASTIdentifier. Both are off by default.
struct IdentifierSemanticImpl
{
    /// NOTE(review): the meaning of this flag is not visible in this part of the code - confirm with its users.
    bool special{false};
    /// When set, setColumnNormalName() produces a qualified (prefixed) name.
    bool need_long_name{false};
};
/// Static class to manipulate IdentifierSemanticImpl via ASTIdentifier
@ -24,12 +25,13 @@ struct IdentifierSemantic
static std::pair<String, String> extractDatabaseAndTable(const ASTIdentifier & identifier);
static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name
private:
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table);
static void setColumnShortName(ASTIdentifier & identifier, size_t match);
};
}

View File

@ -326,7 +326,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
std::unordered_map<String, ASTPtr> aliases;
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables};
std::vector<TableWithColumnNames> tables_with_columns;
TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns);
TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns};
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
QueryAliasesVisitor::Data query_aliases_data{aliases};

View File

@ -25,8 +25,6 @@ namespace ErrorCodes
extern const int CYCLIC_ALIASES;
}
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
class CheckASTDepth
{
@ -143,7 +141,10 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
/// Replace *, alias.*, database.table.* with a list of columns.
void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data)
{
const auto & tables_with_columns = data.tables_with_columns;
if (!data.tables_with_columns)
return;
const auto & tables_with_columns = *data.tables_with_columns;
const auto & source_columns_set = data.source_columns_set;
ASTs old_children;
@ -228,8 +229,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
/// special visitChildren() for ASTSelectQuery
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data)
{
extractTablesWithColumns(select, data);
if (auto join = select.join())
extractJoinUsingColumns(join->table_join, data);
@ -253,7 +252,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data &
}
/// Don't go into subqueries.
/// Don't go into components of compound identifiers.
/// Don't go into select query. It processes children itself.
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
@ -346,25 +344,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data)
}
}
void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data)
{
if (data.context && select_query.tables && !select_query.tables->children.empty())
{
data.tables_with_columns.clear();
String current_database = data.context->getCurrentDatabase();
for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query))
{
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context);
removeDuplicateColumns(names_and_types);
data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
}
/// 'select * from a join b using id' should result one 'id' column
void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data)
{

View File

@ -46,8 +46,6 @@ class QueryNormalizer
};
public:
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
struct Data
{
using SetOfASTs = std::set<const IAST *>;
@ -57,7 +55,7 @@ public:
const ExtractedSettings settings;
const Context * context;
const NameSet * source_columns_set;
std::vector<TableWithColumnNames> tables_with_columns;
const std::vector<TableWithColumnNames> * tables_with_columns;
std::unordered_set<String> join_using_columns;
/// tmp data
@ -67,25 +65,25 @@ public:
std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_,
const NameSet & source_columns_set, Names && all_columns)
const NameSet & source_columns_set, const std::vector<TableWithColumnNames> & tables_with_columns_)
: aliases(aliases_)
, settings(settings_)
, context(&context_)
, source_columns_set(&source_columns_set)
, tables_with_columns(&tables_with_columns_)
, level(0)
{
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns));
}
{}
/// Constructor for use without table information: only aliases and settings are supplied.
/// context, source_columns_set and tables_with_columns are all set to nullptr;
/// processAsterisks() tests tables_with_columns for null, so it returns false for such an object.
Data(const Aliases & aliases_, ExtractedSettings && settings_)
: aliases(aliases_)
/// settings_ is an lvalue inside the constructor (no std::move is applied), so it is not moved from.
, settings(settings_)
, context(nullptr)
, source_columns_set(nullptr)
, tables_with_columns(nullptr)
, level(0)
{}
bool processAsterisks() const { return !tables_with_columns.empty(); }
bool processAsterisks() const { return tables_with_columns && !tables_with_columns->empty(); }
};
QueryNormalizer(Data & data)
@ -110,7 +108,6 @@ private:
static void visitChildren(const ASTPtr &, Data & data);
static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data);
static void extractJoinUsingColumns(const ASTPtr ast, Data & data);
};

View File

@ -79,16 +79,14 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns,
const std::vector<TableWithColumnNames> & tables_with_columns)
{
if (!select_query->tables || select_query->tables->children.empty())
return;
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
LogAST log;
TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables};
TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns};
TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream());
visitor.visit(query);
}
@ -100,7 +98,8 @@ void normalizeTree(
const Names & source_columns,
const NameSet & source_columns_set,
const Context & context,
const ASTSelectQuery * select_query)
const ASTSelectQuery * select_query,
std::vector<TableWithColumnNames> & tables_with_columns)
{
const auto & settings = context.getSettingsRef();
@ -116,10 +115,12 @@ void normalizeTree(
if (all_columns_name.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name));
if (tables_with_columns.empty())
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns);
QueryNormalizer(normalizer_data).visit(query);
}
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@ -446,7 +447,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
const Context & context)
{
const auto & tables = static_cast<const ASTTablesInSelectQuery &>(*select_query->tables);
const auto * left_tables_element = static_cast<const ASTTablesInSelectQueryElement *>(tables.children.at(0).get());
@ -511,24 +512,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
return table_belonging;
};
std::function<void(ASTPtr &, const DatabaseAndTableWithAlias &, bool)> translate_qualified_names;
translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table)
{
if (IdentifierSemantic::getColumnName(ast))
{
auto * identifier = typeid_cast<ASTIdentifier *>(ast.get());
size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names);
IdentifierSemantic::setColumnShortName(*identifier, match);
if (right_table && source_columns.count(ast->getColumnName()))
IdentifierSemantic::setColumnQualifiedName(*identifier, source_names);
}
for (auto & child : ast->children)
translate_qualified_names(child, source_names, right_table);
};
const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
"[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]";
auto throwSyntaxException = [&](const String & msg)
@ -556,9 +539,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table)
{
translate_qualified_names(ast_to_left_table, left_source_names, false);
translate_qualified_names(ast_to_right_table, right_source_names, true);
analyzed_join.key_asts_left.push_back(ast_to_left_table);
analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName());
analyzed_join.key_asts_right.push_back(ast_to_right_table);
@ -624,7 +604,7 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s
name = joined_table_name.getQualifiedNamePrefix() + name;
}
else if (table_join.on_expression)
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context);
collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, context);
auto & settings = context.getSettingsRef();
bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left ||
@ -666,9 +646,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
if (source_columns_set.size() != source_columns_list.size())
throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);
std::vector<TableWithColumnNames> tables_with_columns;
if (select_query)
{
translateQualifiedNames(query, select_query, source_columns_set, context);
tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context);
translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns);
/// Depending on the user's profile, check for the execution rights
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
@ -687,7 +670,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
context, select_query);
context, select_query, tables_with_columns);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.

View File

@ -50,25 +50,24 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data &
return {};
}
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data)
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data)
{
if (IdentifierSemantic::getColumnName(identifier))
{
size_t best_table_pos = 0;
size_t best_match = 0;
for (size_t i = 0; i < data.tables.size(); ++i)
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i]))
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first))
if (match > best_match)
{
best_match = match;
best_table_pos = i;
}
IdentifierSemantic::setColumnShortName(identifier, best_match);
/// If a column from the joined table is also present in the source columns, change its name to the qualified one.
if (best_table_pos && data.source_columns.count(ast->getColumnName()))
IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]);
if (best_table_pos && data.source_columns.count(identifier.shortName()))
IdentifierSemantic::setNeedLongName(identifier, true);
IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first);
}
return {};
@ -85,7 +84,7 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
DatabaseAndTableWithAlias db_and_table(ident);
for (const auto & known_table : data.tables)
if (db_and_table.satisfies(known_table, true))
if (db_and_table.satisfies(known_table.first, true))
return {};
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
@ -93,10 +92,11 @@ std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs
/// Returns a pointer to the JOIN child expression selected by this matcher:
/// the USING list when it is present, otherwise the ON expression when it is present, otherwise nothing.
/// (Presumably the returned nodes are the ones the visitor processes next - confirm against the visitor.)
std::vector<ASTPtr *> TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &)
{
    if (join.using_expression_list)
        return {&join.using_expression_list};

    if (join.on_expression)
        return {&join.on_expression};

    return {};
}

View File

@ -21,7 +21,16 @@ public:
struct Data
{
const NameSet & source_columns;
const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
static void setTablesOnly(const std::vector<DatabaseAndTableWithAlias> & tables,
std::vector<TableWithColumnNames> & tables_with_columns)
{
tables_with_columns.clear();
tables_with_columns.reserve(tables.size());
for (const auto & table : tables)
tables_with_columns.emplace_back(TableWithColumnNames{table, {}});
}
};
static constexpr const char * label = "TranslateQualifiedNames";

View File

@ -42,6 +42,13 @@ public:
name_parts.clear();
}
/// Returns the last element of name_parts, or name itself when name_parts is empty.
const String & shortName() const
{
    if (name_parts.empty())
        return name;

    return name_parts.back();
}
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;