2021-07-08 10:49:13 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
|
|
|
|
2019-01-25 15:42:24 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
|
2021-07-08 10:49:13 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2020-03-13 10:30:55 +00:00
|
|
|
#include <Interpreters/StorageID.h>
|
2019-01-25 15:42:24 +00:00
|
|
|
|
2021-04-06 09:29:29 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
2021-11-26 15:49:40 +00:00
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2021-11-26 17:35:24 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2021-04-06 09:29:29 +00:00
|
|
|
|
2019-01-25 15:42:24 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-10-17 21:08:28 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int AMBIGUOUS_COLUMN_NAME;
|
|
|
|
}
|
|
|
|
|
2019-10-16 17:33:53 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
template <typename T>
|
2020-06-15 12:36:10 +00:00
|
|
|
std::optional<size_t> tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables,
|
|
|
|
bool allow_ambiguous, bool column_match [[maybe_unused]] = false)
|
2019-10-16 17:33:53 +00:00
|
|
|
{
|
2019-10-17 21:08:28 +00:00
|
|
|
using ColumnMatch = IdentifierSemantic::ColumnMatch;
|
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
size_t best_table_pos = 0;
|
2019-10-17 21:08:28 +00:00
|
|
|
auto best_match = ColumnMatch::NoMatch;
|
|
|
|
size_t same_match = 0;
|
|
|
|
|
2019-10-16 17:33:53 +00:00
|
|
|
for (size_t i = 0; i < tables.size(); ++i)
|
2019-10-17 21:08:28 +00:00
|
|
|
{
|
2020-03-08 11:07:05 +00:00
|
|
|
auto match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i]);
|
2020-06-15 12:36:10 +00:00
|
|
|
|
|
|
|
if constexpr (std::is_same_v<T, TableWithColumnNamesAndTypes>)
|
|
|
|
{
|
|
|
|
if (column_match && match == ColumnMatch::NoMatch && identifier.isShort() && tables[i].hasColumn(identifier.shortName()))
|
|
|
|
match = ColumnMatch::ColumnName;
|
|
|
|
}
|
|
|
|
|
2019-10-24 13:04:50 +00:00
|
|
|
if (match != ColumnMatch::NoMatch)
|
2019-10-17 21:08:28 +00:00
|
|
|
{
|
2019-10-24 13:04:50 +00:00
|
|
|
if (match > best_match)
|
2019-10-16 17:33:53 +00:00
|
|
|
{
|
|
|
|
best_match = match;
|
|
|
|
best_table_pos = i;
|
2019-10-17 21:08:28 +00:00
|
|
|
same_match = 0;
|
2019-10-16 17:33:53 +00:00
|
|
|
}
|
2019-10-17 21:08:28 +00:00
|
|
|
else if (match == best_match)
|
|
|
|
++same_match;
|
|
|
|
}
|
|
|
|
}
|
2019-10-16 17:33:53 +00:00
|
|
|
|
2019-10-24 13:04:50 +00:00
|
|
|
if ((best_match != ColumnMatch::NoMatch) && same_match)
|
2019-10-17 21:08:28 +00:00
|
|
|
{
|
|
|
|
if (!allow_ambiguous)
|
2020-10-24 18:46:10 +00:00
|
|
|
throw Exception("Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
2020-03-08 11:07:05 +00:00
|
|
|
best_match = ColumnMatch::Ambiguous;
|
|
|
|
return {};
|
2019-10-17 21:08:28 +00:00
|
|
|
}
|
2020-03-08 11:07:05 +00:00
|
|
|
|
|
|
|
if (best_match != ColumnMatch::NoMatch)
|
|
|
|
return best_table_pos;
|
|
|
|
return {};
|
2019-10-16 17:33:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2019-01-25 15:42:24 +00:00
|
|
|
std::optional<String> IdentifierSemantic::getColumnName(const ASTIdentifier & node)
|
|
|
|
{
|
|
|
|
if (!node.semantic->special)
|
2020-10-24 18:46:10 +00:00
|
|
|
return node.name();
|
2019-01-25 15:42:24 +00:00
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
std::optional<String> IdentifierSemantic::getColumnName(const ASTPtr & ast)
|
|
|
|
{
|
|
|
|
if (ast)
|
2019-03-11 13:22:51 +00:00
|
|
|
if (const auto * id = ast->as<ASTIdentifier>())
|
2019-01-25 15:42:24 +00:00
|
|
|
if (!id->semantic->special)
|
2020-10-24 18:46:10 +00:00
|
|
|
return id->name();
|
2019-01-25 15:42:24 +00:00
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
2019-12-24 18:51:37 +00:00
|
|
|
std::optional<ASTIdentifier> IdentifierSemantic::uncover(const ASTIdentifier & identifier)
|
|
|
|
{
|
|
|
|
if (identifier.semantic->covered)
|
|
|
|
{
|
|
|
|
std::vector<String> name_parts = identifier.name_parts;
|
|
|
|
return ASTIdentifier(std::move(name_parts));
|
|
|
|
}
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Replaces the identifier's name with the short name `alias` and flags its semantic as `covered`.
void IdentifierSemantic::coverName(ASTIdentifier & identifier, const String & alias)
{
    identifier.setShortName(alias);

    /// The flag is set only after the rename, in the same order as the two operations are listed above.
    auto & semantic = *identifier.semantic;
    semantic.covered = true;
}
|
|
|
|
|
2019-02-11 19:14:57 +00:00
|
|
|
bool IdentifierSemantic::canBeAlias(const ASTIdentifier & identifier)
|
|
|
|
{
|
|
|
|
return identifier.semantic->can_be_alias;
|
|
|
|
}
|
|
|
|
|
2019-10-18 16:16:57 +00:00
|
|
|
/// Records `table_pos` as the identifier's table membership and clears its `can_be_alias` flag.
void IdentifierSemantic::setMembership(ASTIdentifier & identifier, size_t table_pos)
{
    auto & semantic = *identifier.semantic;
    semantic.can_be_alias = false;
    semantic.membership = table_pos;
}
|
|
|
|
|
2019-10-18 16:16:57 +00:00
|
|
|
std::optional<size_t> IdentifierSemantic::getMembership(const ASTIdentifier & identifier)
|
2019-02-13 15:18:02 +00:00
|
|
|
{
|
|
|
|
return identifier.semantic->membership;
|
|
|
|
}
|
2019-02-11 19:14:57 +00:00
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
|
|
|
|
bool ambiguous)
|
2019-10-16 14:47:58 +00:00
|
|
|
{
|
2020-03-08 11:07:05 +00:00
|
|
|
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, ambiguous);
|
2019-10-16 17:33:53 +00:00
|
|
|
}
|
2019-10-16 14:47:58 +00:00
|
|
|
|
2020-06-15 12:36:10 +00:00
|
|
|
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const TablesWithColumns & tables, bool ambiguous)
|
2020-03-08 11:07:05 +00:00
|
|
|
{
|
|
|
|
return tryChooseTable<TableWithColumnNamesAndTypes>(identifier, tables, ambiguous);
|
2019-10-16 14:47:58 +00:00
|
|
|
}
|
|
|
|
|
2020-06-15 12:36:10 +00:00
|
|
|
std::optional<size_t> IdentifierSemantic::chooseTableColumnMatch(const ASTIdentifier & identifier, const TablesWithColumns & tables,
|
|
|
|
bool ambiguous)
|
|
|
|
{
|
|
|
|
return tryChooseTable<TableWithColumnNamesAndTypes>(identifier, tables, ambiguous, true);
|
|
|
|
}
|
|
|
|
|
2019-11-13 16:49:29 +00:00
|
|
|
std::optional<String> IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const String & table_name)
|
|
|
|
{
|
|
|
|
if (identifier.name_parts.size() == 3 && table_name == identifier.name_parts[0])
|
|
|
|
return identifier.name_parts[1] + '.' + identifier.name_parts[2];
|
|
|
|
else if (identifier.name_parts.size() == 2)
|
|
|
|
return identifier.name_parts[0] + '.' + identifier.name_parts[1];
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
2022-06-17 07:13:31 +00:00
|
|
|
/// Joins the identifier's name parts with '.', after dropping the leading parts that qualify it
/// with `table`: two parts for a DBAndTable match, one part for a TableName, AliasedTableName or
/// TableAlias match, none otherwise.
String IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & table)
{
    size_t skip = 0;
    switch (IdentifierSemantic::canReferColumnToTable(identifier, table))
    {
        case IdentifierSemantic::ColumnMatch::DBAndTable:
            skip = 2;
            break;
        case IdentifierSemantic::ColumnMatch::TableName:
        case IdentifierSemantic::ColumnMatch::AliasedTableName:
        case IdentifierSemantic::ColumnMatch::TableAlias:
            skip = 1;
            break;
        default:
            break;
    }

    const auto & parts = identifier.name_parts;
    const size_t parts_count = parts.size();

    String joined;
    for (size_t pos = skip; pos < parts_count; ++pos)
    {
        /// The separator is added only once something has been accumulated.
        if (!joined.empty())
            joined += ".";
        joined += parts[pos];
    }
    return joined;
}
|
|
|
|
|
2019-01-25 15:42:24 +00:00
|
|
|
bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table)
|
|
|
|
{
|
|
|
|
size_t num_components = identifier.name_parts.size();
|
|
|
|
if (num_components >= 3)
|
|
|
|
return identifier.name_parts[0] == database &&
|
|
|
|
identifier.name_parts[1] == table;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table)
|
|
|
|
{
|
|
|
|
size_t num_components = identifier.name_parts.size();
|
|
|
|
if (num_components >= 2)
|
|
|
|
return identifier.name_parts[0] == table;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-10-17 21:08:28 +00:00
|
|
|
/// Classifies how `identifier` is qualified with respect to `db_and_table`.
/// The checks run in this order and the first hit wins:
///  1. database.table.column -> DBAndTable
///  2. alias.column          -> TableAlias
///  3. table.column          -> AliasedTableName if the table has a non-empty alias, else TableName
/// If none applies the result is NoMatch.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(
    const ASTIdentifier & identifier,
    const DatabaseAndTableWithAlias & db_and_table)
{
    /// database.table.column
    if (doesIdentifierBelongTo(identifier, db_and_table.database, db_and_table.table))
        return ColumnMatch::DBAndTable;

    /// alias.column
    if (doesIdentifierBelongTo(identifier, db_and_table.alias))
        return ColumnMatch::TableAlias;

    /// table.column
    if (!doesIdentifierBelongTo(identifier, db_and_table.table))
        return ColumnMatch::NoMatch;

    if (db_and_table.alias.empty())
        return ColumnMatch::TableName;
    return ColumnMatch::AliasedTableName;
}
|
|
|
|
|
2020-03-08 11:07:05 +00:00
|
|
|
/// Overload for a table with columns: classifies the identifier against `table_with_columns.table`.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(
    const ASTIdentifier & identifier,
    const TableWithColumnNamesAndTypes & table_with_columns)
{
    const auto & underlying_table = table_with_columns.table;
    return canReferColumnToTable(identifier, underlying_table);
}
|
|
|
|
|
2021-03-24 12:48:29 +00:00
|
|
|
/// Strip qualifications from left side of column name.
|
2019-10-18 16:16:57 +00:00
|
|
|
/// Example: 'database.table.name' -> 'name'.
|
|
|
|
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
|
2019-01-25 15:42:24 +00:00
|
|
|
{
|
2019-10-17 21:08:28 +00:00
|
|
|
auto match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table);
|
|
|
|
size_t to_strip = 0;
|
|
|
|
switch (match)
|
|
|
|
{
|
|
|
|
case ColumnMatch::TableName:
|
2019-10-18 16:16:57 +00:00
|
|
|
case ColumnMatch::AliasedTableName:
|
2019-10-17 21:08:28 +00:00
|
|
|
case ColumnMatch::TableAlias:
|
|
|
|
to_strip = 1;
|
|
|
|
break;
|
2021-09-25 02:48:24 +00:00
|
|
|
case ColumnMatch::DBAndTable:
|
2019-10-17 21:08:28 +00:00
|
|
|
to_strip = 2;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2019-02-07 19:18:40 +00:00
|
|
|
|
2019-10-18 16:16:57 +00:00
|
|
|
if (!to_strip)
|
|
|
|
return;
|
|
|
|
|
2020-10-24 18:46:10 +00:00
|
|
|
identifier.name_parts = std::vector<String>(identifier.name_parts.begin() + to_strip, identifier.name_parts.end());
|
|
|
|
identifier.resetFullName();
|
2019-02-20 12:12:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Rewrites the identifier as {table prefix, short name}, where the prefix is
/// db_and_table.getQualifiedNamePrefix() without its last character.
/// Also stores the prefix in the semantic's `table` field and sets `legacy_compound`.
/// Does nothing when the prefix is empty.
void IdentifierSemantic::setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)
{
    String prefix = db_and_table.getQualifiedNamePrefix();
    if (prefix.empty())
        return;

    prefix.resize(prefix.size() - 1); /// crop dot

    identifier.name_parts = {prefix, identifier.shortName()};
    identifier.resetFullName();

    identifier.semantic->table = prefix;
    identifier.semantic->legacy_compound = true;
}
|
|
|
|
|
2021-04-01 11:21:36 +00:00
|
|
|
/// Returns the membership already stored in the identifier if there is one; otherwise tries to
/// resolve it against `tables` with chooseTableColumnMatch, passing `true` for its `ambiguous` argument.
std::optional<size_t> IdentifierSemantic::getIdentMembership(const ASTIdentifier & ident, const std::vector<TableWithColumnNamesAndTypes> & tables)
{
    if (std::optional<size_t> stored_pos = IdentifierSemantic::getMembership(ident))
        return stored_pos;

    return IdentifierSemantic::chooseTableColumnMatch(ident, tables, true);
}
|
|
|
|
|
|
|
|
std::optional<size_t>
|
|
|
|
IdentifierSemantic::getIdentsMembership(ASTPtr ast, const std::vector<TableWithColumnNamesAndTypes> & tables, const Aliases & aliases)
|
|
|
|
{
|
|
|
|
auto idents = IdentifiersCollector::collect(ast);
|
|
|
|
|
|
|
|
std::optional<size_t> result;
|
|
|
|
for (const auto * ident : idents)
|
|
|
|
{
|
|
|
|
/// short name clashes with alias, ambiguous
|
2022-04-18 10:18:43 +00:00
|
|
|
if (ident->isShort() && aliases.contains(ident->shortName()))
|
2021-04-01 11:21:36 +00:00
|
|
|
return {};
|
|
|
|
const auto pos = getIdentMembership(*ident, tables);
|
|
|
|
if (!pos)
|
|
|
|
return {};
|
|
|
|
/// identifiers from different tables
|
|
|
|
if (result && *pos != *result)
|
|
|
|
return {};
|
|
|
|
result = pos;
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Runs a ConstInDepthNodeVisitor with this matcher over `node` and returns the identifiers it gathered.
IdentifiersCollector::ASTIdentifiers IdentifiersCollector::collect(const ASTPtr & node)
{
    IdentifiersCollector::Data collected;

    ConstInDepthNodeVisitor<IdentifiersCollector, true> visitor(collected);
    visitor.visit(node);

    return collected.idents;
}
|
|
|
|
|
|
|
|
/// Always returns true, whatever the two nodes are.
bool IdentifiersCollector::needChildVisit(const ASTPtr & /* node */, const ASTPtr & /* child */)
{
    return true;
}
|
|
|
|
|
|
|
|
void IdentifiersCollector::visit(const ASTPtr & node, IdentifiersCollector::Data & data)
|
|
|
|
{
|
|
|
|
if (const auto * ident = node->as<ASTIdentifier>())
|
|
|
|
data.idents.push_back(ident);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
/// Prepares the state used by getIdentsMembership():
///  - fills `aliases` by running QueryAliasesNoSubqueriesVisitor over the WITH section (if any)
///    and over the SELECT expression list;
///  - fills `tables` from the query's table expressions via getDatabaseAndTablesWithColumns,
///    passing the `asterisk_include_alias_columns` and `asterisk_include_materialized_columns` settings.
IdentifierMembershipCollector::IdentifierMembershipCollector(const ASTSelectQuery & select, ContextPtr context)
{
    if (ASTPtr with = select.with())
        QueryAliasesNoSubqueriesVisitor(aliases).visit(with);
    QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select());

    const auto & query_settings = context->getSettingsRef();
    tables = getDatabaseAndTablesWithColumns(
        getTableExpressions(select),
        context,
        query_settings.asterisk_include_alias_columns,
        query_settings.asterisk_include_materialized_columns);
}
|
|
|
|
|
|
|
|
/// Forwards to IdentifierSemantic::getIdentsMembership using the `tables` and `aliases` held by this collector.
std::optional<size_t> IdentifierMembershipCollector::getIdentsMembership(ASTPtr ast) const
{
    return IdentifierSemantic::getIdentsMembership(
        ast,
        tables,
        aliases);
}
|
|
|
|
|
2022-09-12 15:05:34 +00:00
|
|
|
void splitConjunctionsAst(const ASTPtr & node, ASTs & result)
|
2021-04-06 09:29:29 +00:00
|
|
|
{
|
2022-09-12 15:05:34 +00:00
|
|
|
if (!node)
|
2021-04-06 09:29:29 +00:00
|
|
|
return;
|
2022-09-12 15:05:34 +00:00
|
|
|
|
|
|
|
result.emplace_back(node);
|
|
|
|
|
|
|
|
for (size_t idx = 0; idx < result.size();)
|
|
|
|
{
|
|
|
|
ASTPtr expression = result.at(idx);
|
|
|
|
|
|
|
|
if (const auto * function = expression->as<ASTFunction>(); function && function->name == "and")
|
|
|
|
{
|
|
|
|
result.erase(result.begin() + idx);
|
|
|
|
|
|
|
|
for (auto & child : function->arguments->children)
|
|
|
|
result.emplace_back(child);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
++idx;
|
2021-04-06 09:29:29 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-12 15:05:34 +00:00
|
|
|
/// Convenience overload: returns the list produced by splitConjunctionsAst(node, result)
/// when starting from an empty list.
ASTs splitConjunctionsAst(const ASTPtr & node)
{
    std::vector<ASTPtr> conjuncts;
    splitConjunctionsAst(node, conjuncts);
    return conjuncts;
}
|
|
|
|
|
2019-01-25 15:42:24 +00:00
|
|
|
}
|