mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
some QueryNormalizer refactoring
This commit is contained in:
parent
9b46678920
commit
275d6b2fcc
@ -23,6 +23,7 @@ struct DatabaseAndTableWithAlias
|
||||
String table;
|
||||
String alias;
|
||||
|
||||
DatabaseAndTableWithAlias() = default;
|
||||
DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
|
||||
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database);
|
||||
|
||||
|
@ -321,7 +321,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
|
||||
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
|
||||
QueryAliasesVisitor::Data query_aliases_data{aliases};
|
||||
QueryAliasesVisitor(query_aliases_data).visit(ast);
|
||||
QueryNormalizer(ast, aliases, settings, {}, {}).perform();
|
||||
QueryNormalizer(ast, aliases, settings).perform();
|
||||
|
||||
for (const auto & projection_column : select_query->select_expression_list->children)
|
||||
{
|
||||
|
@ -21,13 +21,10 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/// Overload that receives the columns as two separate arguments:
///  - all_column_names: flat list of names substituted for a bare `*`;
///  - table_names_and_column_names: per-table column lists consulted for qualified asterisks.
/// query, aliases and both column arguments are bound to reference members, so the caller
/// has to keep them alive for as long as the normalizer is used; settings_ is copied.
QueryNormalizer::QueryNormalizer(
    ASTPtr & query,
    const QueryNormalizer::Aliases & aliases,
    ExtractedSettings && settings_,
    const Names & all_column_names,
    const TableNamesAndColumnNames & table_names_and_column_names)
    : query(query)
    , aliases(aliases)
    , settings(settings_)
    , all_column_names(all_column_names)
    , table_names_and_column_names(table_names_and_column_names)
{
}
|
||||
/// Constructs a normalizer for query_.
///  - query_ and aliases_ are bound to reference members, so both must outlive this object.
///  - settings_ and tables_with_columns_ are stored by value.
/// Both rvalue-reference parameters are named lvalues inside the constructor, so they have to be
/// moved explicitly; without std::move the whole vector of per-table column names would be
/// deep-copied even though the caller has already given up ownership of it.
QueryNormalizer::QueryNormalizer(ASTPtr & query_, const QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_,
                                 std::vector<TableWithColumnNames> && tables_with_columns_)
    : query(query_)
    , aliases(aliases_)
    , settings(std::move(settings_))
    , tables_with_columns(std::move(tables_with_columns_))
{}
|
||||
|
||||
void QueryNormalizer::perform()
|
||||
{
|
||||
@ -138,23 +135,42 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf
|
||||
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
|
||||
{
|
||||
/// Replace *, alias.*, database.table.* with a list of columns.
|
||||
ASTs & asts = expr_list->children;
|
||||
for (ssize_t expr_idx = asts.size() - 1; expr_idx >= 0; --expr_idx)
|
||||
{
|
||||
if (typeid_cast<const ASTAsterisk *>(asts[expr_idx].get()) && !all_column_names.empty())
|
||||
{
|
||||
asts.erase(asts.begin() + expr_idx);
|
||||
|
||||
for (size_t column_idx = 0; column_idx < all_column_names.size(); ++column_idx)
|
||||
asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared<ASTIdentifier>(all_column_names[column_idx]));
|
||||
}
|
||||
else if (typeid_cast<const ASTQualifiedAsterisk *>(asts[expr_idx].get()) && !table_names_and_column_names.empty())
|
||||
ASTs old_children;
|
||||
if (processAsterisks())
|
||||
{
|
||||
bool has_asterisk = false;
|
||||
for (const auto & child : expr_list->children)
|
||||
{
|
||||
if (typeid_cast<const ASTAsterisk *>(child.get()) ||
|
||||
typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
|
||||
{
|
||||
has_asterisk = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_asterisk)
|
||||
{
|
||||
old_children.swap(expr_list->children);
|
||||
expr_list->children.reserve(old_children.size());
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & child : old_children)
|
||||
{
|
||||
if (typeid_cast<const ASTAsterisk *>(child.get()))
|
||||
{
|
||||
for (const auto & [table_name, table_columns] : tables_with_columns)
|
||||
for (const auto & column_name : table_columns)
|
||||
expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
}
|
||||
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
|
||||
{
|
||||
const ASTQualifiedAsterisk * qualified_asterisk = static_cast<const ASTQualifiedAsterisk *>(asts[expr_idx].get());
|
||||
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(qualified_asterisk->children[0].get());
|
||||
size_t num_components = identifier->children.size();
|
||||
|
||||
for (const auto & [table_name, table_all_column_names] : table_names_and_column_names)
|
||||
for (const auto & [table_name, table_columns] : tables_with_columns)
|
||||
{
|
||||
if ((num_components == 2 /// database.table.*
|
||||
&& !table_name.database.empty() /// This is normal (not a temporary) table.
|
||||
@ -164,14 +180,14 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf
|
||||
&& ((!table_name.table.empty() && identifier->name == table_name.table) /// table.*
|
||||
|| (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.*
|
||||
{
|
||||
asts.erase(asts.begin() + expr_idx);
|
||||
for (size_t column_idx = 0; column_idx < table_all_column_names.size(); ++column_idx)
|
||||
asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared<ASTIdentifier>(table_all_column_names[column_idx]));
|
||||
|
||||
for (const auto & column_name : table_columns)
|
||||
expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
expr_list->children.emplace_back(child);
|
||||
}
|
||||
}
|
||||
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
|
||||
|
@ -18,10 +18,6 @@ inline bool functionIsInOrGlobalInOperator(const String & name)
|
||||
}
|
||||
|
||||
|
||||
using TableNameAndColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
|
||||
using TableNamesAndColumnNames = std::vector<TableNameAndColumnNames>;
|
||||
|
||||
|
||||
class QueryNormalizer
|
||||
{
|
||||
/// Extracts settings, mostly to show which are used and which are not.
|
||||
@ -41,9 +37,10 @@ class QueryNormalizer
|
||||
|
||||
public:
|
||||
using Aliases = std::unordered_map<String, ASTPtr>;
|
||||
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
|
||||
|
||||
QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, const Names & all_columns_name,
|
||||
const TableNamesAndColumnNames & table_names_and_column_names);
|
||||
QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings,
|
||||
std::vector<TableWithColumnNames> && tables_with_columns = {});
|
||||
|
||||
void perform();
|
||||
|
||||
@ -54,8 +51,9 @@ private:
|
||||
ASTPtr & query;
|
||||
const Aliases & aliases;
|
||||
const ExtractedSettings settings;
|
||||
const Names & all_column_names;
|
||||
const TableNamesAndColumnNames & table_names_and_column_names;
|
||||
const std::vector<TableWithColumnNames> tables_with_columns;
|
||||
|
||||
/// Tells whether asterisks in expression lists should be expanded:
/// true exactly when at least one table-with-columns entry was supplied.
bool processAsterisks() const
{
    return !tables_with_columns.empty();
}
|
||||
|
||||
void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level);
|
||||
};
|
||||
|
@ -42,9 +42,9 @@ namespace ErrorCodes
|
||||
extern const int INVALID_JOIN_ON_EXPRESSION;
|
||||
}
|
||||
|
||||
void removeDuplicateColumns(NamesAndTypesList & columns)
|
||||
NameSet removeDuplicateColumns(NamesAndTypesList & columns)
|
||||
{
|
||||
std::set<String> names;
|
||||
NameSet names;
|
||||
for (auto it = columns.begin(); it != columns.end();)
|
||||
{
|
||||
if (names.emplace(it->name).second)
|
||||
@ -52,6 +52,7 @@ void removeDuplicateColumns(NamesAndTypesList & columns)
|
||||
else
|
||||
columns.erase(it++);
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -77,8 +78,6 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam
|
||||
source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end());
|
||||
}
|
||||
}
|
||||
|
||||
removeDuplicateColumns(source_columns);
|
||||
}
|
||||
|
||||
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
|
||||
@ -102,12 +101,11 @@ void normalizeTree(
|
||||
SyntaxAnalyzerResult & result,
|
||||
const Names & source_columns,
|
||||
const NameSet & source_columns_set,
|
||||
const StoragePtr & storage,
|
||||
const Context & context,
|
||||
const ASTSelectQuery * select_query,
|
||||
bool asterisk_left_columns_only)
|
||||
{
|
||||
Names all_columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns;
|
||||
Names all_columns_name = source_columns;
|
||||
|
||||
if (!asterisk_left_columns_only)
|
||||
{
|
||||
@ -119,17 +117,20 @@ void normalizeTree(
|
||||
if (all_columns_name.empty())
|
||||
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
TableNamesAndColumnNames table_names_and_column_names;
|
||||
std::vector<QueryNormalizer::TableWithColumnNames> table_with_columns;
|
||||
if (select_query && select_query->tables && !select_query->tables->children.empty())
|
||||
{
|
||||
std::vector<const ASTTableExpression *> tables_expression = getSelectTablesExpression(*select_query);
|
||||
|
||||
bool first = true;
|
||||
String current_database = context.getCurrentDatabase();
|
||||
for (const auto * table_expression : tables_expression)
|
||||
{
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase());
|
||||
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
|
||||
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
|
||||
|
||||
removeDuplicateColumns(names_and_types);
|
||||
|
||||
if (!first)
|
||||
{
|
||||
/// For joined tables qualify duplicating names.
|
||||
@ -140,12 +141,13 @@ void normalizeTree(
|
||||
|
||||
first = false;
|
||||
|
||||
table_names_and_column_names.emplace_back(std::pair(table_name, names_and_types.getNames()));
|
||||
table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
|
||||
}
|
||||
}
|
||||
else
|
||||
table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
|
||||
|
||||
auto & settings = context.getSettingsRef();
|
||||
QueryNormalizer(query, result.aliases, settings, all_columns_name, table_names_and_column_names).perform();
|
||||
QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform();
|
||||
}
|
||||
|
||||
bool hasArrayJoin(const ASTPtr & ast)
|
||||
@ -739,6 +741,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
result.source_columns = source_columns_;
|
||||
|
||||
collectSourceColumns(select_query, result.storage, result.source_columns);
|
||||
NameSet source_columns_set = removeDuplicateColumns(result.source_columns);
|
||||
|
||||
const auto & settings = context.getSettingsRef();
|
||||
|
||||
@ -746,7 +749,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
source_columns_list.reserve(result.source_columns.size());
|
||||
for (const auto & type_name : result.source_columns)
|
||||
source_columns_list.emplace_back(type_name.name);
|
||||
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
|
||||
|
||||
if (source_columns_set.size() != source_columns_list.size())
|
||||
throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (select_query)
|
||||
{
|
||||
@ -768,7 +773,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
|
||||
}
|
||||
|
||||
/// Common subexpression elimination. Rewrite rules.
|
||||
normalizeTree(query, result, source_columns_list, source_columns_set, result.storage,
|
||||
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
|
||||
context, select_query, settings.asterisk_left_columns_only != 0);
|
||||
|
||||
/// Remove unneeded columns according to 'required_result_columns'.
|
||||
|
Loading…
Reference in New Issue
Block a user