some QueryNormalizer refactoring

This commit is contained in:
chertus 2019-01-10 21:58:55 +03:00
parent 9b46678920
commit 275d6b2fcc
5 changed files with 65 additions and 45 deletions

View File

@ -23,6 +23,7 @@ struct DatabaseAndTableWithAlias
String table;
String alias;
DatabaseAndTableWithAlias() = default;
DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database);

View File

@ -321,7 +321,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast
TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast);
QueryAliasesVisitor::Data query_aliases_data{aliases};
QueryAliasesVisitor(query_aliases_data).visit(ast);
QueryNormalizer(ast, aliases, settings, {}, {}).perform();
QueryNormalizer(ast, aliases, settings).perform();
for (const auto & projection_column : select_query->select_expression_list->children)
{

View File

@ -21,13 +21,10 @@ namespace ErrorCodes
}
/// Pre-refactoring constructor overload.
/// `all_column_names` and `table_names_and_column_names` are bound to reference members of the
/// class, so the arguments must outlive the normalizer; `settings_` is copied into the object.
QueryNormalizer::QueryNormalizer(ASTPtr & query, const QueryNormalizer::Aliases & aliases,
ExtractedSettings && settings_, const Names & all_column_names,
const TableNamesAndColumnNames & table_names_and_column_names)
: query(query), aliases(aliases), settings(settings_), all_column_names(all_column_names),
table_names_and_column_names(table_names_and_column_names)
{
}
/// Binds the normalizer to the query tree and the alias map (both are held by reference and must
/// outlive this object) and takes over the per-table column lists.
/// `tables_with_columns_` is declared as an rvalue reference, but a named parameter is an lvalue
/// inside the constructor, so it has to be moved explicitly - otherwise every table's list of
/// column names would be deep-copied even though the caller handed over ownership.
QueryNormalizer::QueryNormalizer(ASTPtr & query_, const QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_,
std::vector<TableWithColumnNames> && tables_with_columns_)
: query(query_), aliases(aliases_), settings(settings_), tables_with_columns(std::move(tables_with_columns_))
{}
void QueryNormalizer::perform()
{
@ -138,23 +135,42 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf
else if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
{
/// Replace *, alias.*, database.table.* with a list of columns.
ASTs & asts = expr_list->children;
for (ssize_t expr_idx = asts.size() - 1; expr_idx >= 0; --expr_idx)
{
if (typeid_cast<const ASTAsterisk *>(asts[expr_idx].get()) && !all_column_names.empty())
{
asts.erase(asts.begin() + expr_idx);
for (size_t column_idx = 0; column_idx < all_column_names.size(); ++column_idx)
asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared<ASTIdentifier>(all_column_names[column_idx]));
}
else if (typeid_cast<const ASTQualifiedAsterisk *>(asts[expr_idx].get()) && !table_names_and_column_names.empty())
ASTs old_children;
if (processAsterisks())
{
bool has_asterisk = false;
for (const auto & child : expr_list->children)
{
if (typeid_cast<const ASTAsterisk *>(child.get()) ||
typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
{
has_asterisk = true;
break;
}
}
if (has_asterisk)
{
old_children.swap(expr_list->children);
expr_list->children.reserve(old_children.size());
}
}
for (const auto & child : old_children)
{
if (typeid_cast<const ASTAsterisk *>(child.get()))
{
for (const auto & [table_name, table_columns] : tables_with_columns)
for (const auto & column_name : table_columns)
expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
}
else if (const auto * qualified_asterisk = typeid_cast<const ASTQualifiedAsterisk *>(child.get()))
{
const ASTQualifiedAsterisk * qualified_asterisk = static_cast<const ASTQualifiedAsterisk *>(asts[expr_idx].get());
const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(qualified_asterisk->children[0].get());
size_t num_components = identifier->children.size();
for (const auto & [table_name, table_all_column_names] : table_names_and_column_names)
for (const auto & [table_name, table_columns] : tables_with_columns)
{
if ((num_components == 2 /// database.table.*
&& !table_name.database.empty() /// This is normal (not a temporary) table.
@ -164,14 +180,14 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf
&& ((!table_name.table.empty() && identifier->name == table_name.table) /// table.*
|| (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.*
{
asts.erase(asts.begin() + expr_idx);
for (size_t column_idx = 0; column_idx < table_all_column_names.size(); ++column_idx)
asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared<ASTIdentifier>(table_all_column_names[column_idx]));
for (const auto & column_name : table_columns)
expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column_name));
break;
}
}
}
else
expr_list->children.emplace_back(child);
}
}
else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))

View File

@ -18,10 +18,6 @@ inline bool functionIsInOrGlobalInOperator(const String & name)
}
using TableNameAndColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
using TableNamesAndColumnNames = std::vector<TableNameAndColumnNames>;
class QueryNormalizer
{
/// Extracts settings, mostly to show which are used and which are not.
@ -41,9 +37,10 @@ class QueryNormalizer
public:
using Aliases = std::unordered_map<String, ASTPtr>;
using TableWithColumnNames = std::pair<DatabaseAndTableWithAlias, Names>;
QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, const Names & all_columns_name,
const TableNamesAndColumnNames & table_names_and_column_names);
QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings,
std::vector<TableWithColumnNames> && tables_with_columns = {});
void perform();
@ -54,8 +51,9 @@ private:
ASTPtr & query;
const Aliases & aliases;
const ExtractedSettings settings;
const Names & all_column_names;
const TableNamesAndColumnNames & table_names_and_column_names;
const std::vector<TableWithColumnNames> tables_with_columns;
/// Asterisk expansion is attempted only when at least one table with its columns was supplied.
bool processAsterisks() const
{
    return tables_with_columns.size() != 0;
}
void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level);
};

View File

@ -42,9 +42,9 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION;
}
void removeDuplicateColumns(NamesAndTypesList & columns)
NameSet removeDuplicateColumns(NamesAndTypesList & columns)
{
std::set<String> names;
NameSet names;
for (auto it = columns.begin(); it != columns.end();)
{
if (names.emplace(it->name).second)
@ -52,6 +52,7 @@ void removeDuplicateColumns(NamesAndTypesList & columns)
else
columns.erase(it++);
}
return names;
}
namespace
@ -77,8 +78,6 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam
source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end());
}
}
removeDuplicateColumns(source_columns);
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
@ -102,12 +101,11 @@ void normalizeTree(
SyntaxAnalyzerResult & result,
const Names & source_columns,
const NameSet & source_columns_set,
const StoragePtr & storage,
const Context & context,
const ASTSelectQuery * select_query,
bool asterisk_left_columns_only)
{
Names all_columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns;
Names all_columns_name = source_columns;
if (!asterisk_left_columns_only)
{
@ -119,17 +117,20 @@ void normalizeTree(
if (all_columns_name.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
TableNamesAndColumnNames table_names_and_column_names;
std::vector<QueryNormalizer::TableWithColumnNames> table_with_columns;
if (select_query && select_query->tables && !select_query->tables->children.empty())
{
std::vector<const ASTTableExpression *> tables_expression = getSelectTablesExpression(*select_query);
bool first = true;
String current_database = context.getCurrentDatabase();
for (const auto * table_expression : tables_expression)
{
DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase());
DatabaseAndTableWithAlias table_name(*table_expression, current_database);
NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
removeDuplicateColumns(names_and_types);
if (!first)
{
/// For joined tables qualify duplicating names.
@ -140,12 +141,13 @@ void normalizeTree(
first = false;
table_names_and_column_names.emplace_back(std::pair(table_name, names_and_types.getNames()));
table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
}
}
else
table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
auto & settings = context.getSettingsRef();
QueryNormalizer(query, result.aliases, settings, all_columns_name, table_names_and_column_names).perform();
QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform();
}
bool hasArrayJoin(const ASTPtr & ast)
@ -739,6 +741,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
result.source_columns = source_columns_;
collectSourceColumns(select_query, result.storage, result.source_columns);
NameSet source_columns_set = removeDuplicateColumns(result.source_columns);
const auto & settings = context.getSettingsRef();
@ -746,7 +749,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
source_columns_list.emplace_back(type_name.name);
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
if (source_columns_set.size() != source_columns_list.size())
throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);
if (select_query)
{
@ -768,7 +773,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
}
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, source_columns_list, source_columns_set, result.storage,
normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set,
context, select_query, settings.asterisk_left_columns_only != 0);
/// Remove unneeded columns according to 'required_result_columns'.