ClickHouse/dbms/src/Interpreters/SyntaxAnalyzer.cpp

726 lines
27 KiB
C++
Raw Normal View History

2018-11-02 18:53:23 +00:00
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
#include <Interpreters/LogicalExpressionsOptimizer.h>
#include <Interpreters/Settings.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
2018-11-02 18:53:23 +00:00
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
2018-11-02 18:53:23 +00:00
#include <Interpreters/Context.h>
#include <Interpreters/QueryNormalizer.h>
2018-11-02 18:53:23 +00:00
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
2018-11-02 18:53:23 +00:00
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
2018-11-02 18:53:23 +00:00
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
2018-11-02 18:53:23 +00:00
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
2018-11-02 18:53:23 +00:00
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/parseQuery.h>
2018-11-02 18:53:23 +00:00
#include <Parsers/queryToString.h>
#include <DataTypes/NestedUtils.h>
#include <Core/NamesAndTypes.h>
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
2018-11-02 18:53:23 +00:00
#include <functional>
2018-11-02 18:53:23 +00:00
namespace DB
{
/// Error codes used by the exceptions thrown in this file. They are only declared here (extern).
namespace ErrorCodes
{
/// Thrown by getArrayJoinedColumns when no source column belongs to the ARRAY JOINed nested table.
extern const int EMPTY_NESTED_TABLE;
/// Thrown by SyntaxAnalyzer::analyze when the source columns list still contains duplicates.
extern const int LOGICAL_ERROR;
/// Thrown by collectJoinedColumnsFromJoinOnExpr for unsupported or inconsistent JOIN ON expressions.
extern const int INVALID_JOIN_ON_EXPRESSION;
}
2019-01-10 18:58:55 +00:00
/// Removes from `columns` every entry whose name already occurred earlier in the list
/// (the first occurrence is kept) and returns the set of names that remain.
NameSet removeDuplicateColumns(NamesAndTypesList & columns)
{
    NameSet seen_names;

    auto pos = columns.begin();
    while (pos != columns.end())
    {
        const bool first_occurrence = seen_names.emplace(pos->name).second;
        if (first_occurrence)
            ++pos;
        else
            pos = columns.erase(pos);   /// erase() hands back the element following the erased one
    }

    return seen_names;
}
namespace
{
2019-01-09 16:16:59 +00:00
/// Debug logger type for AST visitors; the visitors in this file receive its `stream()`.
using LogAST = DebugASTLog<false>; /// set to true to enable logs
2018-11-08 09:00:25 +00:00
2019-01-09 16:16:59 +00:00
/// Add columns from storage to source_columns list.
void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storage, NamesAndTypesList & source_columns)
2019-01-09 16:16:59 +00:00
{
2018-11-08 09:00:25 +00:00
if (storage)
{
auto physical_columns = storage->getColumns().getAllPhysical();
if (source_columns.empty())
source_columns.swap(physical_columns);
else
source_columns.insert(source_columns.end(), physical_columns.begin(), physical_columns.end());
if (select_query)
{
const auto & storage_aliases = storage->getColumns().aliases;
source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end());
}
}
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
/// Expand asterisks and qualified asterisks with column names.
/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer.
2019-02-13 19:00:52 +00:00
void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query, const Context & context,
const Names & source_columns_list, const NameSet & source_columns_set,
const JoinedColumnsList & columns_from_joined_table)
2018-11-02 18:53:23 +00:00
{
2019-02-13 19:00:52 +00:00
std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(select_query, context);
2018-11-02 18:53:23 +00:00
if (tables_with_columns.empty())
2018-11-02 18:53:23 +00:00
{
Names all_columns_name = source_columns_list;
2018-11-02 18:53:23 +00:00
/// TODO: asterisk_left_columns_only probably does not work in some cases
if (!context.getSettingsRef().asterisk_left_columns_only)
{
for (auto & column : columns_from_joined_table)
all_columns_name.emplace_back(column.name_and_type.name);
}
2018-11-02 18:53:23 +00:00
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name));
}
LogAST log;
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns);
TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream());
visitor.visit(query);
2018-11-02 18:53:23 +00:00
}
2018-11-02 18:53:23 +00:00
bool hasArrayJoin(const ASTPtr & ast)
{
2019-03-11 13:22:51 +00:00
if (const ASTFunction * function = ast->as<ASTFunction>())
2018-11-02 18:53:23 +00:00
if (function->name == "arrayJoin")
return true;
for (const auto & child : ast->children)
2019-03-11 13:22:51 +00:00
if (!child->as<ASTSelectQuery>() && hasArrayJoin(child))
2018-11-02 18:53:23 +00:00
return true;
return false;
}
2019-01-09 16:16:59 +00:00
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
2018-11-02 18:53:23 +00:00
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
{
if (required_result_columns.empty())
return;
ASTs & elements = select_query->select_expression_list->children;
ASTs new_elements;
new_elements.reserve(elements.size());
/// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
/// In that case we keep them exactly same number of times.
std::map<String, size_t> required_columns_with_duplicate_count;
for (const auto & name : required_result_columns)
++required_columns_with_duplicate_count[name];
for (const auto & elem : elements)
{
String name = elem->getAliasOrColumnName();
auto it = required_columns_with_duplicate_count.find(name);
if (required_columns_with_duplicate_count.end() != it && it->second)
{
new_elements.push_back(elem);
--it->second;
}
else if (select_query->distinct || hasArrayJoin(elem))
{
new_elements.push_back(elem);
}
}
elements = std::move(new_elements);
}
2019-01-09 16:16:59 +00:00
/// Replacing scalar subqueries with constant values.
/// Runs ExecuteScalarSubqueriesVisitor over `query` with the given context and subquery depth.
void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth)
{
    LogAST ast_log;
    ExecuteScalarSubqueriesVisitor::Data data{context, subquery_depth};
    ExecuteScalarSubqueriesVisitor executor(data, ast_log.stream());
    executor.visit(query);
}
/** Calls to these functions in the GROUP BY statement would be
* replaced by their immediate argument.
* (optimizeGroupBy replaces the call with all of its non-literal arguments.)
*/
const std::unordered_set<String> injective_function_names
{
"negate",
"bitNot",
"reverse",
"reverseUTF8",
"toString",
"toFixedString",
"IPv4NumToString",
"IPv4StringToNum",
"hex",
"unhex",
"bitmaskToList",
"bitmaskToArray",
"tuple",
"regionToName",
"concatAssumeInjective",
};
/** Dictionary lookup functions that are treated as injective only conditionally:
* optimizeGroupBy asks the dictionary named by the first argument whether the attribute
* named by the second argument is injective, and eliminates the call only if it is.
*/
const std::unordered_set<String> possibly_injective_function_names
{
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
"dictGetUInt32",
"dictGetUInt64",
"dictGetInt8",
"dictGetInt16",
"dictGetInt32",
"dictGetInt64",
"dictGetFloat32",
"dictGetFloat64",
"dictGetDate",
"dictGetDateTime"
};
2019-01-09 16:16:59 +00:00
/// Eliminates injective function calls and constant expressions from group by statement.
2018-11-02 18:53:23 +00:00
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context)
{
if (!select_query->group_expression_list)
2018-11-02 18:53:23 +00:00
return;
const auto is_literal = [] (const ASTPtr & ast) -> bool
2018-11-02 18:53:23 +00:00
{
2019-03-11 13:22:51 +00:00
return ast->as<ASTLiteral>();
2018-11-02 18:53:23 +00:00
};
auto & group_exprs = select_query->group_expression_list->children;
/// removes expression at index idx by making it last one and calling .pop_back()
const auto remove_expr_at_index = [&group_exprs] (const size_t idx)
{
if (idx < group_exprs.size() - 1)
std::swap(group_exprs[idx], group_exprs.back());
group_exprs.pop_back();
};
/// iterate over each GROUP BY expression, eliminate injective function calls and literals
for (size_t i = 0; i < group_exprs.size();)
{
2019-03-11 13:22:51 +00:00
if (const auto * function = group_exprs[i]->as<ASTFunction>())
2018-11-02 18:53:23 +00:00
{
/// assert function is injective
if (possibly_injective_function_names.count(function->name))
{
/// do not handle semantic errors here
if (function->arguments->children.size() < 2)
{
++i;
continue;
}
2019-03-11 13:22:51 +00:00
const auto & dict_name = function->arguments->children[0]->as<ASTLiteral>()->value.safeGet<String>();
2018-11-02 18:53:23 +00:00
const auto & dict_ptr = context.getExternalDictionaries().getDictionary(dict_name);
2019-03-11 13:22:51 +00:00
const auto & attr_name = function->arguments->children[1]->as<ASTLiteral>()->value.safeGet<String>();
2018-11-02 18:53:23 +00:00
if (!dict_ptr->isInjective(attr_name))
{
++i;
continue;
}
}
else if (!injective_function_names.count(function->name))
{
++i;
continue;
}
/// copy shared pointer to args in order to ensure lifetime
auto args_ast = function->arguments;
/** remove function call and take a step back to ensure
* next iteration does not skip not yet processed data
*/
remove_expr_at_index(i);
/// copy non-literal arguments
std::remove_copy_if(
std::begin(args_ast->children), std::end(args_ast->children),
std::back_inserter(group_exprs), is_literal
);
}
else if (is_literal(group_exprs[i]))
{
remove_expr_at_index(i);
}
else
{
/// if neither a function nor literal - advance to next expression
++i;
}
}
if (group_exprs.empty())
{
/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation.
* Instead, leave `GROUP BY const`.
* Next, see deleting the constants in the analyzeAggregation method.
*/
/// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens.
UInt64 unused_column = 0;
String unused_column_name = toString(unused_column);
while (source_columns.count(unused_column_name))
{
++unused_column;
unused_column_name = toString(unused_column);
}
select_query->group_expression_list = std::make_shared<ASTExpressionList>();
select_query->group_expression_list->children.emplace_back(std::make_shared<ASTLiteral>(UInt64(unused_column)));
}
}
2019-01-09 16:16:59 +00:00
/// Remove duplicate items from ORDER BY.
2018-11-02 18:53:23 +00:00
void optimizeOrderBy(const ASTSelectQuery * select_query)
{
if (!select_query->order_expression_list)
2018-11-02 18:53:23 +00:00
return;
/// Make unique sorting conditions.
using NameAndLocale = std::pair<String, String>;
std::set<NameAndLocale> elems_set;
ASTs & elems = select_query->order_expression_list->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
String name = elem->children.front()->getColumnName();
2019-03-11 13:22:51 +00:00
const auto * order_by_elem = elem->as<ASTOrderByElement>();
2018-11-02 18:53:23 +00:00
if (elems_set.emplace(name, order_by_elem->collation ? order_by_elem->collation->getColumnName() : "").second)
2018-11-02 18:53:23 +00:00
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = unique_elems;
}
2019-01-09 16:16:59 +00:00
/// Remove duplicate items from LIMIT BY.
2018-11-02 18:53:23 +00:00
void optimizeLimitBy(const ASTSelectQuery * select_query)
{
if (!select_query->limit_by_expression_list)
2018-11-02 18:53:23 +00:00
return;
std::set<String> elems_set;
ASTs & elems = select_query->limit_by_expression_list->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
if (elems_set.emplace(elem->getColumnName()).second)
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = unique_elems;
}
2019-01-09 16:16:59 +00:00
/// Remove duplicated columns from USING(...).
2018-11-02 18:53:23 +00:00
void optimizeUsing(const ASTSelectQuery * select_query)
{
if (!select_query->join())
2018-11-02 18:53:23 +00:00
return;
const auto * table_join = select_query->join()->table_join->as<ASTTableJoin>();
2018-11-02 18:53:23 +00:00
if (!(table_join && table_join->using_expression_list))
return;
ASTs & expression_list = table_join->using_expression_list->children;
ASTs uniq_expressions_list;
std::set<String> expressions_names;
for (const auto & expression : expression_list)
{
auto expression_name = expression->getAliasOrColumnName();
if (expressions_names.find(expression_name) == expressions_names.end())
{
uniq_expressions_list.push_back(expression);
expressions_names.insert(expression_name);
}
}
if (uniq_expressions_list.size() < expression_list.size())
expression_list = uniq_expressions_list;
}
/// Fills the ARRAY JOIN related maps of `result` (array_join_name_to_alias, array_join_alias_to_name,
/// array_join_result_to_source) for a query with an ARRAY JOIN section; does nothing otherwise.
///
/// If the visitor finds no use of the ARRAY JOIN result, the first ARRAY JOIN expression is registered anyway:
/// either as an array, or - for a nested table - through the first source column belonging to it.
/// Throws EMPTY_NESTED_TABLE when a nested table has no such source column.
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
                           const Names & source_columns, const NameSet & source_columns_set)
{
    const ASTPtr array_join_expression_list = select_query->array_join_expression_list();
    if (!array_join_expression_list)
        return;

    ArrayJoinedColumnsVisitor::Data visitor_data{result.aliases,
                                                 result.array_join_name_to_alias,
                                                 result.array_join_alias_to_name,
                                                 result.array_join_result_to_source};
    ArrayJoinedColumnsVisitor(visitor_data).visit(query);

    /// If the result of ARRAY JOIN is not used, it is necessary to ARRAY-JOIN any column,
    /// to get the correct number of rows.
    if (!result.array_join_result_to_source.empty())
        return;

    ASTPtr first_expr = select_query->array_join_expression_list()->children.at(0);
    String source_name = first_expr->getColumnName();
    String result_name = first_expr->getAliasOrColumnName();

    /// This is an array.
    const bool is_array = !first_expr->as<ASTIdentifier>() || source_columns_set.count(source_name);
    if (is_array)
    {
        result.array_join_result_to_source[result_name] = source_name;
        return;
    }

    /// This is a nested table: take the first source column of the form `source_name.<something>`.
    for (const auto & column_name : source_columns)
    {
        auto splitted = Nested::splitName(column_name);
        if (splitted.first == source_name && !splitted.second.empty())
        {
            result.array_join_result_to_source[Nested::concatenateName(result_name, splitted.second)] = column_name;
            return;
        }
    }

    throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE);
}
2019-01-09 16:16:59 +00:00
/// Parse JOIN ON expression and collect ASTs for joined columns.
///
/// The ON expression must be either a single `equals` call or an `and` of `equals` calls.
/// For every equality the two sides are attributed to the left and the right table and passed to
/// `analyzed_join.addOnKeys(left_side, right_side)`.
/// Throws INVALID_JOIN_ON_EXPRESSION for anything else. Does nothing if there is no ON expression.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTTableJoin & table_join)
{
    if (!table_join.on_expression)
        return;

    /// Stores examples of columns which are only from one table.
    struct TableBelonging
    {
        const ASTIdentifier * example_only_from_left = nullptr;
        const ASTIdentifier * example_only_from_right = nullptr;
    };

    /// Check all identifiers in ast and decide their possible table belonging.
    /// Throws if there are two identifiers definitely from different tables.
    std::function<TableBelonging(const ASTPtr &)> get_table_belonging;
    get_table_belonging = [&](const ASTPtr & ast) -> TableBelonging
    {
        if (IdentifierSemantic::getColumnName(ast))
        {
            const auto * identifier = ast->as<ASTIdentifier>();

            /// It's set in TranslateQualifiedNamesVisitor
            const size_t membership = IdentifierSemantic::getMembership(*identifier);
            if (membership == 1)
                return {identifier, nullptr};
            if (membership == 2)
                return {nullptr, identifier};
            return {};
        }

        /// Not a column: merge what the children say, keeping the first example found for each side.
        TableBelonging merged;
        for (const auto & child : ast->children)
        {
            const auto from_child = get_table_belonging(child);
            if (!merged.example_only_from_left)
                merged.example_only_from_left = from_child.example_only_from_left;
            if (!merged.example_only_from_right)
                merged.example_only_from_right = from_child.example_only_from_right;
        }

        const bool mixes_both_tables = merged.example_only_from_left && merged.example_only_from_right;
        if (mixes_both_tables)
            throw Exception("Invalid columns in JOIN ON section. Columns "
                            + merged.example_only_from_left->getAliasOrColumnName() + " and "
                            + merged.example_only_from_right->getAliasOrColumnName()
                            + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);

        return merged;
    };

    const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
                                  "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]";

    auto throw_syntax_exception = [&](const String & msg)
    {
        throw Exception("Invalid expression for JOIN ON. " + msg + supported_syntax, ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
    };

    /// For equal expression find out corresponding table for each part, translate qualified names and add asts to join keys.
    auto add_keys_from_equality = [&](const ASTPtr & expr)
    {
        const auto * func_equals = expr->as<ASTFunction>();
        if (!(func_equals && func_equals->name == "equals"))
            throw_syntax_exception("Expected equals expression, got " + queryToString(expr) + ".");

        ASTPtr lhs = func_equals->arguments->children.at(0)->clone();
        ASTPtr rhs = func_equals->arguments->children.at(1)->clone();

        const auto lhs_belonging = get_table_belonging(lhs);
        const auto rhs_belonging = get_table_belonging(rhs);

        /// A side "may be from" a table as long as it holds no identifier known to be from the other one.
        const bool lhs_may_be_left = lhs_belonging.example_only_from_right == nullptr;
        const bool lhs_may_be_right = lhs_belonging.example_only_from_left == nullptr;
        const bool rhs_may_be_left = rhs_belonging.example_only_from_right == nullptr;
        const bool rhs_may_be_right = rhs_belonging.example_only_from_left == nullptr;

        /// Default variant when all identifiers may be from any table.
        if (lhs_may_be_left && rhs_may_be_right)
        {
            analyzed_join.addOnKeys(lhs, rhs);
            return;
        }

        if (lhs_may_be_right && rhs_may_be_left)
        {
            analyzed_join.addOnKeys(rhs, lhs);
            return;
        }

        /// Both sides are tied to the same table: report one identifier from each side.
        auto * lhs_example = lhs_belonging.example_only_from_left
            ? lhs_belonging.example_only_from_left
            : lhs_belonging.example_only_from_right;

        auto * rhs_example = rhs_belonging.example_only_from_left
            ? rhs_belonging.example_only_from_left
            : rhs_belonging.example_only_from_right;

        auto lhs_name = queryToString(*lhs_example);
        auto rhs_name = queryToString(*rhs_example);
        auto expr_name = queryToString(expr);

        throw_syntax_exception("In expression " + expr_name + " columns " + lhs_name + " and " + rhs_name
                               + " are from the same table but from different arguments of equal function.");
    };

    const auto * func = table_join.on_expression->as<ASTFunction>();
    if (!func || func->name != "and")
    {
        add_keys_from_equality(table_join.on_expression);
        return;
    }

    for (const auto & conjunct : func->arguments->children)
        add_keys_from_equality(conjunct);
}
2019-01-09 16:16:59 +00:00
/// Find the columns that are obtained by JOIN.
2019-02-13 19:00:52 +00:00
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery & select_query,
const NameSet & source_columns, const String & current_database, bool join_use_nulls)
2018-11-02 18:53:23 +00:00
{
2019-02-13 19:00:52 +00:00
const ASTTablesInSelectQueryElement * node = select_query.join();
2018-11-02 18:53:23 +00:00
if (!node)
return;
2019-03-11 13:22:51 +00:00
const auto * table_join = node->table_join->as<ASTTableJoin>();
const auto * table_expression = node->table_expression->as<ASTTableExpression>();
DatabaseAndTableWithAlias joined_table_name(*table_expression, current_database);
2018-11-02 18:53:23 +00:00
if (table_join->using_expression_list)
2018-11-02 18:53:23 +00:00
{
2019-03-11 13:22:51 +00:00
const auto * keys = table_join->using_expression_list->as<ASTExpressionList>();
for (const auto & key : keys->children)
analyzed_join.addUsingKey(key);
2019-02-06 16:44:47 +00:00
for (auto & name : analyzed_join.key_names_right)
if (source_columns.count(name))
name = joined_table_name.getQualifiedNamePrefix() + name;
2018-11-02 18:53:23 +00:00
}
else if (table_join->on_expression)
collectJoinedColumnsFromJoinOnExpr(analyzed_join, *table_join);
2018-11-02 18:53:23 +00:00
bool make_nullable = join_use_nulls && isLeftOrFull(table_join->kind);
2018-11-02 18:53:23 +00:00
2019-02-13 19:00:52 +00:00
analyzed_join.calculateAvailableJoinedColumns(make_nullable);
2018-11-02 18:53:23 +00:00
}
2019-02-21 19:38:12 +00:00
/// Renames, in place, every column of `columns` whose name is already present in `source_columns`
/// by prepending the qualified-name prefix of `table`.
/// Returns the names the columns had before renaming, in the same order as `columns`.
Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_columns, const DatabaseAndTableWithAlias& table)
{
    Names names_before;
    names_before.reserve(columns.size());

    for (auto & column : columns)
    {
        names_before.push_back(column.name);

        const bool name_is_occupied = source_columns.count(column.name) != 0;
        if (name_is_occupied)
            column.name = table.getQualifiedNamePrefix() + column.name;
    }

    return names_before;
}
void replaceJoinedTable(const ASTTablesInSelectQueryElement* join)
{
if (!join || !join->table_expression)
return;
2019-03-11 13:22:51 +00:00
const auto * table_expr = join->table_expression->as<ASTTableExpression>();
if (table_expr->database_and_table_name)
{
2019-03-11 13:22:51 +00:00
const auto * table_id = table_expr->database_and_table_name->as<ASTIdentifier>();
String expr = "(select * from " + table_id->name + ") as " + table_id->shortName();
2019-02-26 14:12:25 +00:00
// FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b",
// we can't replace aliased tables.
// FIXME: long table names include database name, which we can't save within alias.
if (table_id->alias.empty() && table_id->isShort())
{
2019-02-26 14:12:25 +00:00
ParserTableExpression parser;
2019-03-11 13:22:51 +00:00
table_expr = parseQuery(parser, expr, 0)->as<ASTTableExpression>();
}
}
2018-11-02 18:53:23 +00:00
}
} // namespace
2019-01-09 16:16:59 +00:00
/// Runs the syntax-level analysis and rewriting of `query` and returns the collected result.
///
/// `source_columns_` are the columns already known to the caller; columns of `storage` are appended to them.
/// When `storage` is null and the query is a SELECT, the storage is looked up from the query's first table.
/// `required_result_columns` (if not empty) restricts which SELECT elements are kept.
/// `query` is modified in place. Throws LOGICAL_ERROR if duplicates remain in the source columns.
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
    ASTPtr & query,
    const NamesAndTypesList & source_columns_,
    const Names & required_result_columns,
    StoragePtr storage) const
{
    auto * select_query = query->as<ASTSelectQuery>();

    if (select_query && !storage)
    {
        auto db_and_table = getDatabaseAndTable(*select_query, 0);
        if (db_and_table)
            storage = context.tryGetTable(db_and_table->database, db_and_table->table);
    }

    SyntaxAnalyzerResult result;
    result.storage = storage;
    result.source_columns = source_columns_;

    collectSourceColumns(select_query, result.storage, result.source_columns);
    NameSet source_columns_set = removeDuplicateColumns(result.source_columns);

    const auto & settings = context.getSettingsRef();

    Names source_columns_list;
    source_columns_list.reserve(result.source_columns.size());
    for (const auto & column : result.source_columns)
        source_columns_list.emplace_back(column.name);

    if (source_columns_set.size() != source_columns_list.size())
        throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR);

    if (select_query)
    {
        const ASTTablesInSelectQueryElement * join_element = select_query->join();
        if (join_element)
        {
            if (settings.enable_optimize_predicate_expression)
                replaceJoinedTable(join_element);

            const auto * joined_expression = join_element->table_expression->as<ASTTableExpression>();
            DatabaseAndTableWithAlias joined_table(*joined_expression, context.getCurrentDatabase());

            NamesAndTypesList joined_columns = getNamesAndTypeListFromTableExpression(*joined_expression, context);
            Names original_names = qualifyOccupiedNames(joined_columns, source_columns_set, joined_table);
            result.analyzed_join.calculateColumnsFromJoinedTable(joined_columns, original_names);
        }

        translateQualifiedNames(
            query, *select_query, context,
            (storage ? storage->getColumns().ordinary.getNames() : source_columns_list),
            source_columns_set,
            result.analyzed_join.columns_from_joined_table);

        /// Depending on the user's profile, check for the execution rights
        /// distributed subqueries inside the IN or JOIN sections and process these subqueries.
        InJoinSubqueriesPreprocessor(context).process(select_query);

        /// Optimizes logical expressions.
        LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
    }

    /// Creates a dictionary `aliases`: alias -> ASTPtr
    {
        LogAST ast_log;
        QueryAliasesVisitor::Data aliases_data{result.aliases};
        QueryAliasesVisitor(aliases_data, ast_log.stream()).visit(query);
    }

    /// Common subexpression elimination. Rewrite rules.
    {
        QueryNormalizer::Data normalizer_data(result.aliases, settings);
        QueryNormalizer(normalizer_data).visit(query);
    }

    /// Remove unneeded columns according to 'required_result_columns'.
    /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
    /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
    ///  and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
    if (select_query)
        removeUnneededColumnsFromSelectClause(select_query, required_result_columns);

    /// Executing scalar subqueries - replacing them with constant values.
    executeScalarSubqueries(query, context, subquery_depth);

    /// Optimize if with constant condition after constants was substituted instead of scalar subqueries.
    OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);

    if (select_query)
    {
        /// GROUP BY injective function elimination.
        optimizeGroupBy(select_query, source_columns_set, context);

        /// Remove duplicate items from ORDER BY.
        optimizeOrderBy(select_query);

        /// Remove duplicated elements from LIMIT BY clause.
        optimizeLimitBy(select_query);

        /// Remove duplicated columns from USING(...).
        optimizeUsing(select_query);

        /// array_join_alias_to_name, array_join_result_to_source.
        getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set);

        /// Push the predicate expression down to the subqueries.
        result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();

        collectJoinedColumns(
            result.analyzed_join, *select_query, source_columns_set,
            context.getCurrentDatabase(), settings.join_use_nulls);
    }

    return std::make_shared<const SyntaxAnalyzerResult>(result);
}
2018-11-02 18:53:23 +00:00
}