2018-09-12 05:41:09 +00:00
# include <Core/Names.h>
# include <Interpreters/QueryNormalizer.h>
# include <Parsers/ASTAsterisk.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/ASTIdentifier.h>
# include <Parsers/ASTSelectQuery.h>
# include <Parsers/ASTTablesInSelectQuery.h>
# include <Common/StringUtils/StringUtils.h>
# include <Common/typeid_cast.h>
# include <Poco/String.h>
2018-09-24 03:20:22 +00:00
# include <Parsers/ASTQualifiedAsterisk.h>
2018-10-18 15:03:14 +00:00
# include <IO/WriteHelpers.h>
2018-09-12 05:41:09 +00:00
namespace DB
{
/// Error codes used by the exceptions thrown in this file.
/// They are only declared here (extern); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int CYCLIC_ALIASES;
    extern const int TOO_DEEP_AST;
}
2019-01-10 18:58:55 +00:00
/// Initializes the normalizer state from the query tree to rewrite, the alias map,
/// the extracted settings and the per-table column name lists.
///
/// NOTE(review): settings_ and tables_with_columns_ are rvalue-reference parameters, but inside
/// the initializer list they are named lvalues, so members stored by value would be
/// copy-initialized rather than moved. Confirm against the member declarations in the header
/// whether std::move is wanted here.
QueryNormalizer::QueryNormalizer(
    ASTPtr & query_,
    const QueryNormalizer::Aliases & aliases_,
    ExtractedSettings && settings_,
    std::vector<TableWithColumnNames> && tables_with_columns_)
    : query(query_)
    , aliases(aliases_)
    , settings(settings_)
    , tables_with_columns(tables_with_columns_)
{
}
2018-09-12 05:41:09 +00:00
void QueryNormalizer : : perform ( )
{
SetOfASTs tmp_set ;
MapOfASTs tmp_map ;
performImpl ( query , tmp_map , tmp_set , " " , 0 ) ;
try
{
query - > checkSize ( settings . max_expanded_ast_elements ) ;
}
catch ( Exception & e )
{
e . addMessage ( " (after expansion of aliases) " ) ;
throw ;
}
}
/// finished_asts - already processed vertices (and by what they replaced)
/// current_asts - vertices in the current call stack of this method
/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases)
void QueryNormalizer : : performImpl ( ASTPtr & ast , MapOfASTs & finished_asts , SetOfASTs & current_asts , std : : string current_alias , size_t level )
{
if ( level > settings . max_ast_depth )
2018-10-18 15:03:14 +00:00
throw Exception ( " Normalized AST is too deep. Maximum: " + toString ( settings . max_ast_depth ) , ErrorCodes : : TOO_DEEP_AST ) ;
2018-09-12 05:41:09 +00:00
if ( finished_asts . count ( ast ) )
{
ast = finished_asts [ ast ] ;
return ;
}
ASTPtr initial_ast = ast ;
current_asts . insert ( initial_ast . get ( ) ) ;
String my_alias = ast - > tryGetAlias ( ) ;
if ( ! my_alias . empty ( ) )
current_alias = my_alias ;
/// rewrite rules that act when you go from top to bottom.
bool replaced = false ;
ASTIdentifier * identifier_node = nullptr ;
ASTFunction * func_node = nullptr ;
if ( ( func_node = typeid_cast < ASTFunction * > ( ast . get ( ) ) ) )
{
/// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`.
if ( functionIsInOrGlobalInOperator ( func_node - > name ) )
if ( ASTIdentifier * right = typeid_cast < ASTIdentifier * > ( func_node - > arguments - > children . at ( 1 ) . get ( ) ) )
if ( ! aliases . count ( right - > name ) )
2018-09-20 13:13:33 +00:00
right - > setSpecial ( ) ;
2018-09-12 05:41:09 +00:00
/// Special cases for count function.
String func_name_lowercase = Poco : : toLower ( func_node - > name ) ;
if ( startsWith ( func_name_lowercase , " count " ) )
{
/// Select implementation of countDistinct based on settings.
/// Important that it is done as query rewrite. It means rewritten query
/// will be sent to remote servers during distributed query execution,
/// and on all remote servers, function implementation will be same.
if ( endsWith ( func_node - > name , " Distinct " ) & & func_name_lowercase = = " countdistinct " )
func_node - > name = settings . count_distinct_implementation ;
/// As special case, treat count(*) as count(), not as count(list of all columns).
if ( func_name_lowercase = = " count " & & func_node - > arguments - > children . size ( ) = = 1
& & typeid_cast < const ASTAsterisk * > ( func_node - > arguments - > children [ 0 ] . get ( ) ) )
{
func_node - > arguments - > children . clear ( ) ;
}
}
}
else if ( ( identifier_node = typeid_cast < ASTIdentifier * > ( ast . get ( ) ) ) )
{
2018-09-20 13:13:33 +00:00
if ( identifier_node - > general ( ) )
2018-09-12 05:41:09 +00:00
{
/// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
auto it_alias = aliases . find ( identifier_node - > name ) ;
if ( it_alias ! = aliases . end ( ) & & current_alias ! = identifier_node - > name )
{
/// Let's replace it with the corresponding tree node.
if ( current_asts . count ( it_alias - > second . get ( ) ) )
throw Exception ( " Cyclic aliases " , ErrorCodes : : CYCLIC_ALIASES ) ;
if ( ! my_alias . empty ( ) & & my_alias ! = it_alias - > second - > getAliasOrColumnName ( ) )
{
/// Avoid infinite recursion here
auto replace_to_identifier = typeid_cast < ASTIdentifier * > ( it_alias - > second . get ( ) ) ;
2018-09-20 13:13:33 +00:00
bool is_cycle = replace_to_identifier & & replace_to_identifier - > general ( )
2018-09-12 05:41:09 +00:00
& & replace_to_identifier - > name = = identifier_node - > name ;
if ( ! is_cycle )
{
/// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
ast = it_alias - > second - > clone ( ) ;
ast - > setAlias ( my_alias ) ;
replaced = true ;
}
}
else
{
ast = it_alias - > second ;
replaced = true ;
}
}
}
}
else if ( ASTExpressionList * expr_list = typeid_cast < ASTExpressionList * > ( ast . get ( ) ) )
{
2018-09-25 02:42:51 +00:00
/// Replace *, alias.*, database.table.* with a list of columns.
2019-01-10 18:58:55 +00:00
ASTs old_children ;
if ( processAsterisks ( ) )
2018-09-12 05:41:09 +00:00
{
2019-01-10 18:58:55 +00:00
bool has_asterisk = false ;
for ( const auto & child : expr_list - > children )
2018-09-12 05:41:09 +00:00
{
2019-01-10 18:58:55 +00:00
if ( typeid_cast < const ASTAsterisk * > ( child . get ( ) ) | |
typeid_cast < const ASTQualifiedAsterisk * > ( child . get ( ) ) )
{
has_asterisk = true ;
break ;
}
}
2018-09-12 05:41:09 +00:00
2019-01-10 18:58:55 +00:00
if ( has_asterisk )
{
old_children . swap ( expr_list - > children ) ;
expr_list - > children . reserve ( old_children . size ( ) ) ;
2018-09-12 05:41:09 +00:00
}
2019-01-10 18:58:55 +00:00
}
for ( const auto & child : old_children )
{
if ( typeid_cast < const ASTAsterisk * > ( child . get ( ) ) )
{
2019-01-10 22:04:37 +00:00
for ( const auto & pr : tables_with_columns )
for ( const auto & column_name : pr . second )
2019-01-10 18:58:55 +00:00
expr_list - > children . emplace_back ( std : : make_shared < ASTIdentifier > ( column_name ) ) ;
}
else if ( const auto * qualified_asterisk = typeid_cast < const ASTQualifiedAsterisk * > ( child . get ( ) ) )
2018-09-24 03:20:22 +00:00
{
2018-10-10 20:37:01 +00:00
const ASTIdentifier * identifier = typeid_cast < const ASTIdentifier * > ( qualified_asterisk - > children [ 0 ] . get ( ) ) ;
2018-09-24 03:20:22 +00:00
size_t num_components = identifier - > children . size ( ) ;
2019-01-10 18:58:55 +00:00
for ( const auto & [ table_name , table_columns ] : tables_with_columns )
2018-09-24 03:20:22 +00:00
{
2018-10-10 20:44:19 +00:00
if ( ( num_components = = 2 /// database.table.*
& & ! table_name . database . empty ( ) /// This is normal (not a temporary) table.
& & static_cast < const ASTIdentifier & > ( * identifier - > children [ 0 ] ) . name = = table_name . database
& & static_cast < const ASTIdentifier & > ( * identifier - > children [ 1 ] ) . name = = table_name . table )
| | ( num_components = = 0 /// t.*
& & ( ( ! table_name . table . empty ( ) & & identifier - > name = = table_name . table ) /// table.*
| | ( ! table_name . alias . empty ( ) & & identifier - > name = = table_name . alias ) ) ) ) /// alias.*
2018-09-24 03:20:22 +00:00
{
2019-01-10 18:58:55 +00:00
for ( const auto & column_name : table_columns )
expr_list - > children . emplace_back ( std : : make_shared < ASTIdentifier > ( column_name ) ) ;
2018-10-10 20:44:50 +00:00
break ;
2018-09-24 03:20:22 +00:00
}
}
}
2019-01-10 18:58:55 +00:00
else
expr_list - > children . emplace_back ( child ) ;
2018-09-12 05:41:09 +00:00
}
}
else if ( ASTTablesInSelectQueryElement * tables_elem = typeid_cast < ASTTablesInSelectQueryElement * > ( ast . get ( ) ) )
{
if ( tables_elem - > table_expression )
{
auto & database_and_table_name = static_cast < ASTTableExpression & > ( * tables_elem - > table_expression ) . database_and_table_name ;
if ( database_and_table_name )
{
if ( ASTIdentifier * right = typeid_cast < ASTIdentifier * > ( database_and_table_name . get ( ) ) )
2018-09-20 13:13:33 +00:00
right - > setSpecial ( ) ;
2018-09-12 05:41:09 +00:00
}
}
}
/// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
if ( replaced )
{
performImpl ( ast , finished_asts , current_asts , current_alias , level + 1 ) ;
current_asts . erase ( initial_ast . get ( ) ) ;
current_asts . erase ( ast . get ( ) ) ;
finished_asts [ initial_ast ] = ast ;
return ;
}
/// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers.
/// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
if ( func_node & & func_node - > name = = " lambda " )
{
/// We skip the first argument. We also assume that the lambda function can not have parameters.
for ( size_t i = 1 , size = func_node - > arguments - > children . size ( ) ; i < size ; + + i )
{
auto & child = func_node - > arguments - > children [ i ] ;
if ( typeid_cast < const ASTSelectQuery * > ( child . get ( ) ) | | typeid_cast < const ASTTableExpression * > ( child . get ( ) ) )
continue ;
performImpl ( child , finished_asts , current_asts , current_alias , level + 1 ) ;
}
}
else if ( identifier_node )
{
}
else
{
for ( auto & child : ast - > children )
{
if ( typeid_cast < const ASTSelectQuery * > ( child . get ( ) ) | | typeid_cast < const ASTTableExpression * > ( child . get ( ) ) )
continue ;
performImpl ( child , finished_asts , current_asts , current_alias , level + 1 ) ;
}
}
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression.
if ( ASTSelectQuery * select = typeid_cast < ASTSelectQuery * > ( ast . get ( ) ) )
{
if ( select - > prewhere_expression )
performImpl ( select - > prewhere_expression , finished_asts , current_asts , current_alias , level + 1 ) ;
if ( select - > where_expression )
performImpl ( select - > where_expression , finished_asts , current_asts , current_alias , level + 1 ) ;
if ( select - > having_expression )
performImpl ( select - > having_expression , finished_asts , current_asts , current_alias , level + 1 ) ;
}
current_asts . erase ( initial_ast . get ( ) ) ;
current_asts . erase ( ast . get ( ) ) ;
finished_asts [ initial_ast ] = ast ;
}
}