#include <DataStreams/ExpressionBlockInputStream.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <DataStreams/FinishSortingBlockInputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
#include <DataStreams/LimitByBlockInputStream.h>
#include <DataStreams/PartialSortingBlockInputStream.h>
#include <DataStreams/MergeSortingBlockInputStream.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <DataStreams/AggregatingBlockInputStream.h>
#include <DataStreams/MergingAggregatedBlockInputStream.h>
#include <DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/ParallelAggregatingBlockInputStream.h>
#include <DataStreams/DistinctBlockInputStream.h>
#include <DataStreams/NullBlockInputStream.h>
#include <DataStreams/TotalsHavingBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <DataStreams/CreatingSetsBlockInputStream.h>
#include <DataStreams/MaterializingBlockInputStream.h>
#include <DataStreams/ConcatBlockInputStream.h>
#include <DataStreams/RollupBlockInputStream.h>
#include <DataStreams/CubeBlockInputStream.h>
#include <DataStreams/ConvertColumnLowCardinalityToFullBlockInputStream.h>
#include <DataStreams/ConvertingBlockInputStream.h>
#include <DataStreams/ReverseBlockInputStream.h>
#include <DataStreams/FillingBlockInputStream.h>
#include <DataStreams/CheckNonEmptySetBlockInputStream.h>

#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>

#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
#include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/Join.h>

#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/IStorage.h>
#include <Storages/StorageValues.h>

#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>

#include <Functions/IFunction.h>
#include <Core/Field.h>
#include <Core/Types.h>
#include <Columns/Collator.h>
#include <Common/FieldVisitors.h>
#include <Common/typeid_cast.h>
#include <Common/checkStackSize.h>
#include <Parsers/queryToString.h>
#include <ext/map.h>
#include <memory>

#include <Processors/Sources/NullSource.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Processors/Transforms/MergingAggregatedTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/TotalsHavingTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Transforms/MergingSortedTransform.h>
#include <Processors/Transforms/DistinctTransform.h>
#include <Processors/Transforms/LimitByTransform.h>
#include <Processors/Transforms/ExtremesTransform.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
#include <Processors/Transforms/RollupTransform.h>
#include <Processors/Transforms/CubeTransform.h>
#include <Processors/Transforms/FillingTransform.h>
#include <Processors/LimitTransform.h>
#include <Processors/Transforms/FinishSortingTransform.h>

#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataStreams/materializeBlock.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int TOO_DEEP_SUBQUERIES;
    extern const int THERE_IS_NO_COLUMN;
    extern const int SAMPLING_NOT_SUPPORTED;
    extern const int ILLEGAL_FINAL;
    extern const int ILLEGAL_PREWHERE;
    extern const int TOO_MANY_COLUMNS;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int INVALID_LIMIT_EXPRESSION;
    extern const int INVALID_WITH_FILL_EXPRESSION;
}

namespace
{

/// Assumes `storage` is set and the table filter is not empty.
String generateFilterActions(ExpressionActionsPtr & actions, const StoragePtr & storage, const Context & context, const Names & prerequisite_columns = {})
{
    const auto & db_name = storage->getDatabaseName();
    const auto & table_name = storage->getTableName();
    const auto & filter_str = context.getUserProperty(db_name, table_name, "filter");

    /// TODO: implement some AST builders for this kind of stuff
    ASTPtr query_ast = std::make_shared<ASTSelectQuery>();
    auto * select_ast = query_ast->as<ASTSelectQuery>();

    select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
    auto expr_list = select_ast->select();

    auto parseExpression = [](const String & expr)
    {
        ParserExpression expr_parser;
        return parseQuery(expr_parser, expr, 0);
    };

    // The first column is our filter expression.
    expr_list->children.push_back(parseExpression(filter_str));

    /// Keep columns that are required after the filter actions.
    for (const auto & column_str : prerequisite_columns)
        expr_list->children.push_back(parseExpression(column_str));

    select_ast->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared<ASTTablesInSelectQuery>());
    auto tables = select_ast->tables();
    auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
    auto table_expr = std::make_shared<ASTTableExpression>();
    tables->children.push_back(tables_elem);
    tables_elem->table_expression = table_expr;
    tables_elem->children.push_back(table_expr);
    table_expr->database_and_table_name = createTableIdentifier(db_name, table_name);
    table_expr->children.push_back(table_expr->database_and_table_name);

    /// Using separate expression analyzer to prevent any possible alias injection
    auto syntax_result = SyntaxAnalyzer(context).analyze(query_ast, storage->getColumns().getAllPhysical());
    SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context);
    ExpressionActionsChain new_chain(context);
    analyzer.appendSelect(new_chain, false);
    actions = new_chain.getLastActions();

    return expr_list->children.at(0)->getColumnName();
}

}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const SelectQueryOptions & options_,
    const Names & required_result_column_names_)
    : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, options_, required_result_column_names_)
{
}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const BlockInputStreamPtr & input_,
    const SelectQueryOptions & options_)
    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options_.copy().noSubquery())
{}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const StoragePtr & storage_,
    const SelectQueryOptions & options_)
    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, options_.copy().noSubquery())
{}

InterpreterSelectQuery::~InterpreterSelectQuery() = default;


/** There are no limits on the maximum size of the result for the subquery,
  * since the result of a subquery is not the result of the entire query.
  */
static Context getSubqueryContext(const Context & context)
{
    Context subquery_context = context;
    Settings subquery_settings = context.getSettings();
    subquery_settings.max_result_rows = 0;
    subquery_settings.max_result_bytes = 0;
    /// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query).
    subquery_settings.extremes = 0;
    subquery_context.setSettings(subquery_settings);
    return subquery_context;
}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const Context & context_,
    const BlockInputStreamPtr & input_,
    const StoragePtr & storage_,
    const SelectQueryOptions & options_,
    const Names & required_result_column_names)
    : options(options_)
    /// NOTE: the query almost always should be cloned because it will be modified during analysis.
    , query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone())
    , context(context_)
    , storage(storage_)
    , input(input_)
    , log(&Logger::get("InterpreterSelectQuery"))
{
    checkStackSize();
    initSettings();
    const Settings & settings = context.getSettingsRef();

    if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth)
        throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
            ErrorCodes::TOO_DEEP_SUBQUERIES);
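
    /// Optional AST rewrites before analysis: convert CROSS JOIN to INNER JOIN where possible,
    /// and emulate multiple JOINs by rewriting them into nested subqueries.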
    if (settings.allow_experimental_cross_to_join_conversion)
    {
        CrossToInnerJoinVisitor::Data cross_to_inner;
        CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
    }

    if (settings.allow_experimental_multiple_joins_emulation)
    {
        JoinToSubqueryTransformVisitor::Data join_to_subs_data{context};
        JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
    }

    max_streams = settings.max_threads;
    auto & query = getSelectQuery();

    ASTPtr table_expression = extractTableExpression(query, 0);

    bool is_table_func = false;
    bool is_subquery = false;
    if (table_expression)
    {
        is_table_func = table_expression->as<ASTFunction>();
        is_subquery = table_expression->as<ASTSelectWithUnionQuery>();
    }

    if (input)
    {
        /// Read from prepared input.
        source_header = input->getHeader();
    }
    else if (is_subquery)
    {
        /// Read from subquery.
        interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
            table_expression, getSubqueryContext(context), options.subquery(), required_columns);
        source_header = interpreter_subquery->getSampleBlock();
    }
    else if (!storage)
    {
        if (is_table_func)
        {
            /// Read from table function.
            storage = context.getQueryContext().executeTableFunction(table_expression);
        }
        else
        {
            String database_name;
            String table_name;
            getDatabaseAndTableNames(query, database_name, table_name, context);

            if (auto view_source = context.getViewSource())
            {
                auto & storage_values = static_cast<const StorageValues &>(*view_source);
                if (storage_values.getDatabaseName() == database_name && storage_values.getTableName() == table_name)
                {
                    /// Read from view source.
                    storage = context.getViewSource();
                }
            }

            if (!storage)
            {
                /// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
                storage = context.getTable(database_name, table_name);
            }
        }
    }
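
    /// By now the data source is fixed: a prepared input stream, a subquery,
    /// a table function, the view source, or an ordinary table (defaulting to system.one).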
    if (storage)
        table_lock = storage->lockStructureForShare(false, context.getInitialQueryId());

    syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
        query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage, NamesAndTypesList());
    query_analyzer = std::make_unique<SelectQueryExpressionAnalyzer>(
        query_ptr, syntax_analyzer_result, context,
        NameSet(required_result_column_names.begin(), required_result_column_names.end()),
        options.subquery_depth, !options.only_analyze);

    if (!options.only_analyze)
    {
        if (query.sample_size() && (input || !storage || !storage->supportsSampling()))
            throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);

        if (query.final() && (input || !storage || !storage->supportsFinal()))
            throw Exception((!input && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", ErrorCodes::ILLEGAL_FINAL);

        if (query.prewhere() && (input || !storage || !storage->supportsPrewhere()))
            throw Exception((!input && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", ErrorCodes::ILLEGAL_PREWHERE);

        /// Save the new temporary tables in the query context.
        for (const auto & it : query_analyzer->getExternalTables())
            if (!context.tryGetExternalTable(it.first))
                context.addExternalTable(it.first, it.second);
    }

    if (!options.only_analyze || options.modify_inplace)
    {
        if (syntax_analyzer_result->rewrite_subqueries)
        {
            /// Remake interpreter_subquery when the PredicateOptimizer has rewritten subqueries and the main table is a subquery.
            if (is_subquery)
                interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
                    table_expression,
                    getSubqueryContext(context),
                    options.subquery(),
                    required_columns);
        }
    }

    if (interpreter_subquery)
    {
        /// If there is an aggregation in the outer query, WITH TOTALS is ignored in the subquery.
        if (query_analyzer->hasAggregation())
            interpreter_subquery->ignoreWithTotals();
    }

    required_columns = syntax_analyzer_result->requiredSourceColumns();

    if (storage)
        source_header = storage->getSampleBlockForColumns(required_columns);

    /// Calculate structure of the result.
    result_header = getSampleBlockImpl();
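    /// The result header must describe only the structure: create empty columns where missing
    /// and strip the data out of non-empty constant columns, keeping just their type.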
    for (auto & col : result_header)
    {
        if (!col.column)
            col.column = col.type->createColumn();
        else if (isColumnConst(*col.column) && !col.column->empty())
            col.column = col.column->cloneEmpty();
    }
}

void InterpreterSelectQuery::getDatabaseAndTableNames(const ASTSelectQuery & query, String & database_name, String & table_name, const Context & context)
{
    if (auto db_and_table = getDatabaseAndTable(query, 0))
    {
        table_name = db_and_table->table;
        database_name = db_and_table->database;

        /// If the database is not specified - use the current database.
        if (database_name.empty() && !context.tryGetTable("", table_name))
            database_name = context.getCurrentDatabase();
    }
    else /// If the table is not specified - use the table `system.one`.
    {
        database_name = "system";
        table_name = "one";
    }
}

Block InterpreterSelectQuery::getSampleBlock()
{
    return result_header;
}

BlockIO InterpreterSelectQuery::execute()
{
    Pipeline pipeline;
    executeImpl(pipeline, input);
    executeUnion(pipeline, getSampleBlock());

    BlockIO res;
    res.in = pipeline.firstStream();
    return res;
}

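/// Unlike execute(), the streams are not glued with a Union: unifyStreams() brings each stream
/// to the common result header so the caller can consume them in parallel.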
BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
{
    Pipeline pipeline;
    executeImpl(pipeline, input);
    unifyStreams(pipeline, getSampleBlock());
    return pipeline.streams;
}

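/// Processor-based counterpart of execute(): executeImpl() is templated on the pipeline type,
/// so the same query plan is built from processors/transforms instead of block streams.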
QueryPipeline InterpreterSelectQuery::executeWithProcessors()
{
    QueryPipeline query_pipeline;
    query_pipeline.setMaxThreads(context.getSettingsRef().max_threads);
    executeImpl(query_pipeline, input);
    return query_pipeline;
}


Block InterpreterSelectQuery::getSampleBlockImpl()
{
    /// Need to create sets before analyzeExpressions(). Otherwise some sets for index won't be created.
    query_analyzer->makeSetsForIndex(getSelectQuery().where());
    query_analyzer->makeSetsForIndex(getSelectQuery().prewhere());

    auto analysis_result = analyzeExpressions(
        getSelectQuery(),
        *query_analyzer,
        QueryProcessingStage::Enum::FetchColumns,
        options.to_stage,
        context,
        storage,
        true, // only_types
        {} // filter_info
    );

    if (options.to_stage == QueryProcessingStage::Enum::FetchColumns)
    {
        auto header = source_header;

        if (analysis_result.prewhere_info)
        {
            analysis_result.prewhere_info->prewhere_actions->execute(header);
            header = materializeBlock(header);
            if (analysis_result.prewhere_info->remove_prewhere_column)
                header.erase(analysis_result.prewhere_info->prewhere_column_name);
        }
        return header;
    }

    if (options.to_stage == QueryProcessingStage::Enum::WithMergeableState)
    {
        if (!analysis_result.need_aggregate)
            return analysis_result.before_order_and_select->getSampleBlock();

        auto header = analysis_result.before_aggregation->getSampleBlock();

        Names key_names;
        AggregateDescriptions aggregates;
        query_analyzer->getAggregateInfo(key_names, aggregates);
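
        /// In the mergeable state, aggregate columns still hold partial aggregation states,
        /// so they are exposed with the AggregateFunction(...) data type rather than the final result type.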
        Block res;

        for (auto & key : key_names)
            res.insert({nullptr, header.getByName(key).type, key});

        for (auto & aggregate : aggregates)
        {
            size_t arguments_size = aggregate.argument_names.size();
            DataTypes argument_types(arguments_size);
            for (size_t j = 0; j < arguments_size; ++j)
                argument_types[j] = header.getByName(aggregate.argument_names[j]).type;

            DataTypePtr type = std::make_shared<DataTypeAggregateFunction>(aggregate.function, argument_types, aggregate.parameters);
            res.insert({nullptr, type, aggregate.column_name});
        }

        return res;
    }

    return analysis_result.final_projection->getSampleBlock();
}

InterpreterSelectQuery::AnalysisResult
InterpreterSelectQuery::analyzeExpressions(
    const ASTSelectQuery & query,
    SelectQueryExpressionAnalyzer & query_analyzer,
    QueryProcessingStage::Enum from_stage,
    QueryProcessingStage::Enum to_stage,
    const Context & context,
    const StoragePtr & storage,
    bool only_types,
    const FilterInfoPtr & filter_info)
{
    AnalysisResult res;

    /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
    res.first_stage = from_stage < QueryProcessingStage::WithMergeableState
        && to_stage >= QueryProcessingStage::WithMergeableState;
    /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
    res.second_stage = from_stage <= QueryProcessingStage::WithMergeableState
        && to_stage > QueryProcessingStage::WithMergeableState;

    /** First we compose a chain of actions and remember the necessary steps from it.
      * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and
      * throw out unnecessary columns based on the entire query. In unnecessary parts of the query, we will not execute subqueries.
      */

    bool has_filter = false;
    bool has_prewhere = false;
    bool has_where = false;
    size_t where_step_num;
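
    /// Called at stage boundaries: finalizes the chain, records whether the PREWHERE / row-level filter / WHERE
    /// columns can be dropped after filtering, and resets the per-chain flags.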
    auto finalizeChain = [&](ExpressionActionsChain & chain)
    {
        chain.finalize();

        if (has_prewhere)
        {
            const ExpressionActionsChain::Step & step = chain.steps.at(0);
            res.prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);

            Names columns_to_remove;
            for (size_t i = 1; i < step.required_output.size(); ++i)
            {
                if (step.can_remove_required_output[i])
                    columns_to_remove.push_back(step.required_output[i]);
            }

            if (!columns_to_remove.empty())
            {
                auto columns = res.prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList();
                ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(columns, context);
                for (const auto & column : columns_to_remove)
                    actions->add(ExpressionAction::removeColumn(column));

                res.prewhere_info->remove_columns_actions = std::move(actions);
            }

            res.columns_to_remove_after_prewhere = std::move(columns_to_remove);
        }
        else if (has_filter)
        {
            /// Can't have prewhere and filter set simultaneously
            res.filter_info->do_remove_column = chain.steps.at(0).can_remove_required_output.at(0);
        }
        if (has_where)
            res.remove_where_filter = chain.steps.at(where_step_num).can_remove_required_output.at(0);

        has_filter = has_prewhere = has_where = false;

        chain.clear();
    };

    {
        ExpressionActionsChain chain(context);

        Names additional_required_columns_after_prewhere;

        if (storage && query.sample_size())
        {
            Names columns_for_sampling = storage->getColumnsRequiredForSampling();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                columns_for_sampling.begin(), columns_for_sampling.end());
        }

        if (storage && query.final())
        {
            Names columns_for_final = storage->getColumnsRequiredForFinal();
            additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
                columns_for_final.begin(), columns_for_final.end());
        }

        if (storage && filter_info)
        {
            has_filter = true;

            /// XXX: aggregated copy-paste from ExpressionAnalyzer::appendSmth()
            if (chain.steps.empty())
            {
                chain.steps.emplace_back(std::make_shared<ExpressionActions>(NamesAndTypesList(), context));
            }
            ExpressionActionsChain::Step & step = chain.steps.back();

            // FIXME: assert(filter_info);
            res.filter_info = filter_info;
            step.actions = filter_info->actions;
            step.required_output.push_back(res.filter_info->column_name);
            step.can_remove_required_output = {true};

            chain.addStep();
        }

        if (query_analyzer.appendPrewhere(chain, !res.first_stage, additional_required_columns_after_prewhere))
        {
            has_prewhere = true;

            res.prewhere_info = std::make_shared<PrewhereInfo>(
                chain.steps.front().actions, query.prewhere()->getColumnName());

            chain.addStep();
        }

        res.need_aggregate = query_analyzer.hasAggregation();

        query_analyzer.appendArrayJoin(chain, only_types || !res.first_stage);

        if (query_analyzer.appendJoin(chain, only_types || !res.first_stage))
        {
            res.before_join = chain.getLastActions();
            if (!res.hasJoin())
                throw Exception("No expected JOIN", ErrorCodes::LOGICAL_ERROR);
            chain.addStep();
        }

        if (query_analyzer.appendWhere(chain, only_types || !res.first_stage))
        {
            where_step_num = chain.steps.size() - 1;
            has_where = res.has_where = true;
            res.before_where = chain.getLastActions();
            chain.addStep();
        }

        if (res.need_aggregate)
        {
            query_analyzer.appendGroupBy(chain, only_types || !res.first_stage);
            query_analyzer.appendAggregateFunctionsArguments(chain, only_types || !res.first_stage);
            res.before_aggregation = chain.getLastActions();

            finalizeChain(chain);

            if (query_analyzer.appendHaving(chain, only_types || !res.second_stage))
            {
                res.has_having = true;
                res.before_having = chain.getLastActions();
                chain.addStep();
            }
        }

        /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
        query_analyzer.appendSelect(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
        res.selected_columns = chain.getLastStep().required_output;
        res.has_order_by = query_analyzer.appendOrderBy(chain, only_types || (res.need_aggregate ? !res.second_stage : !res.first_stage));
        res.before_order_and_select = chain.getLastActions();
        chain.addStep();

        if (query_analyzer.appendLimitBy(chain, only_types || !res.second_stage))
        {
            res.has_limit_by = true;
            res.before_limit_by = chain.getLastActions();
            chain.addStep();
        }

        query_analyzer.appendProjectResult(chain);
        res.final_projection = chain.getLastActions();

        finalizeChain(chain);
    }

    /// Before executing WHERE and HAVING, remove the extra columns from the block (mostly the aggregation keys).
    if (res.filter_info)
        res.filter_info->actions->prependProjectInput();
    if (res.has_where)
        res.before_where->prependProjectInput();
    if (res.has_having)
        res.before_having->prependProjectInput();

    res.subqueries_for_sets = query_analyzer.getSubqueriesForSets();

    /// Check that PREWHERE doesn't contain unusual actions, i.e. actions that can change the number of rows.
    if (res.prewhere_info)
    {
        auto check_actions = [](const ExpressionActionsPtr & actions)
        {
            if (actions)
                for (const auto & action : actions->getActions())
                    if (action.type == ExpressionAction::Type::JOIN || action.type == ExpressionAction::Type::ARRAY_JOIN)
                        throw Exception("PREWHERE cannot contain ARRAY JOIN or JOIN action", ErrorCodes::ILLEGAL_PREWHERE);
        };

        check_actions(res.prewhere_info->prewhere_actions);
        check_actions(res.prewhere_info->alias_actions);
        check_actions(res.prewhere_info->remove_columns_actions);
    }

    return res;
}

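/// Inserts a CheckNonEmptySetBlockInputStream in front of `stream` when the expression contains
/// an INNER/RIGHT JOIN or adds a column listed in need_check_empty_sets, presumably so the query
/// can short-circuit when the corresponding set is provably empty.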
BlockInputStreamPtr InterpreterSelectQuery::createCheckNonEmptySetIfNeed(BlockInputStreamPtr stream, const ExpressionActionsPtr & expression) const
{
    for (const auto & action : expression->getActions())
    {
        if (action.type == ExpressionAction::JOIN)
        {
            const auto * join = dynamic_cast<Join *>(action.join.get());
            if (!join)
                continue;
            if (isInnerOrRight(join->getKind()))
            {
                stream = std::make_shared<CheckNonEmptySetBlockInputStream>(stream, expression, syntax_analyzer_result->need_check_empty_sets);
                break;
            }
        }
        else if (action.type == ExpressionAction::ADD_COLUMN)
        {
            if (syntax_analyzer_result->need_check_empty_sets.count(action.result_name))
            {
                stream = std::make_shared<CheckNonEmptySetBlockInputStream>(stream, expression, syntax_analyzer_result->need_check_empty_sets);
                break;
            }
        }
    }
    return stream;
}

static Field getWithFillFieldValue(const ASTPtr & node, const Context & context)
{
    const auto & [field, type] = evaluateConstantExpression(node, context);

    if (!isColumnedAsNumber(type))
        throw Exception("Illegal type " + type->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION);

    return field;
}

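/// Validates the WITH FILL clause of an ORDER BY element,
/// e.g. `ORDER BY x WITH FILL FROM 0 TO 10 STEP 2`.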
static FillColumnDescription getWithFillDescription(const ASTOrderByElement & order_by_elem, const Context & context)
{
    FillColumnDescription descr;

    if (order_by_elem.fill_from)
        descr.fill_from = getWithFillFieldValue(order_by_elem.fill_from, context);
    if (order_by_elem.fill_to)
        descr.fill_to = getWithFillFieldValue(order_by_elem.fill_to, context);
    if (order_by_elem.fill_step)
        descr.fill_step = getWithFillFieldValue(order_by_elem.fill_step, context);
    else
        descr.fill_step = order_by_elem.direction;

    if (applyVisitor(FieldVisitorAccurateEquals(), descr.fill_step, Field{0}))
        throw Exception("WITH FILL STEP value cannot be zero", ErrorCodes::INVALID_WITH_FILL_EXPRESSION);

    if (order_by_elem.direction == 1)
    {
        if (applyVisitor(FieldVisitorAccurateLess(), descr.fill_step, Field{0}))
            throw Exception("WITH FILL STEP value cannot be negative for sorting in ascending direction",
                ErrorCodes::INVALID_WITH_FILL_EXPRESSION);

        if (!descr.fill_from.isNull() && !descr.fill_to.isNull() &&
            applyVisitor(FieldVisitorAccurateLess(), descr.fill_to, descr.fill_from))
        {
            throw Exception("WITH FILL TO value cannot be less than FROM value for sorting in ascending direction",
                ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
        }
    }
    else
    {
        if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, descr.fill_step))
            throw Exception("WITH FILL STEP value cannot be positive for sorting in descending direction",
                ErrorCodes::INVALID_WITH_FILL_EXPRESSION);

        if (!descr.fill_from.isNull() && !descr.fill_to.isNull() &&
            applyVisitor(FieldVisitorAccurateLess(), descr.fill_from, descr.fill_to))
        {
            throw Exception("WITH FILL FROM value cannot be less than TO value for sorting in descending direction",
                ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
        }
    }

    return descr;
}

static SortDescription getSortDescription(const ASTSelectQuery & query, const Context & context)
{
    SortDescription order_descr;
    order_descr.reserve(query.orderBy()->children.size());
    for (const auto & elem : query.orderBy()->children)
    {
        String name = elem->children.front()->getColumnName();
        const auto & order_by_elem = elem->as<ASTOrderByElement &>();

        std::shared_ptr<Collator> collator;
        if (order_by_elem.collation)
            collator = std::make_shared<Collator>(order_by_elem.collation->as<ASTLiteral &>().value.get<String>());

        if (order_by_elem.with_fill)
        {
            FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
            order_descr.emplace_back(name, order_by_elem.direction,
                order_by_elem.nulls_direction, collator, true, fill_desc);
        }
        else
            order_descr.emplace_back(name, order_by_elem.direction, order_by_elem.nulls_direction, collator);
    }

    return order_descr;
}

static UInt64 getLimitUIntValue(const ASTPtr & node, const Context & context)
{
    const auto & [field, type] = evaluateConstantExpression(node, context);

    if (!isNativeNumber(type))
        throw Exception("Illegal type " + type->getName() + " of LIMIT expression, must be numeric type", ErrorCodes::INVALID_LIMIT_EXPRESSION);

    Field converted = convertFieldToType(field, DataTypeUInt64());
    if (converted.isNull())
        throw Exception("The value " + applyVisitor(FieldVisitorToString(), field) + " of LIMIT expression is not representable as UInt64", ErrorCodes::INVALID_LIMIT_EXPRESSION);

    return converted.safeGet<UInt64>();
}

static std::pair<UInt64, UInt64> getLimitLengthAndOffset(const ASTSelectQuery & query, const Context & context)
{
    UInt64 length = 0;
    UInt64 offset = 0;

    if (query.limitLength())
    {
        length = getLimitUIntValue(query.limitLength(), context);
        if (query.limitOffset() && length)
            offset = getLimitUIntValue(query.limitOffset(), context);
    }

    return {length, offset};
}
static UInt64 getLimitForSorting(const ASTSelectQuery & query, const Context & context)
{
    /// Partial sort can be done if there is LIMIT but no DISTINCT or LIMIT BY.
    if (!query.distinct && !query.limitBy() && !query.limit_with_ties)
    {
        auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
        return limit_length + limit_offset;
    }

    return 0;
}


static SortingInfoPtr optimizeReadInOrder(const MergeTreeData & merge_tree, const ASTSelectQuery & query,
    const Context & context, const SyntaxAnalyzerResultPtr & global_syntax_result)
{
    if (!merge_tree.hasSortingKey())
        return {};

    auto order_descr = getSortDescription(query, context);
    SortDescription prefix_order_descr;
    int read_direction = order_descr.at(0).direction;

    const auto & sorting_key_columns = merge_tree.getSortingKeyColumns();
    size_t prefix_size = std::min(order_descr.size(), sorting_key_columns.size());

    for (size_t i = 0; i < prefix_size; ++i)
    {
        if (global_syntax_result->array_join_result_to_source.count(order_descr[i].column_name))
            break;

        /// Optimize in case of exact match with order key element
        /// or in some simple cases when order key element is wrapped into monotonic function.
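        /// E.g. for a table with ORDER BY (dt), `ORDER BY toStartOfDay(dt)` can still read in order,
        /// since toStartOfDay is monotonically non-decreasing in dt.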
        int current_direction = order_descr[i].direction;
        if (order_descr[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
            prefix_order_descr.push_back(order_descr[i]);
        else
        {
            auto ast = query.orderBy()->children[i]->children.at(0);
            auto syntax_result = SyntaxAnalyzer(context).analyze(ast, global_syntax_result->required_source_columns);
            auto actions = ExpressionAnalyzer(ast, syntax_result, context).getActions(true);

            const auto & input_columns = actions->getRequiredColumnsWithTypes();
            if (input_columns.size() != 1 || input_columns.front().name != sorting_key_columns[i])
                break;

            bool first = true;
            for (const auto & action : actions->getActions())
            {
                if (action.type != ExpressionAction::APPLY_FUNCTION)
                    continue;

                if (!first)
                {
                    current_direction = 0;
                    break;
                }
                else
                    first = false;

                const auto & func = *action.function_base;
                if (!func.hasInformationAboutMonotonicity())
                {
                    current_direction = 0;
                    break;
                }

                auto monotonicity = func.getMonotonicityForRange(*input_columns.front().type, {}, {});
                if (!monotonicity.is_monotonic)
                {
                    current_direction = 0;
                    break;
                }
                else if (!monotonicity.is_positive)
                    current_direction *= -1;
            }

            if (!current_direction || (i > 0 && current_direction != read_direction))
                break;

            if (i == 0)
                read_direction = current_direction;

            prefix_order_descr.push_back(order_descr[i]);
        }
    }

    if (prefix_order_descr.empty())
        return {};

    return std::make_shared<SortingInfo>(std::move(prefix_order_descr), read_direction);
}


template <typename TPipeline>
void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputStreamPtr & prepared_input)
{
    /** Streams of data. When the query is executed in parallel, we have several data streams.
      * If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then
      * if there is an ORDER BY, then glue the streams using UnionBlockInputStream, and then MergeSortingBlockInputStream,
      * if not, then glue it using UnionBlockInputStream,
      * then apply LIMIT.
      * If there is GROUP BY, then we will perform all operations up to GROUP BY, inclusive, in parallel;
      * a parallel GROUP BY will glue streams into one,
      * then perform the remaining operations with one resulting stream.
      */

    constexpr bool pipeline_with_processors = std::is_same<TPipeline, QueryPipeline>::value;

    /// Now we will compose block streams that perform the necessary actions.
    auto & query = getSelectQuery();
    const Settings & settings = context.getSettingsRef();

    QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns;

    /// PREWHERE optimization.
    /// Turn off, if the table filter is applied.
    if (storage && !context.hasUserProperty(storage->getDatabaseName(), storage->getTableName(), "filter"))
    {
        if (!options.only_analyze)
            from_stage = storage->getQueryProcessingStage(context);

        query_analyzer->makeSetsForIndex(query.where());
        query_analyzer->makeSetsForIndex(query.prewhere());

        auto optimize_prewhere = [&](auto & merge_tree)
        {
            SelectQueryInfo current_info;
            current_info.query = query_ptr;
            current_info.syntax_analyzer_result = syntax_analyzer_result;
            current_info.sets = query_analyzer->getPreparedSets();

            /// Try transferring some condition from WHERE to PREWHERE if enabled and viable.
            if (settings.optimize_move_to_prewhere && query.where() && !query.prewhere() && !query.final())
                MergeTreeWhereOptimizer{current_info, context, merge_tree, syntax_analyzer_result->requiredSourceColumns(), log};
        };

        if (const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
            optimize_prewhere(*merge_tree_data);
    }

    AnalysisResult expressions;
    FilterInfoPtr filter_info;

    /// We need proper `source_header` for `NullBlockInputStream` in dry-run.
    if (storage && context.hasUserProperty(storage->getDatabaseName(), storage->getTableName(), "filter"))
    {
        filter_info = std::make_shared<FilterInfo>();
        filter_info->column_name = generateFilterActions(filter_info->actions, storage, context, required_columns);
        source_header = storage->getSampleBlockForColumns(filter_info->actions->getRequiredColumns());
    }

    SortingInfoPtr sorting_info;
    if (settings.optimize_read_in_order && storage && query.orderBy() && !query_analyzer->hasAggregation() && !query.final() && !query.join())
    {
        if (const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get()))
            sorting_info = optimizeReadInOrder(*merge_tree_data, query, context, syntax_analyzer_result);
    }

    if (options.only_analyze)
    {
        if constexpr (pipeline_with_processors)
            pipeline.init({std::make_shared<NullSource>(source_header)});
        else
            pipeline.streams.emplace_back(std::make_shared<NullBlockInputStream>(source_header));

        expressions = analyzeExpressions(
            getSelectQuery(),
            *query_analyzer,
            QueryProcessingStage::FetchColumns,
            options.to_stage,
            context,
            storage,
            true,
            filter_info);

        if (storage && expressions.filter_info && expressions.prewhere_info)
            throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);

        if (expressions.prewhere_info)
        {
            if constexpr (pipeline_with_processors)
                pipeline.addSimpleTransform([&](const Block & header)
                {
                    return std::make_shared<FilterTransform>(
                        header,
                        expressions.prewhere_info->prewhere_actions,
                        expressions.prewhere_info->prewhere_column_name,
                        expressions.prewhere_info->remove_prewhere_column);
                });
            else
                pipeline.streams.back() = std::make_shared<FilterBlockInputStream>(
                    createCheckNonEmptySetIfNeed(pipeline.streams.back(), expressions.prewhere_info->prewhere_actions), expressions.prewhere_info->prewhere_actions,
                    expressions.prewhere_info->prewhere_column_name, expressions.prewhere_info->remove_prewhere_column);

            // To remove additional columns in dry run.
            // For example, the sampling column, which can be removed at this stage.
            if (expressions.prewhere_info->remove_columns_actions)
            {
                if constexpr (pipeline_with_processors)
                {
                    pipeline.addSimpleTransform([&](const Block & header)
                    {
                        return std::make_shared<ExpressionTransform>(header, expressions.prewhere_info->remove_columns_actions);
                    });
                }
                else
                    pipeline.streams.back() = std::make_shared<ExpressionBlockInputStream>(pipeline.streams.back(), expressions.prewhere_info->remove_columns_actions);
            }
        }
    }
    else
    {
        if (prepared_input)
        {
            if constexpr (pipeline_with_processors)
                pipeline.init({std::make_shared<SourceFromInputStream>(prepared_input)});
            else
                pipeline.streams.push_back(prepared_input);
        }

        expressions = analyzeExpressions(
            getSelectQuery(),
            *query_analyzer,
            from_stage,
            options.to_stage,
            context,
            storage,
            false,
            filter_info);

        if (from_stage == QueryProcessingStage::WithMergeableState &&
            options.to_stage == QueryProcessingStage::WithMergeableState)
            throw Exception("Distributed on Distributed is not supported", ErrorCodes::NOT_IMPLEMENTED);

        if (storage && expressions.filter_info && expressions.prewhere_info)
            throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);

        /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */
        executeFetchColumns(from_stage, pipeline, sorting_info, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere);

        LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(options.to_stage));
    }

    if (options.to_stage > QueryProcessingStage::FetchColumns)
    {
        /// Do I need to aggregate in a separate row rows that have not passed max_rows_to_group_by.
        bool aggregate_overflow_row =
            expressions.need_aggregate &&
            query.group_by_with_totals &&
            settings.max_rows_to_group_by &&
            settings.group_by_overflow_mode == OverflowMode::ANY &&
            settings.totals_mode != TotalsMode::AFTER_HAVING_EXCLUSIVE;

        /// Do I need to immediately finalize the aggregate functions after the aggregation?
        bool aggregate_final =
            expressions.need_aggregate &&
            options.to_stage > QueryProcessingStage::WithMergeableState &&
            !query.group_by_with_totals && !query.group_by_with_rollup && !query.group_by_with_cube;

        if (expressions.first_stage)
        {
            if (expressions.filter_info)
            {
                if constexpr (pipeline_with_processors)
                {
                    pipeline.addSimpleTransform([&](const Block & block, QueryPipeline::StreamType stream_type) -> ProcessorPtr
                    {
                        if (stream_type == QueryPipeline::StreamType::Totals)
                            return nullptr;

                        return std::make_shared<FilterTransform>(
                            block,
                            expressions.filter_info->actions,
                            expressions.filter_info->column_name,
                            expressions.filter_info->do_remove_column);
                    });
                }
                else
                {
                    pipeline.transform([&](auto & stream)
                    {
                        stream = std::make_shared<FilterBlockInputStream>(
                            stream,
                            expressions.filter_info->actions,
                            expressions.filter_info->column_name,
                            expressions.filter_info->do_remove_column);
                    });
                }
            }

            if (expressions.hasJoin())
            {
                Block header_before_join;

                if constexpr (pipeline_with_processors)
                {
                    header_before_join = pipeline.getHeader();

                    /// In case joined subquery has totals, and we don't, add default chunk to totals.
                    bool default_totals = false;
                    if (!pipeline.hasTotals())
                    {
                        pipeline.addDefaultTotals();
                        default_totals = true;
                    }

                    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType type)
                    {
                        bool on_totals = type == QueryPipeline::StreamType::Totals;
                        return std::make_shared<ExpressionTransform>(header, expressions.before_join, on_totals, default_totals);
                    });
                }
                else
                {
                    header_before_join = pipeline.firstStream()->getHeader();

                    /// Applies to all sources except stream_with_non_joined_data.
                    for (auto & stream : pipeline.streams)
                        stream = std::make_shared<ExpressionBlockInputStream>(createCheckNonEmptySetIfNeed(stream, expressions.before_join), expressions.before_join);
                }
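
                /// For RIGHT and FULL JOIN, right-hand rows that found no match must still be emitted;
                /// the join exposes them as a separate stream that is attached to the pipeline as a delayed source.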
                if (JoinPtr join = expressions.before_join->getTableJoinAlgo())
                {
                    if (auto stream = join->createStreamWithNonJoinedRows(header_before_join, settings.max_block_size))
                    {
                        if constexpr (pipeline_with_processors)
                        {
                            auto source = std::make_shared<SourceFromInputStream>(std::move(stream));
                            pipeline.addDelayedStream(source);
                        }
                        else
                            pipeline.stream_with_non_joined_data = std::move(stream);
                    }
                }
            }
2017-04-01 07:20:54 +00:00
2018-02-23 06:00:48 +00:00
if ( expressions . has_where )
2018-04-12 09:45:24 +00:00
executeWhere ( pipeline , expressions . before_where , expressions . remove_where_filter ) ;
2017-04-01 07:20:54 +00:00
2018-02-23 06:00:48 +00:00
if ( expressions . need_aggregate )
executeAggregation ( pipeline , expressions . before_aggregation , aggregate_overflow_row , aggregate_final ) ;
2017-04-01 07:20:54 +00:00
else
{
2019-07-26 22:18:27 +00:00
executeExpression ( pipeline , expressions . before_order_and_select ) ;
2018-02-23 06:00:48 +00:00
executeDistinct ( pipeline , true , expressions . selected_columns ) ;
2017-04-01 07:20:54 +00:00
}

            /** For distributed query processing, if there is no GROUP BY and no HAVING,
              * but there is an ORDER BY or LIMIT,
              * we perform the preliminary sorting and LIMIT on the remote server.
              */
            if (!expressions.second_stage && !expressions.need_aggregate && !expressions.has_having)
            {
                if (expressions.has_order_by)
                    executeOrder(pipeline, query_info.sorting_info);

                if (expressions.has_order_by && query.limitLength())
                    executeDistinct(pipeline, false, expressions.selected_columns);

                if (expressions.has_limit_by)
                {
                    executeExpression(pipeline, expressions.before_limit_by);
                    executeLimitBy(pipeline);
                }

                if (query.limitLength())
                    executePreLimit(pipeline);
            }
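
            /// E.g. for `SELECT x FROM distributed_table ORDER BY x LIMIT 10` (illustrative example),
            /// each remote server sorts its own data and applies LIMIT 10 locally, so the initiator
            /// only needs to merge pre-sorted, pre-limited streams.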

            /// If there are no global subqueries, we can execute subqueries only when we receive them on the server.
            if (!query_analyzer->hasGlobalSubqueries() && !expressions.subqueries_for_sets.empty())
                executeSubqueriesInSetsAndJoins(pipeline, expressions.subqueries_for_sets);
        }

        if (expressions.second_stage)
        {
            bool need_second_distinct_pass = false;
            bool need_merge_streams = false;

            if (expressions.need_aggregate)
            {
                /// If we need to combine aggregated results from multiple servers
                if (!expressions.first_stage)
                    executeMergeAggregated(pipeline, aggregate_overflow_row, aggregate_final);

                if (!aggregate_final)
                {
                    if (query.group_by_with_totals)
                    {
                        bool final = !query.group_by_with_rollup && !query.group_by_with_cube;
                        executeTotalsAndHaving(pipeline, expressions.has_having, expressions.before_having, aggregate_overflow_row, final);
                    }

                    if (query.group_by_with_rollup)
                        executeRollupOrCube(pipeline, Modificator::ROLLUP);
                    else if (query.group_by_with_cube)
                        executeRollupOrCube(pipeline, Modificator::CUBE);

                    if ((query.group_by_with_rollup || query.group_by_with_cube) && expressions.has_having)
                    {
                        if (query.group_by_with_totals)
                            throw Exception("WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING", ErrorCodes::NOT_IMPLEMENTED);

                        executeHaving(pipeline, expressions.before_having);
                    }
                }
                else if (expressions.has_having)
                    executeHaving(pipeline, expressions.before_having);

                executeExpression(pipeline, expressions.before_order_and_select);
                executeDistinct(pipeline, true, expressions.selected_columns);
            }
            else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube)
                throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::LOGICAL_ERROR);

            need_second_distinct_pass = query.distinct && pipeline.hasMixedStreams();

            if (expressions.has_order_by)
            {
                /** If there is an ORDER BY for distributed query processing,
                  * but there is no aggregation, then ORDER BY was already done on the remote servers,
                  * so we merge the sorted streams from the remote servers.
                  */
                if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
                    executeMergeSorted(pipeline);
                else    /// Otherwise, just sort.
                    executeOrder(pipeline, query_info.sorting_info);
            }

            /** Optimization - if there are several sources and there is LIMIT, then first apply
              * the preliminary LIMIT, limiting the number of rows in each source to `offset + limit`.
              */
            if (query.limitLength() && !query.limit_with_ties && pipeline.hasMoreThanOneStream()
                && !query.distinct && !expressions.has_limit_by && !settings.extremes)
            {
                executePreLimit(pipeline);
            }
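
            /// E.g. with `LIMIT 10 OFFSET 5` each parallel stream is truncated to 15 rows here;
            /// the exact LIMIT with OFFSET is applied later, after the streams are merged.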

            if (need_second_distinct_pass
                || query.limitLength()
                || query.limitBy()
                || pipeline.hasDelayedStream())
            {
                need_merge_streams = true;
            }

            if (need_merge_streams)
            {
                if constexpr (pipeline_with_processors)
                    pipeline.resize(1);
                else
                    executeUnion(pipeline, {});
            }

            /** If there was more than one stream,
              * then DISTINCT needs to be performed once again after merging all streams.
              */
            if (need_second_distinct_pass)
                executeDistinct(pipeline, false, expressions.selected_columns);

            if (expressions.has_limit_by)
            {
                executeExpression(pipeline, expressions.before_limit_by);
                executeLimitBy(pipeline);
            }

            executeWithFill(pipeline);

            /** We must do projection after DISTINCT because projection may remove some columns.
              */
            executeProjection(pipeline, expressions.final_projection);

            /** Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
              */
            executeExtremes(pipeline);

            executeLimit(pipeline);
        }
    }

    if (query_analyzer->hasGlobalSubqueries() && !expressions.subqueries_for_sets.empty())
        executeSubqueriesInSetsAndJoins(pipeline, expressions.subqueries_for_sets);
}


template <typename TPipeline>
void InterpreterSelectQuery::executeFetchColumns(
    QueryProcessingStage::Enum processing_stage, TPipeline & pipeline,
    const SortingInfoPtr & sorting_info, const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere)
{
    constexpr bool pipeline_with_processors = std::is_same<TPipeline, QueryPipeline>::value;

    auto & query = getSelectQuery();
    const Settings & settings = context.getSettingsRef();

    /// Actions to calculate ALIAS if required.
    ExpressionActionsPtr alias_actions;

    if (storage)
    {
        /// Append columns from the table filter to the required columns.
        if (context.hasUserProperty(storage->getDatabaseName(), storage->getTableName(), "filter"))
        {
            auto initial_required_columns = required_columns;
            ExpressionActionsPtr actions;
            generateFilterActions(actions, storage, context, initial_required_columns);
            auto required_columns_from_filter = actions->getRequiredColumns();

            for (const auto & column : required_columns_from_filter)
            {
                if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column))
                    required_columns.push_back(column);
            }
        }

        /// Detect if ALIAS columns are required for query execution.
        auto alias_columns_required = false;
        const ColumnsDescription & storage_columns = storage->getColumns();
        for (const auto & column_name : required_columns)
        {
            auto column_default = storage_columns.getDefault(column_name);
            if (column_default && column_default->kind == ColumnDefaultKind::Alias)
            {
                alias_columns_required = true;
                break;
            }
        }
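
        /// Illustrative example: for `CREATE TABLE t (a UInt64, b UInt64, c ALIAS a + b)`,
        /// a query that reads `c` has no physical column `c`, so alias actions must compute it
        /// from the physical columns `a` and `b`.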

        /// There are multiple sources of required columns:
        ///  - raw required columns,
        ///  - columns deduced from ALIAS columns,
        ///  - raw required columns from PREWHERE,
        ///  - columns deduced from ALIAS columns from PREWHERE.
        /// PREWHERE is a special case, since we need to resolve it and pass it directly to `IStorage::read()`
        /// before anything else is executed.
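        /// Illustrative example: assuming `c ALIAS a + b`, the query `SELECT c FROM t PREWHERE d > 0`
        /// requires {a, b} to compute the projection and {d} for PREWHERE, and the two groups are
        /// resolved separately below.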
        if (alias_columns_required)
        {
            NameSet required_columns_from_prewhere;  /// Set of all (including ALIAS) required columns for PREWHERE
            NameSet required_aliases_from_prewhere;  /// Set of ALIAS required columns for PREWHERE

            if (prewhere_info)
            {
                /// Get some columns directly from the PREWHERE expression actions.
                auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns();
                required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
            }

            /// Expression that contains all raw required columns.
            ASTPtr required_columns_all_expr = std::make_shared<ASTExpressionList>();

            /// Expression that contains raw required columns for PREWHERE.
            ASTPtr required_columns_from_prewhere_expr = std::make_shared<ASTExpressionList>();

            /// Sort out the already known required columns between the expressions,
            /// and also populate `required_aliases_from_prewhere`.
            for (const auto & column : required_columns)
            {
                ASTPtr column_expr;
                const auto column_default = storage_columns.getDefault(column);
                bool is_alias = column_default && column_default->kind == ColumnDefaultKind::Alias;

                if (is_alias)
                    column_expr = setAlias(column_default->expression->clone(), column);
                else
                    column_expr = std::make_shared<ASTIdentifier>(column);

                if (required_columns_from_prewhere.count(column))
                {
                    required_columns_from_prewhere_expr->children.emplace_back(std::move(column_expr));

                    if (is_alias)
                        required_aliases_from_prewhere.insert(column);
                }
                else
                    required_columns_all_expr->children.emplace_back(std::move(column_expr));
            }

            /// Columns which we will get after prewhere and filter executions.
            NamesAndTypesList required_columns_after_prewhere;
            NameSet required_columns_after_prewhere_set;

            /// Collect required columns from the prewhere expression actions.
            if (prewhere_info)
            {
                NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end());
                Block prewhere_actions_result = prewhere_info->prewhere_actions->getSampleBlock();

                /// Populate required columns with the columns added by PREWHERE actions and not removed afterwards.
                /// XXX: it looks hacky that we already know for sure which columns we won't need after PREWHERE.
                for (const auto & column : prewhere_actions_result)
                {
                    if (prewhere_info->remove_prewhere_column && column.name == prewhere_info->prewhere_column_name)
                        continue;

                    if (columns_to_remove.count(column.name))
                        continue;

                    required_columns_all_expr->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
                    required_columns_after_prewhere.emplace_back(column.name, column.type);
                }

                required_columns_after_prewhere_set
                    = ext::map<NameSet>(required_columns_after_prewhere, [](const auto & it) { return it.name; });
            }

            auto syntax_result = SyntaxAnalyzer(context).analyze(required_columns_all_expr, required_columns_after_prewhere, {}, storage);
            alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActions(true);

            /// The set of required columns could have grown as a result of adding actions to calculate ALIAS columns.
            required_columns = alias_actions->getRequiredColumns();

            /// Do not remove the prewhere filter if it is a column that is used as an alias.
            if (prewhere_info && prewhere_info->remove_prewhere_column)
                if (required_columns.end()
                    != std::find(required_columns.begin(), required_columns.end(), prewhere_info->prewhere_column_name))
                    prewhere_info->remove_prewhere_column = false;

            /// Remove columns which will be added by prewhere.
            required_columns.erase(std::remove_if(required_columns.begin(), required_columns.end(), [&](const String & name)
            {
                return !!required_columns_after_prewhere_set.count(name);
            }), required_columns.end());

            if (prewhere_info)
            {
                /// Don't remove columns which are needed for aliasing.
                auto new_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions->getRequiredColumnsWithTypes(), context);
                for (const auto & action : prewhere_info->prewhere_actions->getActions())
                {
                    if (action.type != ExpressionAction::REMOVE_COLUMN
                        || required_columns.end() == std::find(required_columns.begin(), required_columns.end(), action.source_name))
                        new_actions->add(action);
                }
                prewhere_info->prewhere_actions = std::move(new_actions);

                auto analyzed_result
                    = SyntaxAnalyzer(context).analyze(required_columns_from_prewhere_expr, storage->getColumns().getAllPhysical());
                prewhere_info->alias_actions
                    = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, context).getActions(true, false);

                /// Add (physical?) columns required by the alias actions.
                auto required_columns_from_alias = prewhere_info->alias_actions->getRequiredColumns();
                Block prewhere_actions_result = prewhere_info->prewhere_actions->getSampleBlock();
                for (auto & column : required_columns_from_alias)
                    if (!prewhere_actions_result.has(column))
                        if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column))
                            required_columns.push_back(column);

                /// Add physical columns required by the prewhere actions.
                for (const auto & column : required_columns_from_prewhere)
                    if (required_aliases_from_prewhere.count(column) == 0)
                        if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column))
                            required_columns.push_back(column);
            }
        }
    }

    /// Limitation on the number of columns to read.
    /// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns.
    if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
        throw Exception("Limit for number of columns to read exceeded. "
            "Requested: " + toString(required_columns.size())
            + ", maximum: " + settings.max_columns_to_read.toString(),
            ErrorCodes::TOO_MANY_COLUMNS);

    /** With distributed query processing, almost no computations are done in the threads;
      * they mostly wait for and receive data from remote servers.
      * If we have 20 remote servers and max_threads = 8, then it would not be very good
      * to connect to and query only 8 servers at a time.
      * To simultaneously query more remote servers,
      * max_distributed_connections is used instead of max_threads.
      */
    bool is_remote = false;
    if (storage && storage->isRemote())
    {
        is_remote = true;
        max_streams = settings.max_distributed_connections;
    }

    UInt64 max_block_size = settings.max_block_size;

    auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);

    /** Optimization - if DISTINCT, WHERE, GROUP BY, HAVING, ORDER BY, LIMIT BY and WITH TIES are not specified,
      * but LIMIT is specified and limit + offset < max_block_size,
      * then we use limit + offset as the block size (so as not to read more from the table than requested),
      * and also set the number of threads to 1.
      */
    if (!query.distinct
        && !query.limit_with_ties
        && !query.prewhere()
        && !query.where()
        && !query.groupBy()
        && !query.having()
        && !query.orderBy()
        && !query.limitBy()
        && query.limitLength()
        && !query_analyzer->hasAggregation()
        && limit_length + limit_offset < max_block_size)
    {
        max_block_size = std::max(UInt64(1), limit_length + limit_offset);
        max_streams = 1;
    }
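
    /// E.g. `SELECT * FROM t LIMIT 10` (illustrative example) then reads a single stream with
    /// blocks of at most 10 rows, instead of max_threads streams with max_block_size rows each.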

    if (!max_block_size)
        throw Exception("Setting 'max_block_size' cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND);

    /// Initialize the initial data streams on which the query transforms are superimposed. Table or subquery or prepared input?
    if (pipeline.initialized())
    {
        /// Prepared input.
    }
    else if (interpreter_subquery)
    {
        /// Subquery.
        /// If we need fewer columns than the subquery provides, update the interpreter.
        if (required_columns.size() < source_header.columns())
        {
            ASTPtr subquery = extractTableExpression(query, 0);
            if (!subquery)
                throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR);

            interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
                subquery, getSubqueryContext(context),
                options.copy().subquery().noModify(), required_columns);

            if (query_analyzer->hasAggregation())
                interpreter_subquery->ignoreWithTotals();
        }

        if constexpr (pipeline_with_processors)
            /// Just use the pipeline from the subquery.
            pipeline = interpreter_subquery->executeWithProcessors();
        else
            pipeline.streams = interpreter_subquery->executeWithMultipleStreams();
    }
    else if (storage)
    {
        /// Table.
        if (max_streams == 0)
            throw Exception("Logical error: zero number of streams requested", ErrorCodes::LOGICAL_ERROR);

        /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads.
        if (max_streams > 1 && !is_remote)
            max_streams *= settings.max_streams_to_max_threads_ratio;

        query_info.query = query_ptr;
        query_info.syntax_analyzer_result = syntax_analyzer_result;
        query_info.sets = query_analyzer->getPreparedSets();
        query_info.prewhere_info = prewhere_info;
        query_info.sorting_info = sorting_info;

        auto streams = storage->read(required_columns, query_info, context, processing_stage, max_block_size, max_streams);

        if (streams.empty())
        {
            streams = {std::make_shared<NullBlockInputStream>(storage->getSampleBlockForColumns(required_columns))};

            if (query_info.prewhere_info)
            {
                streams.back() = std::make_shared<FilterBlockInputStream>(
                    streams.back(),
                    prewhere_info->prewhere_actions,
                    prewhere_info->prewhere_column_name,
                    prewhere_info->remove_prewhere_column);

                /// Remove additional columns.
                /// In some cases no marks were read, so pipeline.streams is empty;
                /// then some columns added for PREWHERE are not removed as expected,
                /// which leads to a mismatched header in a distributed table.
                if (query_info.prewhere_info->remove_columns_actions)
                {
                    streams.back() = std::make_shared<ExpressionBlockInputStream>(streams.back(), query_info.prewhere_info->remove_columns_actions);
                }
            }
        }

        for (auto & stream : streams)
            stream->addTableLock(table_lock);

        /// Set the limits and quota for reading data, the speed and time of the query.
        {
            IBlockInputStream::LocalLimits limits;
            limits.mode = IBlockInputStream::LIMITS_TOTAL;
            limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode);
            limits.max_execution_time = settings.max_execution_time;
            limits.timeout_overflow_mode = settings.timeout_overflow_mode;

            /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers,
              * because the initiating server has a summary of the execution of the request on all servers.
              *
              * But limits on data size to read and maximum execution time are reasonable to check both on the initiator and
              * additionally on each remote server, because these limits are checked per block of data processed,
              * and remote servers may process way more blocks of data than are received by the initiator.
              */
            if (options.to_stage == QueryProcessingStage::Complete)
            {
                limits.min_execution_speed = settings.min_execution_speed;
                limits.max_execution_speed = settings.max_execution_speed;
                limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
                limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
                limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
            }

            QuotaForIntervals & quota = context.getQuota();

            for (auto & stream : streams)
            {
                if (!options.ignore_limits)
                    stream->setLimits(limits);

                if (options.to_stage == QueryProcessingStage::Complete)
                    stream->setQuota(quota);
            }
        }

        if constexpr (pipeline_with_processors)
        {
            if (streams.size() == 1)
                pipeline.setMaxThreads(streams.size());

            /// Unify streams in case they have different headers.
            if (streams.size() > 1)
            {
                auto first_header = streams.at(0)->getHeader();

                if (first_header.columns() > 1 && first_header.has("_dummy"))
                    first_header.erase("_dummy");

                for (size_t i = 0; i < streams.size(); ++i)
                {
                    auto & stream = streams[i];
                    auto header = stream->getHeader();
                    auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name;
                    if (!blocksHaveEqualStructure(first_header, header))
                        stream = std::make_shared<ConvertingBlockInputStream>(context, stream, first_header, mode);
                }
            }

            Processors sources;
            sources.reserve(streams.size());

            /// Pin sources for merge tree tables.
            bool pin_sources = dynamic_cast<const MergeTreeData *>(storage.get()) != nullptr;

            for (auto & stream : streams)
            {
                bool force_add_agg_info = processing_stage == QueryProcessingStage::WithMergeableState;
                auto source = std::make_shared<SourceFromInputStream>(stream, force_add_agg_info);

                if (processing_stage == QueryProcessingStage::Complete)
                    source->addTotalsPort();

                if (pin_sources)
                    source->setStream(sources.size());

                sources.emplace_back(std::move(source));
            }

            pipeline.init(std::move(sources));
        }
        else
            pipeline.streams = std::move(streams);
    }
    else
        throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR);

    /// Aliases in table declaration.
    if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
    {
        if constexpr (pipeline_with_processors)
        {
            pipeline.addSimpleTransform([&](const Block & header)
            {
                return std::make_shared<ExpressionTransform>(header, alias_actions);
            });
        }
        else
        {
            pipeline.transform([&](auto & stream)
            {
                stream = std::make_shared<ExpressionBlockInputStream>(stream, alias_actions);
            });
        }
    }
}


void InterpreterSelectQuery::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter)
{
    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<FilterBlockInputStream>(
            createCheckNonEmptySetIfNeed(stream, expression), expression, getSelectQuery().where()->getColumnName(), remove_filter);
    });
}

void InterpreterSelectQuery::executeWhere(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool remove_filter)
{
    pipeline.addSimpleTransform([&](const Block & block)
    {
        return std::make_shared<FilterTransform>(block, expression, getSelectQuery().where()->getColumnName(), remove_filter);
    });
}

void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<ExpressionBlockInputStream>(createCheckNonEmptySetIfNeed(stream, expression), expression);
    });

    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header = pipeline.firstStream()->getHeader();
    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header.getPositionByName(name));
    for (auto & descr : aggregates)
        if (descr.arguments.empty())
            for (const auto & name : descr.argument_names)
                descr.arguments.push_back(header.getPositionByName(name));

    const Settings & settings = context.getSettingsRef();

    /** Two-level aggregation is useful in two cases:
      * 1. Parallel aggregation is done, and the results should be merged in parallel.
      * 2. Aggregation is done with storage of temporary data on disk, and it needs to be merged memory-efficiently.
      */
    bool allow_to_use_two_level_group_by = pipeline.streams.size() > 1 || settings.max_bytes_before_external_group_by != 0;

    Aggregator::Params params(header, keys, aggregates,
        overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
        allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0),
        allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0),
        settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
        context.getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
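
    /// When two-level aggregation kicks in (past group_by_two_level_threshold rows or
    /// group_by_two_level_threshold_bytes), the hash table is split into 256 buckets,
    /// so the buckets can be merged in parallel and spilled to disk independently.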

    /// If there are several sources, then we perform parallel aggregation
    if (pipeline.streams.size() > 1)
    {
        pipeline.firstStream() = std::make_shared<ParallelAggregatingBlockInputStream>(
            pipeline.streams, pipeline.stream_with_non_joined_data, params, final,
            max_streams,
            settings.aggregation_memory_efficient_merge_threads
                ? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
                : static_cast<size_t>(settings.max_threads));

        pipeline.stream_with_non_joined_data = nullptr;
        pipeline.streams.resize(1);
    }
    else
    {
        BlockInputStreams inputs;
        if (!pipeline.streams.empty())
            inputs.push_back(pipeline.firstStream());
        else
            pipeline.streams.resize(1);

        if (pipeline.stream_with_non_joined_data)
            inputs.push_back(pipeline.stream_with_non_joined_data);

        pipeline.firstStream() = std::make_shared<AggregatingBlockInputStream>(std::make_shared<ConcatBlockInputStream>(inputs), params, final);

        pipeline.stream_with_non_joined_data = nullptr;
    }
}


void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
    pipeline.addSimpleTransform([&](const Block & header)
    {
        return std::make_shared<ExpressionTransform>(header, expression);
    });

    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header_before_aggregation = pipeline.getHeader();
    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header_before_aggregation.getPositionByName(name));
    for (auto & descr : aggregates)
        if (descr.arguments.empty())
            for (const auto & name : descr.argument_names)
                descr.arguments.push_back(header_before_aggregation.getPositionByName(name));

    const Settings & settings = context.getSettingsRef();

    /** Two-level aggregation is useful in two cases:
      * 1. Parallel aggregation is done, and the results should be merged in parallel.
      * 2. Aggregation is done with storage of temporary data on disk, and it needs to be merged memory-efficiently.
      */
    bool allow_to_use_two_level_group_by = pipeline.getNumMainStreams() > 1 || settings.max_bytes_before_external_group_by != 0;

    Aggregator::Params params(header_before_aggregation, keys, aggregates,
        overflow_row, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
        allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold : SettingUInt64(0),
        allow_to_use_two_level_group_by ? settings.group_by_two_level_threshold_bytes : SettingUInt64(0),
        settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
        context.getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);

    auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);

    pipeline.dropTotalsIfHas();

    /// If there are several sources, then we perform parallel aggregation
    if (pipeline.getNumMainStreams() > 1)
    {
        /// Add a resize transform to uniformly distribute data between the aggregating streams.
        pipeline.resize(pipeline.getNumMainStreams(), true);

        auto many_data = std::make_shared<ManyAggregatedData>(pipeline.getNumMainStreams());
        auto merge_threads = settings.aggregation_memory_efficient_merge_threads
            ? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
            : static_cast<size_t>(settings.max_threads);

        size_t counter = 0;
        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<AggregatingTransform>(header, transform_params, many_data, counter++, max_streams, merge_threads);
        });

        pipeline.resize(1);
    }
    else
    {
        pipeline.resize(1);

        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<AggregatingTransform>(header, transform_params);
        });
    }
}


void InterpreterSelectQuery::executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final)
{
    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header = pipeline.firstStream()->getHeader();

    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header.getPositionByName(name));

    /** There are two modes of distributed aggregation.
      *
      * 1. Blocks are read from the remote servers in several threads.
      * All the blocks are saved in RAM, then merged.
      * If the aggregation is two-level, the merge is parallelized over the number of buckets.
      *
      * 2. Blocks are read from different servers in order, in one thread.
      * Only one block from each server is kept in RAM.
      * If the aggregation is two-level, the blocks of each next level are merged consecutively.
      *
      * The second option consumes less memory (up to 256 times less)
      * in the case of two-level aggregation, which is used for large results after GROUP BY,
      * but it can work more slowly.
      */
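
    /// E.g. for `SELECT k, count() FROM distributed_table GROUP BY k` (illustrative example), the
    /// memory-efficient mode below keeps at most one block per remote server in RAM and merges
    /// matching two-level buckets as they arrive.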
    const Settings & settings = context.getSettingsRef();

    Aggregator::Params params(header, keys, aggregates, overflow_row, settings.max_threads);

    if (!settings.distributed_aggregation_memory_efficient)
    {
        /// We union several sources into one, parallelizing the work.
        executeUnion(pipeline, {});

        /// Now merge the aggregated blocks
        pipeline.firstStream() = std::make_shared<MergingAggregatedBlockInputStream>(pipeline.firstStream(), params, final, settings.max_threads);
    }
    else
    {
        pipeline.firstStream() = std::make_shared<MergingAggregatedMemoryEfficientBlockInputStream>(pipeline.streams, params, final,
            max_streams,
            settings.aggregation_memory_efficient_merge_threads
                ? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
                : static_cast<size_t>(settings.max_threads));

        pipeline.streams.resize(1);
    }
}


void InterpreterSelectQuery::executeMergeAggregated(QueryPipeline & pipeline, bool overflow_row, bool final)
{
    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header_before_merge = pipeline.getHeader();

    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header_before_merge.getPositionByName(name));

    /// There are two modes of distributed aggregation; see the comment
    /// on the Pipeline version of executeMergeAggregated above.

    const Settings & settings = context.getSettingsRef();

    Aggregator::Params params(header_before_merge, keys, aggregates, overflow_row, settings.max_threads);

    auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);

    if (!settings.distributed_aggregation_memory_efficient)
    {
        /// We union several sources into one, parallelizing the work.
        pipeline.resize(1);

        /// Now merge the aggregated blocks
        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<MergingAggregatedTransform>(header, transform_params, settings.max_threads);
        });
    }
    else
    {
        /// pipeline.resize(max_streams); - it seems we don't need it.

        auto num_merge_threads = settings.aggregation_memory_efficient_merge_threads
            ? static_cast<size_t>(settings.aggregation_memory_efficient_merge_threads)
            : static_cast<size_t>(settings.max_threads);

        auto pipe = createMergingAggregatedMemoryEfficientPipe(
            pipeline.getHeader(),
            transform_params,
            pipeline.getNumStreams(),
            num_merge_threads);

        pipeline.addPipe(std::move(pipe));
    }
}


void InterpreterSelectQuery::executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<FilterBlockInputStream>(stream, expression, getSelectQuery().having()->getColumnName());
    });
}

void InterpreterSelectQuery::executeHaving(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
        if (stream_type == QueryPipeline::StreamType::Totals)
            return nullptr;

        /// TODO: do we need to save the filter column here?
        return std::make_shared<FilterTransform>(header, expression, getSelectQuery().having()->getColumnName(), false);
    });
}


void InterpreterSelectQuery::executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
    executeUnion(pipeline, {});

    const Settings & settings = context.getSettingsRef();

    pipeline.firstStream() = std::make_shared<TotalsHavingBlockInputStream>(
        pipeline.firstStream(),
        overflow_row,
        expression,
        has_having ? getSelectQuery().having()->getColumnName() : "",
        settings.totals_mode,
        settings.totals_auto_threshold,
        final);
}
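
/// Example (illustrative): for `SELECT k, sum(v) FROM t GROUP BY k WITH TOTALS HAVING sum(v) > 10`
/// this step computes the totals row; whether rows filtered out by HAVING still contribute to the
/// totals is controlled by the totals_mode setting.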
void InterpreterSelectQuery::executeTotalsAndHaving(QueryPipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final)
{
    const Settings & settings = context.getSettingsRef();

    auto totals_having = std::make_shared<TotalsHavingTransform>(
        pipeline.getHeader(), overflow_row, expression,
        has_having ? getSelectQuery().having()->getColumnName() : "",
        settings.totals_mode, settings.totals_auto_threshold, final);

    pipeline.addTotalsHavingTransform(std::move(totals_having));
}


void InterpreterSelectQuery::executeRollupOrCube(Pipeline & pipeline, Modificator modificator)
{
    executeUnion(pipeline, {});

    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header = pipeline.firstStream()->getHeader();

    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header.getPositionByName(name));

    const Settings & settings = context.getSettingsRef();

    Aggregator::Params params(header, keys, aggregates,
        false, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
        SettingUInt64(0), SettingUInt64(0),
        settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
        context.getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);

    if (modificator == Modificator::ROLLUP)
        pipeline.firstStream() = std::make_shared<RollupBlockInputStream>(pipeline.firstStream(), params);
    else
        pipeline.firstStream() = std::make_shared<CubeBlockInputStream>(pipeline.firstStream(), params);
}
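
/// Example (illustrative): `SELECT a, b, sum(v) FROM t GROUP BY a, b WITH ROLLUP` additionally
/// emits subtotal rows for (a) and a grand-total row; WITH CUBE emits subtotals for every subset
/// of the keys: (a), (b) and ().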
void InterpreterSelectQuery::executeRollupOrCube(QueryPipeline & pipeline, Modificator modificator)
{
    pipeline.resize(1);

    Names key_names;
    AggregateDescriptions aggregates;
    query_analyzer->getAggregateInfo(key_names, aggregates);

    Block header_before_transform = pipeline.getHeader();

    ColumnNumbers keys;
    for (const auto & name : key_names)
        keys.push_back(header_before_transform.getPositionByName(name));

    const Settings & settings = context.getSettingsRef();

    Aggregator::Params params(header_before_transform, keys, aggregates,
        false, settings.max_rows_to_group_by, settings.group_by_overflow_mode,
        SettingUInt64(0), SettingUInt64(0),
        settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
        context.getTemporaryPath(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);

    auto transform_params = std::make_shared<AggregatingTransformParams>(params, true);

    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
        if (stream_type == QueryPipeline::StreamType::Totals)
            return nullptr;

        if (modificator == Modificator::ROLLUP)
            return std::make_shared<RollupTransform>(header, std::move(transform_params));
        else
            return std::make_shared<CubeTransform>(header, std::move(transform_params));
    });
}


void InterpreterSelectQuery::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<ExpressionBlockInputStream>(createCheckNonEmptySetIfNeed(stream, expression), expression);
    });
}

void InterpreterSelectQuery::executeExpression(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
    {
        return std::make_shared<ExpressionTransform>(header, expression);
    });
}


void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr sorting_info)
{
    auto & query = getSelectQuery();
    SortDescription order_descr = getSortDescription(query, context);
    const Settings & settings = context.getSettingsRef();
    UInt64 limit = getLimitForSorting(query, context);

    if (sorting_info)
    {
        /* Case of sorting with optimization using the sorting key.
         * We have several threads, each of which reads a batch of parts in direct
         * or reverse order of the sorting key, using one input stream per part,
         * and then merges them into one sorted stream.
         * At this stage we merge the per-thread streams into one.
         */
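
        /* Illustrative example: for a MergeTree table with ORDER BY (a, b),
         * `SELECT ... ORDER BY a, b LIMIT 10` reads each part in sorting-key order, so the full
         * sort degenerates into a merge; `ORDER BY a, b, c` additionally needs finish-sorting
         * for the suffix `c` (the need_finish_sorting case below).
         */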
        bool need_finish_sorting = (sorting_info->prefix_order_descr.size() < order_descr.size());
        if (need_finish_sorting)
        {
            pipeline.transform([&](auto & stream)
            {
                stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);
            });
        }

        if (pipeline.hasMoreThanOneStream())
        {
            pipeline.transform([&](auto & stream)
            {
                stream = std::make_shared<AsynchronousBlockInputStream>(stream);
            });

            UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
            pipeline.firstStream() = std::make_shared<MergingSortedBlockInputStream>(
                pipeline.streams, sorting_info->prefix_order_descr,
                settings.max_block_size, limit_for_merging);
            pipeline.streams.resize(1);
        }

        if (need_finish_sorting)
        {
            pipeline.firstStream() = std::make_shared<FinishSortingBlockInputStream>(
                pipeline.firstStream(), sorting_info->prefix_order_descr,
                order_descr, settings.max_block_size, limit);
        }
    }
    else
    {
        pipeline.transform([&](auto & stream)
        {
            auto sorting_stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);

            /// Limits on sorting
            IBlockInputStream::LocalLimits limits;
            limits.mode = IBlockInputStream::LIMITS_TOTAL;
            limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
            sorting_stream->setLimits(limits);

            stream = sorting_stream;
        });

        /// If there are several streams, we merge them into one
        executeUnion(pipeline, {});

        /// Merge the sorted blocks.
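        /// If the accumulated data exceeds max_bytes_before_external_sort, sorted runs are spilled
        /// to temporary files in context.getTemporaryPath() and then merged back from disk.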
        pipeline.firstStream() = std::make_shared<MergeSortingBlockInputStream>(
            pipeline.firstStream(), order_descr, settings.max_block_size, limit,
            settings.max_bytes_before_remerge_sort,
            settings.max_bytes_before_external_sort, context.getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
    }
}
2012-02-27 06:28:20 +00:00
2019-08-23 11:39:22 +00:00

void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, SortingInfoPtr sorting_info)
{
    auto & query = getSelectQuery();
    SortDescription order_descr = getSortDescription(query, context);
    UInt64 limit = getLimitForSorting(query, context);

    const Settings & settings = context.getSettingsRef();

    /// TODO: Limits on sorting
    // IBlockInputStream::LocalLimits limits;
    // limits.mode = IBlockInputStream::LIMITS_TOTAL;
    // limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);

    if (sorting_info)
    {
        /* Case of sorting with optimization using the sorting key.
         * We have several threads, each of which reads a batch of parts in direct
         * or reverse order of the sorting key, using one input stream per part,
         * and then merges them into one sorted stream.
         * At this stage we merge the per-thread streams into one.
         */

        bool need_finish_sorting = (sorting_info->prefix_order_descr.size() < order_descr.size());

        if (need_finish_sorting)
        {
            pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type)
            {
                bool do_count_rows = stream_type == QueryPipeline::StreamType::Main;
                return std::make_shared<PartialSortingTransform>(header, order_descr, limit, do_count_rows);
            });
        }

        if (pipeline.getNumStreams() > 1)
        {
            UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
            auto transform = std::make_shared<MergingSortedTransform>(
                pipeline.getHeader(),
                pipeline.getNumStreams(),
                sorting_info->prefix_order_descr,
                settings.max_block_size, limit_for_merging);

            pipeline.addPipe({ std::move(transform) });
        }

        if (need_finish_sorting)
        {
            pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
            {
                return std::make_shared<FinishSortingTransform>(
                    header, sorting_info->prefix_order_descr,
                    order_descr, settings.max_block_size, limit);
            });
        }

        return;
    }

    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type)
    {
        bool do_count_rows = stream_type == QueryPipeline::StreamType::Main;
        return std::make_shared<PartialSortingTransform>(header, order_descr, limit, do_count_rows);
    });

    /// If there are several streams, we merge them into one.
    pipeline.resize(1);

    /// Merge the sorted blocks.
    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
        if (stream_type == QueryPipeline::StreamType::Totals)
            return nullptr;

        return std::make_shared<MergeSortingTransform>(
            header, order_descr, settings.max_block_size, limit,
            settings.max_bytes_before_remerge_sort,
            settings.max_bytes_before_external_sort, context.getTemporaryPath(), settings.min_free_disk_space_for_temporary_data);
    });
}
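
/** Hypothetical illustration (not part of this file) of when `sorting_info` is set above:
  * for a MergeTree table with ORDER BY (a, b), a query such as
  *
  *     SELECT * FROM t ORDER BY a, b, c
  *
  * already reads rows sorted by the prefix (a, b), so the pipeline only has to finish
  * sorting by `c` (FinishSortingTransform) instead of doing a full in-memory/external
  * sort (MergeSortingTransform). The detection of this case lives elsewhere.
  */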

void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline)
{
    auto & query = getSelectQuery();
    SortDescription order_descr = getSortDescription(query, context);
    UInt64 limit = getLimitForSorting(query, context);

    const Settings & settings = context.getSettingsRef();

    /// If there are several streams, then we merge them into one.
    if (pipeline.hasMoreThanOneStream())
    {
        unifyStreams(pipeline, pipeline.firstStream()->getHeader());

        /** MergingSortedBlockInputStream reads the sources sequentially.
          * To make the data on the remote servers prepared in parallel, we wrap it in AsynchronousBlockInputStream.
          */
        pipeline.transform([&](auto & stream)
        {
            stream = std::make_shared<AsynchronousBlockInputStream>(stream);
        });

        /// Merge the sorted sources into one sorted source.
        pipeline.firstStream() = std::make_shared<MergingSortedBlockInputStream>(pipeline.streams, order_descr, settings.max_block_size, limit);
        pipeline.streams.resize(1);
    }
}

void InterpreterSelectQuery::executeMergeSorted(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();
    SortDescription order_descr = getSortDescription(query, context);
    UInt64 limit = getLimitForSorting(query, context);

    const Settings & settings = context.getSettingsRef();

    /// If there are several streams, then we merge them into one.
    if (pipeline.getNumStreams() > 1)
    {
        auto transform = std::make_shared<MergingSortedTransform>(
            pipeline.getHeader(),
            pipeline.getNumStreams(),
            order_descr,
            settings.max_block_size, limit);

        pipeline.addPipe({ std::move(transform) });
    }
}
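
/** Sketch of the invariant both overloads above rely on (my reading, not a guarantee
  * stated in this file): every input stream is already sorted by `order_descr`, so a
  * single k-way merge yields a globally sorted result without re-sorting. For example,
  * merging two sorted streams [1, 3, 5] and [2, 4, 6] produces [1, 2, 3, 4, 5, 6]
  * in O(n log k) comparisons, and can stop early once `limit` rows have been emitted.
  */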

void InterpreterSelectQuery::executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<ExpressionBlockInputStream>(stream, expression);
    });
}


void InterpreterSelectQuery::executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression)
{
    pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
    {
        return std::make_shared<ExpressionTransform>(header, expression);
    });
}
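
/// Illustrative note (my summary, not taken from this file): the projection step applies
/// the final ExpressionActions that shape the result header of the SELECT list, e.g. for
/// `SELECT a + b AS s FROM t` it ensures the output block contains exactly the column `s`.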

void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_order, Names columns)
{
    auto & query = getSelectQuery();
    if (query.distinct)
    {
        const Settings & settings = context.getSettingsRef();

        auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
        UInt64 limit_for_distinct = 0;

        /// If ORDER BY is not executed after this stage of DISTINCT, then no more than limit_length + limit_offset distinct rows are needed.
        if ((!query.orderBy() || !before_order) && !query.limit_with_ties)
            limit_for_distinct = limit_length + limit_offset;

        pipeline.transform([&](auto & stream)
        {
            SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
            stream = std::make_shared<DistinctBlockInputStream>(stream, limits, limit_for_distinct, columns);
        });
    }
}

void InterpreterSelectQuery::executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns)
{
    auto & query = getSelectQuery();
    if (query.distinct)
    {
        const Settings & settings = context.getSettingsRef();

        auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
        UInt64 limit_for_distinct = 0;

        /// If ORDER BY is not executed after this stage of DISTINCT, then no more than limit_length + limit_offset distinct rows are needed.
        if (!query.orderBy() || !before_order)
            limit_for_distinct = limit_length + limit_offset;

        SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);

        pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
        {
            if (stream_type == QueryPipeline::StreamType::Totals)
                return nullptr;

            return std::make_shared<DistinctTransform>(header, limits, limit_for_distinct, columns);
        });
    }
}
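
/** Worked example of the shortcut above (illustrative, not from this file):
  * for `SELECT DISTINCT x FROM t LIMIT 10 OFFSET 5` with no ORDER BY after DISTINCT,
  * the distinct stage may stop after collecting 15 distinct values
  * (limit_length + limit_offset), because no later stage can reorder rows and
  * change which 10 of them survive the final LIMIT.
  */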

void InterpreterSelectQuery::executeUnion(Pipeline & pipeline, Block header)
{
    /// If there are still several streams, then we combine them into one.
    if (pipeline.hasMoreThanOneStream())
    {
        if (!header)
            header = pipeline.firstStream()->getHeader();

        unifyStreams(pipeline, std::move(header));

        pipeline.firstStream() = std::make_shared<UnionBlockInputStream>(pipeline.streams, pipeline.stream_with_non_joined_data, max_streams);
        pipeline.stream_with_non_joined_data = nullptr;
        pipeline.streams.resize(1);
        pipeline.union_stream = true;
    }
    else if (pipeline.stream_with_non_joined_data)
    {
        pipeline.streams.push_back(pipeline.stream_with_non_joined_data);
        pipeline.stream_with_non_joined_data = nullptr;
    }
}
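
/// Note (my reading of the code above): `stream_with_non_joined_data` carries the rows of
/// the right-hand table that matched nothing during a RIGHT or FULL JOIN; they still belong
/// to the result, so the stream is either fed into the UnionBlockInputStream together with
/// the main streams or, if there is only one stream, appended as an extra one.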

/// Preliminary LIMIT: applied to every source separately, before the sources are combined.
void InterpreterSelectQuery::executePreLimit(Pipeline & pipeline)
{
    auto & query = getSelectQuery();
    /// If there is LIMIT
    if (query.limitLength())
    {
        auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);

        SortDescription sort_descr;
        if (query.limit_with_ties)
        {
            if (!query.orderBy())
                throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR);
            sort_descr = getSortDescription(query, context);
        }

        pipeline.transform([&, limit = limit_length + limit_offset](auto & stream)
        {
            stream = std::make_shared<LimitBlockInputStream>(stream, limit, 0, false, false, query.limit_with_ties, sort_descr);
        });
    }
}

/// Preliminary LIMIT: applied to every source separately, before the sources are combined.
void InterpreterSelectQuery::executePreLimit(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();
    /// If there is LIMIT
    if (query.limitLength())
    {
        auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);

        pipeline.addSimpleTransform([&, limit = limit_length + limit_offset](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
        {
            if (stream_type == QueryPipeline::StreamType::Totals)
                return nullptr;

            return std::make_shared<LimitTransform>(header, limit, 0);
        });
    }
}
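
/** Worked example of the pre-limit (illustrative): for
  *     SELECT x FROM t ORDER BY x LIMIT 3 OFFSET 2
  * each of the N parallel sources is first cut to limit_length + limit_offset = 5 rows;
  * the real LIMIT 3 OFFSET 2 is applied only after the sorted merge, since any of the
  * first 5 rows of a source could end up among the global top 5.
  */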

void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline)
{
    auto & query = getSelectQuery();
    if (!query.limitByLength() || !query.limitBy())
        return;

    Names columns;
    for (const auto & elem : query.limitBy()->children)
        columns.emplace_back(elem->getColumnName());

    UInt64 length = getLimitUIntValue(query.limitByLength(), context);
    UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context) : 0);

    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<LimitByBlockInputStream>(stream, length, offset, columns);
    });
}

void InterpreterSelectQuery::executeLimitBy(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();
    if (!query.limitByLength() || !query.limitBy())
        return;

    Names columns;
    for (const auto & elem : query.limitBy()->children)
        columns.emplace_back(elem->getColumnName());

    UInt64 length = getLimitUIntValue(query.limitByLength(), context);
    UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context) : 0);

    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
        if (stream_type == QueryPipeline::StreamType::Totals)
            return nullptr;

        return std::make_shared<LimitByTransform>(header, length, offset, columns);
    });
}
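
/// Illustrative query for the step above: `SELECT domain, url FROM hits LIMIT 2 BY domain`
/// keeps at most 2 rows per distinct `domain` value; with `LIMIT 2 OFFSET 1 BY domain`
/// it would skip the first row of each group and keep the next 2.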

// TODO: move to anonymous namespace
bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query)
{
    if (query.group_by_with_totals)
        return true;

    /** NOTE You can also check that the table in the subquery is distributed, and that it only looks at one shard.
      * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end.
      */
    if (auto query_table = extractTableExpression(query, 0))
    {
        if (const auto * ast_union = query_table->as<ASTSelectWithUnionQuery>())
        {
            for (const auto & elem : ast_union->list_of_selects->children)
                if (hasWithTotalsInAnySubqueryInFromClause(elem->as<ASTSelectQuery &>()))
                    return true;
        }
    }

    return false;
}
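
/// Example input (illustrative) that makes the check above return true:
///     SELECT count() FROM (SELECT x FROM t GROUP BY x WITH TOTALS)
/// The outer query has no WITH TOTALS, but the subquery in FROM does, so reading must
/// not be cancelled early or the `totals` row from the remote server would be lost.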

void InterpreterSelectQuery::executeLimit(Pipeline & pipeline)
{
    auto & query = getSelectQuery();
    /// If there is LIMIT
    if (query.limitLength())
    {
        /** Rare case:
          * if there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels,
          * then when using LIMIT, you should read the data to the end, rather than cancel the query earlier,
          * because if you cancel the query, we will not get `totals` data from the remote server.
          *
          * Another case:
          * if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
          * otherwise TOTALS is counted according to incomplete data.
          */
        bool always_read_till_end = false;

        if (query.group_by_with_totals && !query.orderBy())
            always_read_till_end = true;

        if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query))
            always_read_till_end = true;

        SortDescription order_descr;
        if (query.limit_with_ties)
        {
            if (!query.orderBy())
                throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR);
            order_descr = getSortDescription(query, context);
        }

        UInt64 limit_length;
        UInt64 limit_offset;
        std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, context);

        pipeline.transform([&](auto & stream)
        {
            stream = std::make_shared<LimitBlockInputStream>(stream, limit_length, limit_offset, always_read_till_end, false, query.limit_with_ties, order_descr);
        });
    }
}

void InterpreterSelectQuery::executeWithFill(Pipeline & pipeline)
{
    auto & query = getSelectQuery();
    if (query.orderBy())
    {
        SortDescription order_descr = getSortDescription(query, context);
        SortDescription fill_descr;
        for (auto & desc : order_descr)
        {
            if (desc.with_fill)
                fill_descr.push_back(desc);
        }

        if (fill_descr.empty())
            return;

        pipeline.transform([&](auto & stream)
        {
            stream = std::make_shared<FillingBlockInputStream>(stream, fill_descr);
        });
    }
}

void InterpreterSelectQuery::executeWithFill(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();
    if (query.orderBy())
    {
        SortDescription order_descr = getSortDescription(query, context);
        SortDescription fill_descr;
        for (auto & desc : order_descr)
        {
            if (desc.with_fill)
                fill_descr.push_back(desc);
        }

        if (fill_descr.empty())
            return;

        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<FillingTransform>(header, fill_descr);
        });
    }
}
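
/// Illustrative query for the step above: `SELECT d, v FROM t ORDER BY d WITH FILL STEP 1`
/// inserts synthetic rows for the `d` values missing between adjacent input rows, so a
/// sorted input with d = [1, 4] becomes d = [1, 2, 3, 4], with the other columns filled
/// with default values in the generated rows.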


void InterpreterSelectQuery::executeLimit(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();
    /// If there is LIMIT
    if (query.limitLength())
    {
        /** Rare case:
          * if there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels,
          * then when using LIMIT, you should read the data to the end, rather than cancel the query earlier,
          * because if you cancel the query, we will not get `totals` data from the remote server.
          *
          * Another case:
          * if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
          * otherwise TOTALS is counted according to incomplete data.
          */
        bool always_read_till_end = false;

        if (query.group_by_with_totals && !query.orderBy())
            always_read_till_end = true;

        if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query))
            always_read_till_end = true;

        UInt64 limit_length;
        UInt64 limit_offset;
        std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, context);

        SortDescription order_descr;
        if (query.limit_with_ties)
        {
            if (!query.orderBy())
                throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR);
            order_descr = getSortDescription(query, context);
        }

        pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
        {
            if (stream_type != QueryPipeline::StreamType::Main)
                return nullptr;

            return std::make_shared<LimitTransform>(
                header, limit_length, limit_offset, always_read_till_end, query.limit_with_ties, order_descr);
        });
    }
}
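
/// Illustrative query for the LIMIT WITH TIES path handled above:
///     SELECT x FROM t ORDER BY x LIMIT 2 WITH TIES
/// over x = [10, 10, 10, 20] returns all three rows with x = 10, because rows that
/// compare equal (under `order_descr`) to the last row within the limit are kept as well.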

void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
{
    if (!context.getSettingsRef().extremes)
        return;

    pipeline.transform([&](auto & stream)
    {
        stream->enableExtremes();
    });
}

void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline)
{
    if (!context.getSettingsRef().extremes)
        return;

    auto transform = std::make_shared<ExtremesTransform>(pipeline.getHeader());
    pipeline.addExtremesTransform(std::move(transform));
}
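
/// Note (my summary): with `SET extremes = 1`, the result set is accompanied by two extra
/// rows holding the column-wise minimum and maximum values of the result; the transforms
/// enabled above are what collect them.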


void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
    executeUnion(pipeline, {});
    pipeline.firstStream() = std::make_shared<CreatingSetsBlockInputStream>(
        pipeline.firstStream(), subqueries_for_sets, context);
}

void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
    const Settings & settings = context.getSettingsRef();

    auto creating_sets = std::make_shared<CreatingSetsTransform>(
        pipeline.getHeader(), subqueries_for_sets,
        SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode),
        context);

    pipeline.addCreatingSetsTransform(std::move(creating_sets));
}
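
/// Illustrative case for the step above: in `SELECT x FROM t WHERE x IN (SELECT id FROM u)`,
/// the subquery `SELECT id FROM u` is executed first and materialized into an in-memory set
/// (bounded by the transfer size limits), and only then does the main pipeline start reading `t`.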

void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline, Block header)
{
    /// Unify streams in case they have different headers.

    /// TODO: remove previous addition of _dummy column.
    if (header.columns() > 1 && header.has("_dummy"))
        header.erase("_dummy");

    for (size_t i = 0; i < pipeline.streams.size(); ++i)
    {
        auto & stream = pipeline.streams[i];
        auto stream_header = stream->getHeader();
        auto mode = ConvertingBlockInputStream::MatchColumnsMode::Name;

        if (!blocksHaveEqualStructure(header, stream_header))
            stream = std::make_shared<ConvertingBlockInputStream>(context, stream, header, mode);
    }
}
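
/// Note (my reading): MatchColumnsMode::Name makes ConvertingBlockInputStream match columns
/// by name rather than by position, so streams whose blocks carry the same columns in a
/// different order are reordered to the common header before the streams are combined.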


void InterpreterSelectQuery::ignoreWithTotals()
{
    getSelectQuery().group_by_with_totals = false;
}


void InterpreterSelectQuery::initSettings()
{
    auto & query = getSelectQuery();
    if (query.settings())
        InterpreterSetQuery(query.settings(), context).executeForCurrentContext();
}

}