2014-03-13 12:48:07 +00:00
# include <DB/Storages/MergeTree/MergeTreeDataSelectExecutor.h>
2015-04-12 04:39:20 +00:00
# include <DB/Storages/MergeTree/MergeTreeBlockInputStream.h>
2015-06-24 11:03:53 +00:00
# include <DB/Storages/MergeTree/MergeTreeReadPool.h>
# include <DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h>
2014-03-13 12:48:07 +00:00
# include <DB/Parsers/ASTIdentifier.h>
# include <DB/DataStreams/ExpressionBlockInputStream.h>
# include <DB/DataStreams/FilterBlockInputStream.h>
# include <DB/DataStreams/CollapsingFinalBlockInputStream.h>
2014-12-30 18:04:53 +00:00
# include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
2015-02-15 04:16:11 +00:00
# include <DB/DataStreams/CreatingSetsBlockInputStream.h>
# include <DB/DataStreams/NullBlockInputStream.h>
2015-07-08 04:38:46 +00:00
# include <DB/DataStreams/SummingSortedBlockInputStream.h>
# include <DB/DataStreams/AggregatingSortedBlockInputStream.h>
2014-03-13 12:48:07 +00:00
# include <DB/DataTypes/DataTypesNumberFixed.h>
2014-07-28 10:36:11 +00:00
# include <DB/Common/VirtualColumnUtils.h>
2014-09-17 09:59:21 +00:00
2015-02-03 14:37:35 +00:00
2014-03-13 12:48:07 +00:00
namespace DB
{
2015-02-15 02:31:48 +00:00
/// Executor of SELECT queries over MergeTree data. Holds a reference to the storage
/// and a per-table logger tagged with "(SelectExecutor)".
MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(MergeTreeData & data_)
	: data(data_), log(&Logger::get(data.getLogName() + " (SelectExecutor)"))
{
}
2014-07-28 10:36:11 +00:00
/// Build a block consisting only of the possible values of the virtual columns
/// (currently just `_part` — one row per data part name).
static Block getBlockWithVirtualColumns(const MergeTreeData::DataPartsVector & parts)
{
	ColumnWithTypeAndName part_name_column(new ColumnString, new DataTypeString, "_part");

	for (const auto & data_part : parts)
		part_name_column.column->insert(data_part->name);

	Block block;
	block.insert(part_name_column);
	return block;
}
2014-03-13 12:48:07 +00:00
/** Entry point for reading from a MergeTree table.
  * Selects the data parts and mark ranges that may contain data matching the query
  * (by the date condition, the primary-key condition and the virtual `_part` column),
  * applies SAMPLE and FINAL if present, and returns the resulting input streams.
  * `part_index` (optional out/in-out) is used to assign a global index to each part;
  * if null, a local counter is used instead.
  */
BlockInputStreams MergeTreeDataSelectExecutor::read(
	const Names & column_names_to_return,
	ASTPtr query,
	const Context & context,
	const Settings & settings,
	QueryProcessingStage::Enum & processed_stage,
	const size_t max_block_size,
	const unsigned threads,
	size_t * part_index)
{
	size_t part_index_var = 0;
	if (!part_index)
		part_index = &part_index_var;

	MergeTreeData::DataPartsVector parts = data.getDataPartsVector();

	/// If the query has constraints on the virtual column _part, we will select only the matching parts.
	Names virt_column_names, real_column_names;
	for (const String & name : column_names_to_return)
		if (name != "_part" &&
			name != "_part_index")
			real_column_names.push_back(name);
		else
			virt_column_names.push_back(name);

	/// If only virtual columns are requested in the query, we must ask for at least one other arbitrary column.
	if (real_column_names.empty())
		real_column_names.push_back(ExpressionActions::getSmallestColumn(data.getColumnsList()));

	Block virtual_columns_block = getBlockWithVirtualColumns(parts);

	/// If at least one virtual column is requested, try to use it for filtering parts.
	if (!virt_column_names.empty())
		VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, context);

	/// Part names that survived filtering by the query's conditions on _part.
	std::multiset<String> values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");

	data.check(real_column_names);
	processed_stage = QueryProcessingStage::FetchColumns;

	PKCondition key_condition(query, context, data.getColumnsList(), data.getSortDescription());
	PKCondition date_condition(query, context, data.getColumnsList(), SortDescription(1, SortColumnDescription(data.date_column_name, 1)));

	if (settings.force_index_by_date && date_condition.alwaysUnknown())
		throw Exception("Index by date is not used and setting 'force_index_by_date' is set.", ErrorCodes::INDEX_NOT_USED);

	/// Select the parts that may contain data satisfying date_condition and that match the condition on _part.
	{
		auto prev_parts = parts;
		parts.clear();

		for (const auto & part : prev_parts)
		{
			if (values.find(part->name) == values.end())
				continue;

			Field left = static_cast<UInt64>(part->left_date);
			Field right = static_cast<UInt64>(part->right_date);

			if (!date_condition.mayBeTrueInRange(&left, &right))
				continue;

			parts.push_back(part);
		}
	}

	/// Sampling.
	Names column_names_to_read = real_column_names;
	typedef Poco::SharedPtr<ASTFunction> ASTFunctionPtr;
	ASTFunctionPtr filter_function;
	ExpressionActionsPtr filter_expression;

	double relative_sample_size = 0;

	ASTSelectQuery & select = *typeid_cast<ASTSelectQuery *>(&*query);

	if (select.sample_size)
	{
		relative_sample_size = apply_visitor(FieldVisitorConvertToNumber<double>(),
			typeid_cast<ASTLiteral &>(*select.sample_size).value);

		if (relative_sample_size < 0)
			throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

		/// Convert an absolute sample size (SAMPLE 1000000 - how many rows to read) into a relative one (what fraction of data to read).
		if (relative_sample_size > 1)
		{
			size_t requested_count = apply_visitor(FieldVisitorConvertToNumber<UInt64>(), typeid_cast<ASTLiteral &>(*select.sample_size).value);

			/// Find out how many rows we would have read without sampling.
			LOG_DEBUG(log, "Preliminary index scan with condition: " << key_condition.toString());

			size_t total_count = 0;
			for (size_t i = 0; i < parts.size(); ++i)
			{
				MergeTreeData::DataPartPtr & part = parts[i];
				MarkRanges ranges = markRangesFromPkRange(part->index, key_condition, settings);

				for (size_t j = 0; j < ranges.size(); ++j)
					total_count += ranges[j].end - ranges[j].begin;
			}
			total_count *= data.index_granularity;

			relative_sample_size = std::min(1., static_cast<double>(requested_count) / total_count);

			LOG_DEBUG(log, "Selected relative sample size: " << relative_sample_size);
		}

		/// SAMPLE 1 is the same as the absence of SAMPLE.
		if (relative_sample_size == 1)
			relative_sample_size = 0;
	}

	/// With parallel replicas and a sampling expression, force full-range sampling so the
	/// data can be split among replicas by sampling-column ranges below.
	if ((settings.parallel_replicas_count > 1) && !data.sampling_expression.isNull() && (relative_sample_size == 0))
		relative_sample_size = 1;

	if (relative_sample_size != 0)
	{
		UInt64 sampling_column_max = 0;
		DataTypePtr type = data.getPrimaryExpression()->getSampleBlock().getByName(data.sampling_expression->getColumnName()).type;

		if (type->getName() == "UInt64")
			sampling_column_max = std::numeric_limits<UInt64>::max();
		else if (type->getName() == "UInt32")
			sampling_column_max = std::numeric_limits<UInt32>::max();
		else if (type->getName() == "UInt16")
			sampling_column_max = std::numeric_limits<UInt16>::max();
		else if (type->getName() == "UInt8")
			sampling_column_max = std::numeric_limits<UInt8>::max();
		else
			throw Exception("Invalid sampling column type in storage parameters: " + type->getName() + ". Must be unsigned integer type.", ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER);

		/// Half-open interval [lower, upper) of sampling-column values this replica reads.
		UInt64 sampling_column_value_lower_limit;
		UInt64 sampling_column_value_upper_limit;
		UInt64 upper_limit = static_cast<long double>(relative_sample_size) * sampling_column_max;

		if (settings.parallel_replicas_count > 1)
		{
			/// Split the sampled value range evenly among the replicas; this replica takes its slice.
			sampling_column_value_lower_limit = (static_cast<long double>(settings.parallel_replica_offset) / settings.parallel_replicas_count) * upper_limit;

			if ((settings.parallel_replica_offset + 1) < settings.parallel_replicas_count)
				sampling_column_value_upper_limit = (static_cast<long double>(settings.parallel_replica_offset + 1) / settings.parallel_replicas_count) * upper_limit;
			else
				sampling_column_value_upper_limit = (upper_limit < sampling_column_max) ? (upper_limit + 1) : upper_limit;
		}
		else
		{
			sampling_column_value_lower_limit = 0;
			sampling_column_value_upper_limit = (upper_limit < sampling_column_max) ? (upper_limit + 1) : upper_limit;
		}

		/// Add the condition so that more can be pruned on the repeated index scan.
		if (sampling_column_value_lower_limit > 0)
			if (!key_condition.addCondition(data.sampling_expression->getColumnName(),
				Range::createLeftBounded(sampling_column_value_lower_limit, true)))
				throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);

		if (!key_condition.addCondition(data.sampling_expression->getColumnName(),
			Range::createRightBounded(sampling_column_value_upper_limit, false)))
			throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);

		/// AST for `sampling_expression < sampling_column_value_upper_limit`.
		ASTPtr upper_filter_args = new ASTExpressionList;
		upper_filter_args->children.push_back(data.sampling_expression);
		upper_filter_args->children.push_back(new ASTLiteral(StringRange(), sampling_column_value_upper_limit));

		ASTFunctionPtr upper_filter_function = new ASTFunction;
		upper_filter_function->name = "less";
		upper_filter_function->arguments = upper_filter_args;
		upper_filter_function->children.push_back(upper_filter_function->arguments);

		if (sampling_column_value_lower_limit > 0)
		{
			/// Filter expression: sampling_expression in [sampling_column_value_lower_limit, sampling_column_value_upper_limit)
			ASTPtr lower_filter_args = new ASTExpressionList;
			lower_filter_args->children.push_back(data.sampling_expression);
			lower_filter_args->children.push_back(new ASTLiteral(StringRange(), sampling_column_value_lower_limit));

			ASTFunctionPtr lower_filter_function = new ASTFunction;
			lower_filter_function->name = "greaterOrEquals";
			lower_filter_function->arguments = lower_filter_args;
			lower_filter_function->children.push_back(lower_filter_function->arguments);

			ASTPtr filter_function_args = new ASTExpressionList;
			filter_function_args->children.push_back(lower_filter_function);
			filter_function_args->children.push_back(upper_filter_function);

			filter_function = new ASTFunction;
			filter_function->name = "and";
			filter_function->arguments = filter_function_args;
			filter_function->children.push_back(filter_function->arguments);
		}
		else
		{
			/// Filter expression: sampling_expression < sampling_column_value_upper_limit
			filter_function = upper_filter_function;
		}

		filter_expression = ExpressionAnalyzer(filter_function, context, nullptr, data.getColumnsList()).getActions(false);

		/// Add the columns needed to evaluate sampling_expression (deduplicated).
		std::vector<String> add_columns = filter_expression->getRequiredColumns();
		column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());
		std::sort(column_names_to_read.begin(), column_names_to_read.end());
		column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end());
	}

	LOG_DEBUG(log, "Key condition: " << key_condition.toString());
	LOG_DEBUG(log, "Date condition: " << date_condition.toString());

	/// PREWHERE
	ExpressionActionsPtr prewhere_actions;
	String prewhere_column;
	if (select.prewhere_expression)
	{
		ExpressionAnalyzer analyzer(select.prewhere_expression, context, nullptr, data.getColumnsList());
		prewhere_actions = analyzer.getActions(false);
		prewhere_column = select.prewhere_expression->getColumnName();
		SubqueriesForSets prewhere_subqueries = analyzer.getSubqueriesForSets();

		/** Compute the subqueries right now.
		  * NOTE A drawback: these computations do not fit into the query execution pipeline.
		  * They are done before the pipeline starts; they cannot be interrupted; no progress packets are sent while they run.
		  */
		if (!prewhere_subqueries.empty())
			CreatingSetsBlockInputStream(new NullBlockInputStream, prewhere_subqueries, settings.limits).read();
	}

	RangesInDataParts parts_with_ranges;

	/// Find out what range to read from each part.
	size_t sum_marks = 0;
	size_t sum_ranges = 0;
	for (auto & part : parts)
	{
		RangesInDataPart ranges(part, (*part_index)++);

		/// UnsortedMergeTree has no usable primary-key index - read the whole part.
		if (data.mode != MergeTreeData::Unsorted)
			ranges.ranges = markRangesFromPkRange(part->index, key_condition, settings);
		else
			ranges.ranges = MarkRanges{MarkRange{0, part->size}};

		if (!ranges.ranges.empty())
		{
			parts_with_ranges.push_back(ranges);

			sum_ranges += ranges.ranges.size();
			for (const auto & range : ranges.ranges)
				sum_marks += range.end - range.begin;
		}
	}

	LOG_DEBUG(log, "Selected " << parts.size() << " parts by date, " << parts_with_ranges.size() << " parts by key, "
		<< sum_marks << " marks to read from " << sum_ranges << " ranges");

	BlockInputStreams res;

	if (select.final)
	{
		/// Add the columns needed to compute the primary key and the sign.
		std::vector<String> add_columns = data.getPrimaryExpression()->getRequiredColumns();
		column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());

		if (!data.sign_column.empty())
			column_names_to_read.push_back(data.sign_column);

		std::sort(column_names_to_read.begin(), column_names_to_read.end());
		column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end());

		res = spreadMarkRangesAmongThreadsFinal(
			parts_with_ranges,
			threads,
			column_names_to_read,
			max_block_size,
			settings.use_uncompressed_cache,
			prewhere_actions,
			prewhere_column,
			virt_column_names,
			settings,
			context);
	}
	else
	{
		res = spreadMarkRangesAmongThreads(
			parts_with_ranges,
			threads,
			column_names_to_read,
			max_block_size,
			settings.use_uncompressed_cache,
			prewhere_actions,
			prewhere_column,
			virt_column_names,
			settings);
	}

	/// Apply the sampling filter on top of every resulting stream.
	if (relative_sample_size != 0)
		for (auto & stream : res)
			stream = new FilterBlockInputStream(new ExpressionBlockInputStream(stream, filter_expression), filter_function->getColumnName());

	return res;
}
2015-02-15 04:16:11 +00:00
2014-03-13 12:48:07 +00:00
/** Distribute the selected mark ranges among `threads` input streams so that each stream
  * reads approximately the same number of marks. Two strategies:
  * the new read-pool based one (merge_tree_uniform_read_distribution == 1), and the old
  * static partitioning that slices parts/ranges greedily from the back of `parts`.
  */
BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreads(
	RangesInDataParts parts,
	size_t threads,
	const Names & column_names,
	size_t max_block_size,
	bool use_uncompressed_cache,
	ExpressionActionsPtr prewhere_actions,
	const String & prewhere_column,
	const Names & virt_columns,
	const Settings & settings)
{
	/// Convert the row-based settings into mark counts (rounding up).
	const std::size_t min_marks_for_concurrent_read =
		(settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity;
	const std::size_t max_marks_to_use_cache =
		(settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity;

	/// Count the marks of each part.
	std::vector<size_t> sum_marks_in_parts(parts.size());
	size_t sum_marks = 0;
	for (size_t i = 0; i < parts.size(); ++i)
	{
		/// List the ranges from right to left so the leftmost range can be dropped with pop_back().
		std::reverse(parts[i].ranges.begin(), parts[i].ranges.end());

		for (const auto & range : parts[i].ranges)
			sum_marks_in_parts[i] += range.end - range.begin;

		sum_marks += sum_marks_in_parts[i];
	}

	/// Do not pollute the uncompressed cache with very large reads.
	if (sum_marks > max_marks_to_use_cache)
		use_uncompressed_cache = false;

	BlockInputStreams res;

	/// @todo remove old code
	if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1)
	{
		/// New strategy: a shared pool hands out work to the threads dynamically.
		MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
			threads, sum_marks, min_marks_for_concurrent_read, parts, data, prewhere_actions, prewhere_column, true,
			column_names);

		for (std::size_t i = 0; i < threads; ++i)
			res.emplace_back(new MergeTreeThreadBlockInputStream{
				i, pool, min_marks_for_concurrent_read, max_block_size, data, use_uncompressed_cache, prewhere_actions,
				prewhere_column, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, virt_columns
			});

		/// Estimate the total number of rows - for the progress bar.
		const std::size_t total_rows = data.index_granularity * sum_marks;

		/// Set the approximate number of rows only for the first source.
		static_cast<IProfilingBlockInputStream &>(*res.front()).setTotalRowsApprox(total_rows);

		LOG_TRACE(log, "Reading approx. " << total_rows);
	}
	else if (sum_marks > 0)
	{
		/// Old strategy: statically carve ~equal shares of marks from the back of `parts`.
		const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1;

		for (size_t i = 0; i < threads && !parts.empty(); ++i)
		{
			size_t need_marks = min_marks_per_thread;

			/// Loop over parts.
			while (need_marks > 0 && !parts.empty())
			{
				RangesInDataPart & part = parts.back();
				size_t & marks_in_part = sum_marks_in_parts.back();

				/// Do not take too few rows from a part.
				if (marks_in_part >= min_marks_for_concurrent_read &&
					need_marks < min_marks_for_concurrent_read)
					need_marks = min_marks_for_concurrent_read;

				/// Do not leave too few rows in a part.
				if (marks_in_part > need_marks &&
					marks_in_part - need_marks < min_marks_for_concurrent_read)
					need_marks = marks_in_part;

				MarkRanges ranges_to_get_from_part;

				/// Take the whole part if it is small enough.
				if (marks_in_part <= need_marks)
				{
					/// Restore the original order of the ranges.
					std::reverse(part.ranges.begin(), part.ranges.end());

					ranges_to_get_from_part = part.ranges;

					need_marks -= marks_in_part;
					parts.pop_back();
					sum_marks_in_parts.pop_back();
				}
				else
				{
					/// Loop over the ranges of the part.
					while (need_marks > 0)
					{
						if (part.ranges.empty())
							throw Exception("Unexpected end of ranges while spreading marks among threads", ErrorCodes::LOGICAL_ERROR);

						MarkRange & range = part.ranges.back();

						const size_t marks_in_range = range.end - range.begin;
						const size_t marks_to_get_from_range = std::min(marks_in_range, need_marks);

						ranges_to_get_from_part.emplace_back(range.begin, range.begin + marks_to_get_from_range);
						range.begin += marks_to_get_from_range;
						marks_in_part -= marks_to_get_from_range;
						need_marks -= marks_to_get_from_range;
						if (range.begin == range.end)
							part.ranges.pop_back();
					}
				}

				BlockInputStreamPtr source_stream = new MergeTreeBlockInputStream(
					data.getFullPath() + part.data_part->name + '/', max_block_size, column_names, data,
					part.data_part, ranges_to_get_from_part, use_uncompressed_cache,
					prewhere_actions, prewhere_column, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size);

				res.push_back(source_stream);

				/// Wrap the stream to materialize the requested virtual columns as constants.
				for (const String & virt_column : virt_columns)
				{
					if (virt_column == "_part")
						res.back() = new AddingConstColumnBlockInputStream<String>(
							res.back(), new DataTypeString, part.data_part->name, "_part");
					else if (virt_column == "_part_index")
						res.back() = new AddingConstColumnBlockInputStream<UInt64>(
							res.back(), new DataTypeUInt64, part.part_index_in_query, "_part_index");
				}
			}
		}

		if (!parts.empty())
			throw Exception("Couldn't spread marks among threads", ErrorCodes::LOGICAL_ERROR);
	}

	return res;
}
/** Build the streams for a FINAL read: create one sorted source per part (applying the
  * primary expression to each), then merge them according to the engine's merge mode
  * (Collapsing / Summing / Aggregating). With a single source and a sign column,
  * filtering by `sign = 1` replaces the merge.
  */
BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongThreadsFinal(
	RangesInDataParts parts,
	size_t threads,
	const Names & column_names,
	size_t max_block_size,
	bool use_uncompressed_cache,
	ExpressionActionsPtr prewhere_actions,
	const String & prewhere_column,
	const Names & virt_columns,
	const Settings & settings,
	const Context & context)
{
	/// Convert the row-based settings into mark counts (rounding up).
	const size_t max_marks_to_use_cache =
		(settings.merge_tree_max_rows_to_use_cache + data.index_granularity - 1) / data.index_granularity;
	const size_t min_marks_for_read_task =
		(settings.merge_tree_min_rows_for_concurrent_read + data.index_granularity - 1) / data.index_granularity;

	size_t sum_marks = 0;
	for (size_t i = 0; i < parts.size(); ++i)
		for (size_t j = 0; j < parts[i].ranges.size(); ++j)
			sum_marks += parts[i].ranges[j].end - parts[i].ranges[j].begin;

	/// Do not pollute the uncompressed cache with very large reads.
	if (sum_marks > max_marks_to_use_cache)
		use_uncompressed_cache = false;

	BlockInputStreams to_merge;

	if (settings.merge_tree_uniform_read_distribution == 1)
	{
		/// Pool-based strategy: one stream per part, work handed out by the shared pool.
		MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
			parts.size(), sum_marks, min_marks_for_read_task, parts, data, prewhere_actions, prewhere_column, true,
			column_names, true);

		for (const auto i : ext::range(0, parts.size()))
		{
			BlockInputStreamPtr source_stream{
				new MergeTreeThreadBlockInputStream{
					i, pool, min_marks_for_read_task, max_block_size, data, use_uncompressed_cache, prewhere_actions,
					prewhere_column, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, virt_columns
				}
			};

			/// Apply the primary expression so the merging streams see sorted key columns.
			to_merge.push_back(new ExpressionBlockInputStream(source_stream, data.getPrimaryExpression()));
		}

		/// Estimate the total number of rows - for the progress bar.
		const std::size_t total_rows = data.index_granularity * sum_marks;

		/// Set the approximate number of rows only for the first source.
		static_cast<IProfilingBlockInputStream &>(*to_merge.front()).setTotalRowsApprox(total_rows);

		LOG_TRACE(log, "Reading approx. " << total_rows);
	}
	else
	{
		/// Old strategy: one plain MergeTreeBlockInputStream per part.
		for (size_t part_index = 0; part_index < parts.size(); ++part_index)
		{
			RangesInDataPart & part = parts[part_index];

			BlockInputStreamPtr source_stream = new MergeTreeBlockInputStream(
				data.getFullPath() + part.data_part->name + '/', max_block_size, column_names, data,
				part.data_part, part.ranges, use_uncompressed_cache,
				prewhere_actions, prewhere_column, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size);

			/// Wrap the stream to materialize the requested virtual columns as constants.
			for (const String & virt_column : virt_columns)
			{
				if (virt_column == "_part")
					source_stream = new AddingConstColumnBlockInputStream<String>(
						source_stream, new DataTypeString, part.data_part->name, "_part");
				else if (virt_column == "_part_index")
					source_stream = new AddingConstColumnBlockInputStream<UInt64>(
						source_stream, new DataTypeUInt64, part.part_index_in_query, "_part_index");
			}

			to_merge.push_back(new ExpressionBlockInputStream(source_stream, data.getPrimaryExpression()));
		}
	}

	BlockInputStreams res;
	if (to_merge.size() == 1)
	{
		/// A single source does not need merging; for Collapsing, filter by positive sign instead.
		if (!data.sign_column.empty())
		{
			ExpressionActionsPtr sign_filter_expression;
			String sign_filter_column;

			createPositiveSignCondition(sign_filter_expression, sign_filter_column, context);

			res.push_back(new FilterBlockInputStream(new ExpressionBlockInputStream(to_merge[0], sign_filter_expression), sign_filter_column));
		}
		else
			res = to_merge;
	}
	else if (to_merge.size() > 1)
	{
		BlockInputStreamPtr merged;
		switch (data.mode)
		{
			case MergeTreeData::Ordinary:
				throw Exception("Ordinary MergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR);

			case MergeTreeData::Collapsing:
				merged = new CollapsingFinalBlockInputStream(to_merge, data.getSortDescription(), data.sign_column);
				break;

			case MergeTreeData::Summing:
				merged = new SummingSortedBlockInputStream(to_merge, data.getSortDescription(), data.columns_to_sum, max_block_size);
				break;

			case MergeTreeData::Aggregating:
				merged = new AggregatingSortedBlockInputStream(to_merge, data.getSortDescription(), max_block_size);
				break;

			case MergeTreeData::Unsorted:
				throw Exception("UnsortedMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR);
		}

		res.push_back(merged);
	}

	return res;
}
2015-07-15 04:50:48 +00:00
void MergeTreeDataSelectExecutor : : createPositiveSignCondition ( ExpressionActionsPtr & out_expression , String & out_column , const Context & context )
2014-03-13 12:48:07 +00:00
{
ASTFunction * function = new ASTFunction ;
ASTPtr function_ptr = function ;
ASTExpressionList * arguments = new ASTExpressionList ;
ASTPtr arguments_ptr = arguments ;
ASTIdentifier * sign = new ASTIdentifier ;
ASTPtr sign_ptr = sign ;
ASTLiteral * one = new ASTLiteral ;
ASTPtr one_ptr = one ;
function - > name = " equals " ;
function - > arguments = arguments_ptr ;
function - > children . push_back ( arguments_ptr ) ;
arguments - > children . push_back ( sign_ptr ) ;
arguments - > children . push_back ( one_ptr ) ;
sign - > name = data . sign_column ;
sign - > kind = ASTIdentifier : : Column ;
one - > type = new DataTypeInt8 ;
one - > value = Field ( static_cast < Int64 > ( 1 ) ) ;
2015-07-15 04:50:48 +00:00
out_expression = ExpressionAnalyzer ( function_ptr , context , { } , data . getColumnsList ( ) ) . getActions ( false ) ;
2014-03-13 12:48:07 +00:00
out_column = function - > getColumnName ( ) ;
}
/// Get the set of mark ranges outside of which there can be no keys from the given range.
/// Recursively (via an explicit stack) narrows suspicious mark segments using the
/// primary-key index; nearby useful segments closer than min_marks_for_seek are merged.
MarkRanges MergeTreeDataSelectExecutor::markRangesFromPkRange(
	const MergeTreeData::DataPart::Index & index, PKCondition & key_condition, const Settings & settings)
{
	/// Minimal gap (in marks) that justifies a separate seek rather than extending the previous range.
	size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity;

	MarkRanges res;

	size_t key_size = data.getSortDescription().size();
	/// The index stores key_size values per mark.
	size_t marks_count = index.size() / key_size;

	/// If the index is not usable for this condition, read everything.
	if (key_condition.alwaysUnknown())
	{
		res.push_back(MarkRange(0, marks_count));
	}
	else
	{
		/** The stack always holds non-overlapping suspicious segments, the leftmost one on top (back).
		  * At each step, take the left segment and check whether it may contain matching keys.
		  * If it may, split it into smaller segments and push them onto the stack. If not, discard it.
		  * If the segment is already one mark long, add it to the answer and discard it.
		  */
		std::vector<MarkRange> ranges_stack;
		ranges_stack.push_back(MarkRange(0, marks_count));
		while (!ranges_stack.empty())
		{
			MarkRange range = ranges_stack.back();
			ranges_stack.pop_back();

			bool may_be_true;
			/// The last mark has no right neighbor in the index, so only a one-sided check is possible.
			if (range.end == marks_count)
				may_be_true = key_condition.mayBeTrueAfter(&index[range.begin * key_size]);
			else
				may_be_true = key_condition.mayBeTrueInRange(&index[range.begin * key_size], &index[range.end * key_size]);

			if (!may_be_true)
				continue;

			if (range.end == range.begin + 1)
			{
				/// We found a useful gap between adjacent marks. Either append it to the last range, or start a new range.
				if (res.empty() || range.begin - res.back().end > min_marks_for_seek)
					res.push_back(range);
				else
					res.back().end = range.end;
			}
			else
			{
				/// Split the segment and put the pieces on the stack from right to left (so the leftmost is processed first).
				size_t step = (range.end - range.begin - 1) / settings.merge_tree_coarse_index_granularity + 1;
				size_t end;

				for (end = range.end; end > range.begin + step; end -= step)
					ranges_stack.push_back(MarkRange(end - step, end));

				ranges_stack.push_back(MarkRange(range.begin, end));
			}
		}
	}

	return res;
}
}