2017-06-14 10:50:22 +00:00
# include <Storages/MergeTree/MergeTreeReader.h>
2018-02-13 19:34:15 +00:00
# include <Columns/FilterDescription.h>
# include <Columns/ColumnsCommon.h>
2018-04-06 13:58:06 +00:00
# include <Columns/ColumnNothing.h>
# include <ext/range.h>
2018-11-16 12:22:51 +00:00
# include <DataTypes/DataTypeNothing.h>
2018-02-13 19:34:15 +00:00
2019-01-04 12:10:00 +00:00
# ifdef __SSE2__
2018-02-13 19:34:15 +00:00
# include <emmintrin.h>
# endif
2017-06-14 10:50:22 +00:00
namespace DB
{
2018-02-20 11:45:58 +00:00
/// A lazy positioning wrapper over MergeTreeReader: constructing it does not
/// read anything; rows are only pulled out by finalize() (or by read() when
/// it has to reposition).
MergeTreeRangeReader::DelayedStream::DelayedStream(
        size_t from_mark, MergeTreeReader * merge_tree_reader)
        : current_mark(from_mark), current_offset(0), num_delayed_rows(0)
        , merge_tree_reader(merge_tree_reader)
        , continue_reading(false), is_finished(false)
{
}
2018-02-20 11:45:58 +00:00
/// Absolute row position of the stream inside the part: all rows that belong
/// to marks before the current one, plus the offset inside the current
/// granule, plus rows that were scheduled but not yet physically read.
size_t MergeTreeRangeReader::DelayedStream::position() const
{
    size_t rows_before_mark = 0;
    for (size_t mark = 0; mark < current_mark; ++mark)
        rows_before_mark += merge_tree_reader->data_part->index_granularity.getMarkRows(mark);

    return rows_before_mark + current_offset + num_delayed_rows;
}
2018-02-20 11:45:58 +00:00
/// Reads up to num_rows rows from the reader's current position into block.
/// Returns the number of rows actually appended (0 when num_rows is 0).
size_t MergeTreeRangeReader::DelayedStream::readRows(Block & block, size_t num_rows)
{
    if (num_rows)
    {
        size_t rows_read = merge_tree_reader->readRows(current_mark, continue_reading, num_rows, block);

        //std::cerr << "Rows read:" << rows_read << std::endl;
        //std::cerr << "Num rows:" << num_rows << std::endl;

        continue_reading = true;

        /// Zero rows_read may happen either because reading has finished
        /// or because there are no columns we can read in current part (for example, all columns are default).
        /// In the last case we can't finish reading, but it's also ok for the first case
        /// because we can finish reading by calculating the number of pending rows.
        if (0 < rows_read && rows_read < num_rows)
            is_finished = true;

        return rows_read;
    }

    return 0;
}
2018-02-20 11:45:58 +00:00
/// Schedules reading of num_rows rows starting at (from_mark, offset).
/// If the requested position is exactly where the stream already stands,
/// reading is merely delayed (rows accumulate in num_delayed_rows).
/// Otherwise the previously delayed rows are flushed into block first and the
/// stream is repositioned. Returns the number of rows appended to block now.
size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark, size_t offset, size_t num_rows)
{
    size_t num_rows_before_from_mark = 0;
    for (size_t i = 0; i < from_mark; ++i)
        num_rows_before_from_mark += merge_tree_reader->data_part->index_granularity.getMarkRows(i);

    /// We already stand accurately in required position,
    /// so because stream is lazy, we don't read anything
    /// and only increment amount delayed_rows
    if (position() == num_rows_before_from_mark + offset)
    {
        num_delayed_rows += num_rows;
        return 0;
    }
    else
    {
        size_t read_rows = finalize(block);

        continue_reading = false;
        current_mark = from_mark;
        current_offset = offset;
        num_delayed_rows = num_rows;

        return read_rows;
    }
}
2018-02-20 11:45:58 +00:00
/// Flushes all delayed rows into block. If the stream has to start inside a
/// granule, it first converts current_offset into (mark, offset-in-granule)
/// and then reads-and-discards the leading rows, since row boundaries inside
/// a compressed granule are unknown. Returns the number of rows appended.
size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block)
{
    /// We need to skip some rows before reading
    if (current_offset && !continue_reading)
    {
        /// Translate the flat row offset into a mark plus a remainder.
        for (size_t mark_num : ext::range(current_mark, merge_tree_reader->data_part->getMarksCount()))
        {
            size_t mark_index_granularity = merge_tree_reader->data_part->index_granularity.getMarkRows(mark_num);
            if (current_offset >= mark_index_granularity)
            {
                current_offset -= mark_index_granularity;
                current_mark++;
            }
            else
                break;
        }

        /// Skip some rows from beginning of granule.
        /// We don't know size of rows in compressed granule,
        /// so have to read them and throw out.
        if (current_offset)
        {
            Block temp_block;
            readRows(temp_block, current_offset);
        }
    }

    size_t rows_to_read = num_delayed_rows;
    current_offset += num_delayed_rows;
    num_delayed_rows = 0;

    return readRows(block, rows_to_read);
}
2018-02-20 11:45:58 +00:00
/// A stream that reads rows from the mark range [from_mark, to_mark) of one
/// part, delegating actual reads to a lazy DelayedStream.
/// Throws LOGICAL_ERROR if the requested mark range is outside the part.
MergeTreeRangeReader::Stream::Stream(
        size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader)
        : current_mark(from_mark), offset_after_current_mark(0)
        , last_mark(to_mark)
        , merge_tree_reader(merge_tree_reader)
        , current_mark_index_granularity(0) /// Filled below, after the bounds checks.
        , stream(from_mark, merge_tree_reader)
{
    size_t marks_count = merge_tree_reader->data_part->index_granularity.getMarksCount();

    /// Validate marks before indexing index_granularity with them.
    if (from_mark >= marks_count)
        throw Exception("Trying create stream to read from mark № " + toString(current_mark) + " but total marks count is "
            + toString(marks_count), ErrorCodes::LOGICAL_ERROR);

    /// Report last_mark here: it is the mark being validated.
    if (last_mark > marks_count)
        throw Exception("Trying create stream to read to mark № " + toString(last_mark) + " but total marks count is "
            + toString(marks_count), ErrorCodes::LOGICAL_ERROR);

    current_mark_index_granularity = merge_tree_reader->data_part->index_granularity.getMarkRows(from_mark);
}
2018-02-20 11:45:58 +00:00
void MergeTreeRangeReader : : Stream : : checkNotFinished ( ) const
2017-06-15 17:01:13 +00:00
{
2018-02-13 19:34:15 +00:00
if ( isFinished ( ) )
throw Exception ( " Cannot read out of marks range. " , ErrorCodes : : LOGICAL_ERROR ) ;
2017-06-15 17:01:13 +00:00
}
2018-02-20 11:45:58 +00:00
void MergeTreeRangeReader : : Stream : : checkEnoughSpaceInCurrentGranule ( size_t num_rows ) const
2017-06-15 17:01:13 +00:00
{
2018-11-14 11:26:44 +00:00
if ( num_rows + offset_after_current_mark > current_mark_index_granularity )
2018-02-13 19:34:15 +00:00
throw Exception ( " Cannot read from granule more than index_granularity. " , ErrorCodes : : LOGICAL_ERROR ) ;
2017-06-15 17:01:13 +00:00
}
2017-06-14 10:50:22 +00:00
2018-02-20 11:45:58 +00:00
/// Delegates to the underlying delayed stream and marks this stream finished
/// as soon as the delayed stream reports end of data.
size_t MergeTreeRangeReader::Stream::readRows(Block & block, size_t num_rows)
{
    auto num_read = stream.read(block, current_mark, offset_after_current_mark, num_rows);

    if (stream.isFinished())
        finish();

    return num_read;
}
2018-11-14 11:26:44 +00:00
/// Advances to the next mark and resets the offset inside the granule.
/// For the past-the-end mark the granularity is set to 0 instead of throwing,
/// so finishing a range exactly at the last mark is not treated as an error.
void MergeTreeRangeReader::Stream::toNextMark()
{
    ++current_mark;

    size_t total_marks_count = merge_tree_reader->data_part->index_granularity.getMarksCount();

    /// TODO(alesap) clumsy logic, fixme
    if (current_mark < total_marks_count)
        current_mark_index_granularity = merge_tree_reader->data_part->index_granularity.getMarkRows(current_mark);
    else
        current_mark_index_granularity = 0; /// HACK? Past-the-end mark: nothing can be read from it.
        ///throw Exception("Trying to read from mark " + toString(current_mark) + ", but total marks count " + toString(total_marks_count), ErrorCodes::LOGICAL_ERROR);

    offset_after_current_mark = 0;
}
2018-02-20 11:45:58 +00:00
/// Reads num_rows rows (which must fit in the current granule) into block.
/// If skip_remaining_rows_in_current_granule is set, the rest of the granule
/// is abandoned and the stream moves to the next mark.
/// Returns the number of rows appended to block right now (may differ from
/// num_rows because the underlying stream is lazy).
size_t MergeTreeRangeReader::Stream::read(Block & block, size_t num_rows, bool skip_remaining_rows_in_current_granule)
{
    checkEnoughSpaceInCurrentGranule(num_rows);

    if (num_rows)
    {
        checkNotFinished();

        size_t read_rows = readRows(block, num_rows);

        offset_after_current_mark += num_rows;

        /// Start new granule; skipped_rows_after_offset is already zero.
        //std::cerr << "Offset after current mark:" << offset_after_current_mark << std::endl;
        //std::cerr << "Current Index granularity:" << current_mark_index_granularity << std::endl;
        if (offset_after_current_mark == current_mark_index_granularity || skip_remaining_rows_in_current_granule)
            toNextMark();

        return read_rows;
    }
    else
    {
        /// Nothing to read.
        if (skip_remaining_rows_in_current_granule)
        {
            /// Skip the rest of the rows in granule and start new one.
            checkNotFinished();
            toNextMark();
        }

        return 0;
    }
}
2017-07-19 16:39:18 +00:00
2018-02-20 11:45:58 +00:00
void MergeTreeRangeReader : : Stream : : skip ( size_t num_rows )
2018-02-13 19:34:15 +00:00
{
if ( num_rows )
{
checkNotFinished ( ) ;
2018-02-20 11:45:58 +00:00
checkEnoughSpaceInCurrentGranule ( num_rows ) ;
2017-07-19 16:39:18 +00:00
2018-02-13 19:34:15 +00:00
offset_after_current_mark + = num_rows ;
2017-06-14 10:50:22 +00:00
2018-11-14 11:26:44 +00:00
if ( offset_after_current_mark = = current_mark_index_granularity )
2018-02-13 19:34:15 +00:00
{
/// Start new granule; skipped_rows_after_offset is already zero.
2018-11-14 11:26:44 +00:00
toNextMark ( ) ;
2018-02-13 19:34:15 +00:00
}
}
}
2018-02-20 11:45:58 +00:00
/// Flushes the delayed rows of the underlying stream into block and
/// propagates end-of-stream. Returns the number of rows appended.
size_t MergeTreeRangeReader::Stream::finalize(Block & block)
{
    auto rows = stream.finalize(block);

    if (stream.isFinished())
        finish();

    return rows;
}
2018-02-13 19:34:15 +00:00
2018-02-20 11:45:58 +00:00
void MergeTreeRangeReader : : ReadResult : : addGranule ( size_t num_rows )
2018-02-13 19:34:15 +00:00
{
rows_per_granule . push_back ( num_rows ) ;
2018-03-05 14:41:43 +00:00
total_rows_per_granule + = num_rows ;
2018-02-13 19:34:15 +00:00
}
2018-02-20 13:37:04 +00:00
void MergeTreeRangeReader : : ReadResult : : adjustLastGranule ( )
2018-02-13 19:34:15 +00:00
{
2018-03-05 14:41:43 +00:00
size_t num_rows_to_subtract = total_rows_per_granule - num_read_rows ;
2018-02-20 13:37:04 +00:00
2018-02-13 19:34:15 +00:00
if ( rows_per_granule . empty ( ) )
throw Exception ( " Can't adjust last granule because no granules were added. " , ErrorCodes : : LOGICAL_ERROR ) ;
if ( num_rows_to_subtract > rows_per_granule . back ( ) )
throw Exception ( " Can't adjust last granule because it has " + toString ( rows_per_granule . back ( ) )
2018-11-14 11:26:44 +00:00
+ " rows, but try to subtract " + toString ( num_rows_to_subtract ) + " rows. " ,
2018-02-13 19:34:15 +00:00
ErrorCodes : : LOGICAL_ERROR ) ;
rows_per_granule . back ( ) - = num_rows_to_subtract ;
2018-03-05 14:41:43 +00:00
total_rows_per_granule - = num_rows_to_subtract ;
2018-02-13 19:34:15 +00:00
}
2018-02-20 11:45:58 +00:00
void MergeTreeRangeReader : : ReadResult : : clear ( )
2018-02-13 19:34:15 +00:00
{
/// Need to save information about the number of granules.
2018-02-22 12:43:57 +00:00
num_rows_to_skip_in_last_granule + = rows_per_granule . back ( ) ;
2018-02-13 19:34:15 +00:00
rows_per_granule . assign ( rows_per_granule . size ( ) , 0 ) ;
2018-03-05 14:41:43 +00:00
total_rows_per_granule = 0 ;
filter_holder = nullptr ;
filter = nullptr ;
2018-02-13 19:34:15 +00:00
}
2018-02-20 11:45:58 +00:00
/// Tries to shrink the result using the filter's zero tails.
/// Outcomes: filter is constant-false -> the whole result is cleared;
/// filter is constant-true -> the filter is dropped; otherwise, if enough
/// rows can be skipped, granule tails of zeros are collapsed out of the
/// filter and rows_per_granule is reduced accordingly.
void MergeTreeRangeReader::ReadResult::optimize()
{
    if (total_rows_per_granule == 0 || filter == nullptr)
        return;

    NumRows zero_tails;
    auto total_zero_rows_in_tails = countZeroTails(filter->getData(), zero_tails);

    if (total_zero_rows_in_tails == filter->size())
    {
        /// Filter is always false.
        clear();
        return;
    }
    else if (total_zero_rows_in_tails == 0 && countBytesInFilter(filter->getData()) == filter->size())
    {
        /// Filter is always true: no filtering needed.
        filter_holder = nullptr;
        filter = nullptr;
        return;
    }

    /// Just a guess. If only a few rows may be skipped, it's better not to skip at all.
    if (2 * total_zero_rows_in_tails > filter->size())
    {
        auto new_filter = ColumnUInt8::create(filter->size() - total_zero_rows_in_tails);
        IColumn::Filter & new_data = new_filter->getData();

        size_t rows_in_last_granule = rows_per_granule.back();

        collapseZeroTails(filter->getData(), new_data, zero_tails);

        total_rows_per_granule = new_filter->size();
        /// Rows cut from the last granule must still be skipped by the stream.
        num_rows_to_skip_in_last_granule += rows_in_last_granule - rows_per_granule.back();

        filter = new_filter.get();
        filter_holder = std::move(new_filter);
    }
}
2019-01-04 12:10:00 +00:00
/// For each granule, counts how many trailing entries of its slice of the
/// filter are zero. Fills zero_tails (one entry per granule) and returns the
/// total number of such trailing zeros over all granules.
size_t MergeTreeRangeReader::ReadResult::countZeroTails(const IColumn::Filter & filter_vec, NumRows & zero_tails) const
{
    zero_tails.resize(0);
    zero_tails.reserve(rows_per_granule.size());

    const auto * pos = filter_vec.data();
    size_t total_tail_zeros = 0;

    for (auto granule_rows : rows_per_granule)
    {
        /// Count the number of zeros at the end of filter for rows were read from current granule.
        size_t tail = numZerosInTail(pos, pos + granule_rows);
        zero_tails.push_back(tail);
        total_tail_zeros += tail;
        pos += granule_rows;
    }

    return total_tail_zeros;
}
2019-01-04 12:10:00 +00:00
/// Copies filter_vec into new_filter_vec while dropping the trailing zeros of
/// each granule (counts precomputed in zero_tails). Mutates rows_per_granule
/// in place to the shrunken sizes and resizes new_filter_vec to the number of
/// bytes actually written.
void MergeTreeRangeReader::ReadResult::collapseZeroTails(const IColumn::Filter & filter_vec, IColumn::Filter & new_filter_vec,
                                                         const NumRows & zero_tails)
{
    auto filter_data = filter_vec.data();
    auto new_filter_data = new_filter_vec.data();

    for (auto i : ext::range(0, rows_per_granule.size()))
    {
        auto & rows_to_read = rows_per_granule[i];
        auto filtered_rows_num_at_granule_end = zero_tails[i];

        rows_to_read -= filtered_rows_num_at_granule_end;

        /// Copy the kept prefix of this granule's filter, then skip its zero tail.
        memcpySmallAllowReadWriteOverflow15(new_filter_data, filter_data, rows_to_read);
        filter_data += rows_to_read;
        new_filter_data += rows_to_read;

        filter_data += filtered_rows_num_at_granule_end;
    }

    new_filter_vec.resize(new_filter_data - new_filter_vec.data());
}
2018-02-20 11:45:58 +00:00
/// Counts the number of zero bytes at the end of [begin, end).
size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, const UInt8 * end)
{
    size_t count = 0;

#if defined(__SSE2__) && defined(__POPCNT__)
    /// Fast path: process 64 bytes per iteration from the tail.
    /// For each 16-byte chunk, cmpgt+movemask produces one bit per byte (set
    /// when the byte is non-zero). The four 16-bit masks are packed into a
    /// UInt64 so that bytes closer to `end` land in higher bits, hence
    /// __builtin_clzll(val) is exactly the number of trailing zero bytes.
    /// NOTE(review): the guard also requires __POPCNT__ although only
    /// movemask/clz are used — presumably a build-flags convention; confirm.
    const __m128i zero16 = _mm_setzero_si128();
    while (end - begin >= 64)
    {
        end -= 64;
        auto pos = end;
        UInt64 val =
                static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
                        _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos)),
                        zero16)))
                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
                        _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 16)),
                        zero16))) << 16)
                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
                        _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 32)),
                        zero16))) << 32)
                | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
                        _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
                        zero16))) << 48);
        if (val == 0)
            count += 64;
        else
        {
            count += __builtin_clzll(val);
            return count;
        }
    }
#endif

    /// Scalar tail (and the whole scan when the SIMD path is unavailable).
    while (end > begin && *(--end) == 0)
    {
        ++count;
    }

    return count;
}
2018-03-05 14:41:43 +00:00
/// Installs new_filter as the result's filter.
/// A constant-false filter clears the result; a constant-true filter is
/// discarded (no filtering needed). Otherwise the filter is stored as a
/// ColumnUInt8, with filter_holder keeping the underlying column alive.
void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter)
{
    if (!new_filter && filter)
        throw Exception("Can't replace existing filter with empty.", ErrorCodes::LOGICAL_ERROR);

    if (filter)
    {
        size_t new_size = new_filter->size();

        if (new_size != total_rows_per_granule)
            throw Exception("Can't set filter because it's size is " + toString(new_size) + " but "
                            + toString(total_rows_per_granule) + " rows was read.", ErrorCodes::LOGICAL_ERROR);
    }

    ConstantFilterDescription const_description(*new_filter);
    if (const_description.always_false)
        clear();
    else if (!const_description.always_true)
    {
        FilterDescription filter_description(*new_filter);
        /// Hold the concrete (possibly converted) column so `filter` stays valid.
        filter_holder = filter_description.data_holder ? filter_description.data_holder : new_filter;
        filter = typeid_cast<const ColumnUInt8 *>(filter_holder.get());
        if (!filter)
            throw Exception("setFilter function expected ColumnUInt8.", ErrorCodes::LOGICAL_ERROR);
    }
}
2018-02-20 11:45:58 +00:00
/// A link in a chain of readers: prev_reader (if any) produces the input
/// block and filter; this reader adds its own columns and optionally applies
/// alias/prewhere actions. Raw-pointer arguments are stored as-is and must
/// outlive the reader.
MergeTreeRangeReader::MergeTreeRangeReader(
        MergeTreeReader * merge_tree_reader, MergeTreeRangeReader * prev_reader,
        ExpressionActionsPtr alias_actions, ExpressionActionsPtr prewhere_actions,
        const String * prewhere_column_name, const Names * ordered_names,
        bool always_reorder, bool remove_prewhere_column, bool last_reader_in_chain)
        : merge_tree_reader(merge_tree_reader)
        , prev_reader(prev_reader), prewhere_column_name(prewhere_column_name)
        , ordered_names(ordered_names), alias_actions(alias_actions), prewhere_actions(std::move(prewhere_actions))
        , always_reorder(always_reorder), remove_prewhere_column(remove_prewhere_column)
        , last_reader_in_chain(last_reader_in_chain), is_initialized(true)
{
}
bool MergeTreeRangeReader : : isReadingFinished ( ) const
{
return prev_reader ? prev_reader - > isReadingFinished ( ) : stream . isFinished ( ) ;
}
/// Rows already consumed from the current granule (asked of the chain head).
size_t MergeTreeRangeReader::numReadRowsInCurrentGranule() const
{
    if (prev_reader)
        return prev_reader->numReadRowsInCurrentGranule();
    return stream.numReadRowsInCurrentGranule();
}
/// Rows still unread in the current granule (asked of the chain head).
size_t MergeTreeRangeReader::numPendingRowsInCurrentGranule() const
{
    if (prev_reader)
        return prev_reader->numPendingRowsInCurrentGranule();

    if (auto pending_rows = stream.numPendingRowsInCurrentGranule())
        return pending_rows;

    /// Zero means the stream is not initialized yet; fall back to the
    /// granularity of the current mark.
    return stream.current_mark_index_granularity;
}
/// Total rows remaining in this stream's mark range: the sum of rows of all
/// granules in [current_mark, last_mark), minus the rows already consumed
/// from the current granule.
size_t MergeTreeRangeReader::Stream::numPendingRows() const
{
    size_t pending = 0;
    size_t mark = current_mark;
    while (mark < last_mark)
        pending += merge_tree_reader->data_part->index_granularity.getMarkRows(mark++);

    return pending - offset_after_current_mark;
}
2018-02-20 11:50:33 +00:00
bool MergeTreeRangeReader : : isCurrentRangeFinished ( ) const
2018-02-20 11:45:58 +00:00
{
return prev_reader ? prev_reader - > isCurrentRangeFinished ( ) : stream . isFinished ( ) ;
}
2018-02-13 19:34:15 +00:00
2018-02-20 11:45:58 +00:00
/// Reads at most max_rows rows from the given mark ranges (consumed in place).
/// With a prev_reader, this continues the chain: own columns are read for the
/// rows prev_reader produced, filtered by prev_reader's filter and merged
/// into the result block. Without one, a fresh reading chain is started.
/// Afterwards missing columns are filled, prewhere actions applied, and (for
/// the last reader in the chain) columns are reordered if needed.
MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, MarkRanges & ranges)
{
    if (max_rows == 0)
        throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR);

    ReadResult read_result;
    size_t prev_bytes = 0;
    bool should_reorder = false;

    if (prev_reader)
    {
        read_result = prev_reader->read(max_rows, ranges);
        prev_bytes = read_result.block.bytes();
        Block block = continueReadingChain(read_result);

        bool should_evaluate_missing_defaults = false;

        if (block)
        {
            /// block.rows() <= read_result.block. We must filter block before adding columns to read_result.block
            /// Fill missing columns before filtering because some arrays from Nested may have empty data.
            merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, block.rows());

            if (read_result.getFilter())
                filterBlock(block, read_result.getFilter()->getData());
        }
        else
        {
            size_t num_rows = read_result.block.rows();

            if (!read_result.block)
            {
                if (auto * filter = read_result.getFilter())
                    num_rows = countBytesInFilter(filter->getData()); /// All columns were removed and filter is not always true.
                else if (read_result.totalRowsPerGranule())
                    num_rows = read_result.numReadRows(); /// All columns were removed and filter is always true.
                /// else filter is always false.
            }

            /// If block is empty, we still may need to add missing columns.
            /// In that case use number of rows in result block and don't filter block.
            if (num_rows)
                merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, num_rows);
        }

        for (auto i : ext::range(0, block.columns()))
            read_result.block.insert(std::move(block.getByPosition(i)));

        if (read_result.block)
        {
            if (should_evaluate_missing_defaults)
                merge_tree_reader->evaluateMissingDefaults(read_result.block);
        }
    }
    else
    {
        read_result = startReadingChain(max_rows, ranges);

        if (read_result.block)
        {
            bool should_evaluate_missing_defaults;
            merge_tree_reader->fillMissingColumns(read_result.block, should_reorder, should_evaluate_missing_defaults,
                                                  read_result.block.rows());

            if (should_evaluate_missing_defaults)
                merge_tree_reader->evaluateMissingDefaults(read_result.block);
        }
    }

    if (!read_result.block)
        return read_result;

    /// Account only the bytes added by this reader; prev_bytes were counted upstream.
    read_result.addNumBytesRead(read_result.block.bytes() - prev_bytes);

    executePrewhereActionsAndFilterColumns(read_result);

    if (last_reader_in_chain && (should_reorder || always_reorder))
        merge_tree_reader->reorderColumns(read_result.block, *ordered_names, prewhere_column_name);

    return read_result;
}
2018-03-05 14:41:43 +00:00
void MergeTreeRangeReader : : filterBlock ( Block & block , const IColumn : : Filter & filter ) const
2018-02-22 12:43:57 +00:00
{
for ( const auto i : ext : : range ( 0 , block . columns ( ) ) )
{
auto & col = block . getByPosition ( i ) ;
if ( col . column )
2018-03-05 14:41:43 +00:00
{
col . column = col . column - > filter ( filter , - 1 ) ;
if ( col . column - > empty ( ) )
{
block . clear ( ) ;
return ;
}
}
2018-02-22 12:43:57 +00:00
}
}
2018-02-20 11:45:58 +00:00
/// First reader in the chain: pulls up to max_rows rows out of `ranges`
/// (consumed from the back), granule by granule, recording each granule's
/// size in the result.
MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges)
{
    ReadResult result;

    /// Stream is lazy. result.num_added_rows is the number of rows added to block which is not equal to
    /// result.num_rows_read until call to stream.finalize(). Also result.num_added_rows may be less than
    /// result.num_rows_read if the last granule in range also the last in part (so we have to adjust last granule).
    {
        size_t space_left = max_rows;
        while (space_left && (!stream.isFinished() || !ranges.empty()))
        {
            if (stream.isFinished())
            {
                /// Current range exhausted: flush delayed rows, switch to the next range.
                result.addRows(stream.finalize(result.block));
                stream = Stream(ranges.back().begin, ranges.back().end, merge_tree_reader);
                result.addRange(ranges.back());
                ranges.pop_back();
            }

            auto rows_to_read = std::min(space_left, stream.numPendingRowsInCurrentGranule());
            //std::cerr << "Rows To Read:" << rows_to_read << std::endl;
            bool last = rows_to_read == space_left;
            result.addRows(stream.read(result.block, rows_to_read, !last));
            result.addGranule(rows_to_read);
            space_left -= rows_to_read;
        }
    }

    result.addRows(stream.finalize(result.block));

    /// Last granule may be incomplete.
    result.adjustLastGranule();

    return result;
}
2018-02-22 12:43:57 +00:00
/// Subsequent reader in the chain: reads this reader's own columns for
/// exactly the granule layout recorded in `result` by the previous reader,
/// restarting the stream whenever a new mark range begins.
/// Returns the block with the newly read columns (may be empty if all of
/// this reader's columns were already read in prewhere).
Block MergeTreeRangeReader::continueReadingChain(ReadResult & result)
{
    Block block;

    if (result.rowsPerGranule().empty())
    {
        /// If zero rows were read on prev step, than there is no more rows to read.
        /// Last granule may have less rows than index_granularity, so finish reading manually.
        stream.finish();
        return block;
    }

    auto & rows_per_granule = result.rowsPerGranule();
    auto & started_ranges = result.startedRanges();

    size_t added_rows = 0;
    size_t next_range_to_start = 0;

    auto size = rows_per_granule.size();
    for (auto i : ext::range(0, size))
    {
        if (next_range_to_start < started_ranges.size()
            && i == started_ranges[next_range_to_start].num_granules_read_before_start)
        {
            /// The previous reader started a new mark range at this granule: mirror it.
            added_rows += stream.finalize(block);
            auto & range = started_ranges[next_range_to_start].range;
            ++next_range_to_start;
            stream = Stream(range.begin, range.end, merge_tree_reader);
        }

        bool last = i + 1 == size;
        added_rows += stream.read(block, rows_per_granule[i], !last);
    }

    stream.skip(result.numRowsToSkipInLastGranule());
    added_rows += stream.finalize(block);

    /// added_rows may be zero if all columns were read in prewhere and it's ok.
    if (added_rows && added_rows != result.totalRowsPerGranule())
        throw Exception("RangeReader read " + toString(added_rows) + " rows, but "
                        + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR);

    return block;
}
2018-02-20 11:45:58 +00:00
/// Runs alias and prewhere expressions on result.block, extracts the prewhere
/// column as the result's filter, and filters the block's columns unless the
/// filter turned out to be constant-true. For the last reader in the chain
/// the filter is applied eagerly; otherwise optimize() may shrink it instead.
void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result)
{
    if (!prewhere_actions)
        return;

    if (alias_actions)
        alias_actions->execute(result.block);

    prewhere_actions->execute(result.block);
    auto & prewhere_column = result.block.getByName(*prewhere_column_name);
    size_t prev_rows = result.block.rows();
    ColumnPtr filter = prewhere_column.column;
    prewhere_column.column = nullptr;

    if (result.getFilter())
    {
        /// TODO: implement for prewhere chain.
        /// In order to do it we need combine filter and result.filter, where filter filters only '1' in result.filter.
        throw Exception("MergeTreeRangeReader chain with several prewhere actions in not implemented.",
                        ErrorCodes::LOGICAL_ERROR);
    }

    result.setFilter(filter);

    /// Intermediate readers may defer filtering: optimize() collapses zero tails instead.
    if (!last_reader_in_chain)
        result.optimize();

    bool filter_always_true = !result.getFilter() && result.totalRowsPerGranule() == filter->size();

    if (result.totalRowsPerGranule() == 0)
        result.block.clear();
    else if (!filter_always_true)
    {
        FilterDescription filter_description(*filter);

        if (last_reader_in_chain)
        {
            size_t num_bytes_in_filter = countBytesInFilter(*filter_description.data);
            if (num_bytes_in_filter == 0)
                result.block.clear();
            else if (num_bytes_in_filter == filter->size())
                filter_always_true = true;
        }

        if (!filter_always_true)
            filterBlock(result.block, *filter_description.data);
    }

    if (!result.block)
        return;

    auto getNumRows = [&]()
    {
        /// If block has single column, it's filter. We need to count bytes in it in order to get the number of rows.
        if (result.block.columns() > 1)
            return result.block.rows();
        else if (result.getFilter())
            return countBytesInFilter(result.getFilter()->getData());
        else
            return prev_rows;
    };

    if (remove_prewhere_column)
        result.block.erase(*prewhere_column_name);
    else
        prewhere_column.column = prewhere_column.type->createColumnConst(getNumRows(), 1u);

    /// If block is empty, create column in order to store rows number.
    if (last_reader_in_chain && result.block.columns() == 0)
        result.block.insert({ColumnNothing::create(getNumRows()), std::make_shared<DataTypeNothing>(), "_nothing"});
}
2017-06-14 10:50:22 +00:00
}