Merge pull request #1015 from yandex/fix-performance-after-changing-reading

Fix performance after changing reading
alexey-milovidov 2017-07-24 18:07:57 +03:00 committed by GitHub
commit d3159ed2c5


@@ -87,15 +87,23 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
     if (!task.size_predictor)
         return max_block_size_rows;
 
-    size_t rows_to_read = std::max(index_granularity, task.size_predictor->estimateNumRows(preferred_block_size_bytes));
+    /// Calculates the number of rows that will be read using preferred_block_size_bytes.
+    /// Can't be less than index_granularity.
+    size_t rows_to_read = task.size_predictor->estimateNumRows(preferred_block_size_bytes);
+    if (!rows_to_read)
+        return rows_to_read;
+    rows_to_read = std::max(index_granularity, rows_to_read);
+
     if (preferred_max_column_in_block_size_bytes)
     {
+        /// Calculates the number of rows that will be read using preferred_max_column_in_block_size_bytes.
         size_t rows_to_read_for_max_size_column
             = task.size_predictor->estimateNumRowsForMaxSizeColumn(preferred_max_column_in_block_size_bytes);
         double filtration_ratio = std::max(min_filtration_ratio, 1.0 - task.size_predictor->filtered_rows_ratio);
         size_t rows_to_read_for_max_size_column_with_filtration
             = static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio);
+
+        /// If preferred_max_column_in_block_size_bytes is used, the number of rows to read can be less than index_granularity.
+        rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration);
     }
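
For context, the sketch below restates the patched estimation logic as a standalone function. It is a minimal model under stated assumptions, not the real implementation: estimateRowsToRead, the bytes-per-row parameters, and the numbers in main are hypothetical stand-ins for what the task's size predictor computes internally, and the min_filtration_ratio value is assumed; only the control flow mirrors the diff.

#include <algorithm>
#include <cstddef>
#include <iostream>

/// Hypothetical model of the patched logic; only the control flow mirrors the diff.
size_t estimateRowsToRead(
    double bytes_per_row,               /// predictor's estimate for a whole row (assumed input)
    double max_column_bytes_per_row,    /// predictor's estimate for the heaviest column (assumed input)
    double filtered_rows_ratio,         /// fraction of rows filtered out so far
    size_t index_granularity,
    size_t preferred_block_size_bytes,
    size_t preferred_max_column_in_block_size_bytes)
{
    constexpr double min_filtration_ratio = 0.00001;  /// assumed value; defined elsewhere in the real file

    /// Rows that fit into preferred_block_size_bytes; clamped up to index_granularity,
    /// but only after the zero check that the patch adds before the clamp.
    size_t rows_to_read = static_cast<size_t>(preferred_block_size_bytes / bytes_per_row);
    if (!rows_to_read)
        return rows_to_read;
    rows_to_read = std::max(index_granularity, rows_to_read);

    if (preferred_max_column_in_block_size_bytes)
    {
        /// Rows that keep the heaviest column under its own byte budget.
        size_t rows_for_max_size_column = static_cast<size_t>(
            preferred_max_column_in_block_size_bytes / max_column_bytes_per_row);

        /// When most rows are filtered out, read proportionally more so the
        /// surviving block still approaches the preferred size.
        double filtration_ratio = std::max(min_filtration_ratio, 1.0 - filtered_rows_ratio);
        size_t rows_with_filtration = static_cast<size_t>(rows_for_max_size_column / filtration_ratio);

        /// This cap is allowed to push the result below index_granularity.
        rows_to_read = std::min(rows_to_read, rows_with_filtration);
    }
    return rows_to_read;
}

int main()
{
    /// 100-byte rows, a 40-byte heaviest column, 90% of rows filtered out,
    /// 8192-row granules, 1 MiB block budget, 64 KiB per-column budget.
    std::cout << estimateRowsToRead(100.0, 40.0, 0.9, 8192, 1 << 20, 1 << 16) << '\n';  /// prints 10485
}

With these example inputs the block-size budget dominates (1 MiB / 100 bytes ≈ 10485 rows, above the 8192-row granularity), while the per-column cap, inflated by the 0.1 filtration ratio to 16380 rows, does not bind.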