Merge pull request #1015 from yandex/fix-performance-after-changing-reading

Fix performance after changing reading
alexey-milovidov 2017-07-24 18:07:57 +03:00 committed by GitHub
commit d3159ed2c5


@@ -87,15 +87,23 @@ Block MergeTreeBaseBlockInputStream::readFromPart()
     if (!task.size_predictor)
         return max_block_size_rows;
 
-    size_t rows_to_read = std::max(index_granularity, task.size_predictor->estimateNumRows(preferred_block_size_bytes));
+    /// Calculates the number of rows that will be read using preferred_block_size_bytes.
+    /// Can't be less than index_granularity.
+    size_t rows_to_read = task.size_predictor->estimateNumRows(preferred_block_size_bytes);
+    if (!rows_to_read)
+        return rows_to_read;
+    rows_to_read = std::max(index_granularity, rows_to_read);
+
     if (preferred_max_column_in_block_size_bytes)
     {
+        /// Calculates the number of rows that will be read using preferred_max_column_in_block_size_bytes.
         size_t rows_to_read_for_max_size_column
             = task.size_predictor->estimateNumRowsForMaxSizeColumn(preferred_max_column_in_block_size_bytes);
         double filtration_ratio = std::max(min_filtration_ratio, 1.0 - task.size_predictor->filtered_rows_ratio);
         size_t rows_to_read_for_max_size_column_with_filtration
             = static_cast<size_t>(rows_to_read_for_max_size_column / filtration_ratio);
+
+        /// If preferred_max_column_in_block_size_bytes is used, the number of rows to read can be less than index_granularity.
+        rows_to_read = std::min(rows_to_read, rows_to_read_for_max_size_column_with_filtration);
     }
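
For context, the sketch below restates the patched estimation logic as a standalone function. It is a minimal model under stated assumptions, not the real implementation: estimateRowsToRead, the bytes-per-row parameters, and the numbers in main are hypothetical stand-ins for what the task's size predictor computes internally, and the min_filtration_ratio value is assumed; only the control flow mirrors the diff.

#include <algorithm>
#include <cstddef>
#include <iostream>

/// Hypothetical model of the patched logic; only the control flow mirrors the diff.
size_t estimateRowsToRead(
    double bytes_per_row,               /// predictor's estimate for a whole row (assumed input)
    double max_column_bytes_per_row,    /// predictor's estimate for the heaviest column (assumed input)
    double filtered_rows_ratio,         /// fraction of rows filtered out so far
    size_t index_granularity,
    size_t preferred_block_size_bytes,
    size_t preferred_max_column_in_block_size_bytes)
{
    constexpr double min_filtration_ratio = 0.00001;  /// assumed value; defined elsewhere in the real file

    /// Rows that fit into preferred_block_size_bytes; clamped up to index_granularity,
    /// but only after the zero check that the patch adds before the clamp.
    size_t rows_to_read = static_cast<size_t>(preferred_block_size_bytes / bytes_per_row);
    if (!rows_to_read)
        return rows_to_read;
    rows_to_read = std::max(index_granularity, rows_to_read);

    if (preferred_max_column_in_block_size_bytes)
    {
        /// Rows that keep the heaviest column under its own byte budget.
        size_t rows_for_max_size_column = static_cast<size_t>(
            preferred_max_column_in_block_size_bytes / max_column_bytes_per_row);

        /// When most rows are filtered out, read proportionally more so the
        /// surviving block still approaches the preferred size.
        double filtration_ratio = std::max(min_filtration_ratio, 1.0 - filtered_rows_ratio);
        size_t rows_with_filtration = static_cast<size_t>(rows_for_max_size_column / filtration_ratio);

        /// This cap is allowed to push the result below index_granularity.
        rows_to_read = std::min(rows_to_read, rows_with_filtration);
    }
    return rows_to_read;
}

int main()
{
    /// 100-byte rows, a 40-byte heaviest column, 90% of rows filtered out,
    /// 8192-row granules, 1 MiB block budget, 64 KiB per-column budget.
    std::cout << estimateRowsToRead(100.0, 40.0, 0.9, 8192, 1 << 20, 1 << 16) << '\n';  /// prints 10485
}

With these example inputs the block-size budget dominates (1 MiB / 100 bytes ≈ 10485 rows, above the 8192-row granularity), while the per-column cap, inflated by the 0.1 filtration ratio to 16380 rows, does not bind.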