Only read one chunk in MergingTransform when a virtual row is encountered

This commit is contained in:
jsc0218 2024-05-04 02:09:17 +00:00
parent ba049d85b3
commit 86c7488647
4 changed files with 27 additions and 4 deletions

View File

@ -1,3 +1,4 @@
#include <Processors/Merges/Algorithms/MergeTreeReadInfo.h>
#include <Processors/Merges/IMergingTransform.h>
namespace DB
@ -101,8 +102,10 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
/// setNotNeeded after reading first chunk, because in optimistic case
/// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and small 'n')
/// we won't have to read any chunks anymore;
auto chunk = input.pull(limit_hint != 0);
if ((limit_hint && chunk.getNumRows() < limit_hint) || always_read_till_end)
/// If a virtual row exists, test it first, so that no more chunks are read.
auto chunk = input.pull(true);
if ((limit_hint == 0 && !getVirtualRowFromChunk(chunk))
|| (limit_hint && chunk.getNumRows() < limit_hint) || always_read_till_end)
input.setNeeded();
if (!chunk.hasRows())

View File

@ -65,7 +65,7 @@ public:
void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; }
void addVirtualRowToChunk(bool add_virtual_row_, const Columns& index_, size_t mark_range_begin_)
void addVirtualRowToChunk(bool add_virtual_row_, const Columns & index_, size_t mark_range_begin_)
{
add_virtual_row = add_virtual_row_;
index = index_;

View File

@ -2,4 +2,4 @@
1
2
3
24578
16386

View File

@ -46,4 +46,24 @@ AND query not like '%system.query_log%'
ORDER BY query_start_time DESC, read_rows DESC
LIMIT 1;
-- SELECT x
-- FROM t
-- ORDER BY x ASC
-- LIMIT 4
-- SETTINGS max_block_size = 8192,
-- read_in_order_two_level_merge_threshold = 5, --avoid preliminary merge
-- max_threads = 1,
-- optimize_read_in_order = 1;
-- SYSTEM FLUSH LOGS;
-- -- without virtual row 16.38k, but with virtual row 24.58k, because it reads again (why?) in the non-target part after reading its virtual row and before sending the virtual row to the priority queue
-- SELECT read_rows
-- FROM system.query_log
-- WHERE current_database = currentDatabase()
-- AND query like '%SELECT x%'
-- AND query not like '%system.query_log%'
-- ORDER BY query_start_time DESC, read_rows DESC
-- LIMIT 1;
DROP TABLE t;