From 8872417d00dcde1fb5ab6dbb5ef734397acaf15f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jun 2020 22:31:54 +0300 Subject: [PATCH 1/3] Respect direct_io/mmap settings while reading secondary indices --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 28 ++++++++++--------- .../MergeTree/MergeTreeDataSelectExecutor.h | 3 +- .../MergeTree/MergeTreeIndexReader.cpp | 5 ++-- src/Storages/MergeTree/MergeTreeIndexReader.h | 3 +- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 901f947dd6e..f1356663a5a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -544,6 +544,14 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (minmax_idx_condition) LOG_DEBUG(log, "MinMax index condition: {}", minmax_idx_condition->toString()); + MergeTreeReaderSettings reader_settings = + { + .min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io, + .min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io, + .max_read_buffer_size = settings.max_read_buffer_size, + .save_marks_in_cache = true + }; + /// PREWHERE String prewhere_column; if (select.prewhere()) @@ -583,7 +591,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( for (const auto & index_and_condition : useful_indices) ranges.ranges = filterMarksUsingIndex( - index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings); + index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings); if (!ranges.ranges.empty()) { @@ -605,14 +613,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( Pipes res; - MergeTreeReaderSettings reader_settings = - { - .min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io, - .min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io, - .max_read_buffer_size = settings.max_read_buffer_size, - .save_marks_in_cache = true - }; - /// Projection, that needed to drop columns, which have appeared by execution /// of some extra expressions, and to allow execute the same expressions later. /// NOTE: It may lead to double computation of expressions. @@ -1405,7 +1405,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, - const Settings & settings) const + const Settings & settings, + const MergeTreeReaderSettings & reader_settings) const { if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx")) { @@ -1428,9 +1429,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity; MergeTreeIndexReader reader( - index_helper, part, - index_marks_count, - ranges); + index_helper, part, + index_marks_count, + ranges, + reader_settings); MarkRanges res; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7c3b9584e98..831b690ec62 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -106,7 +106,8 @@ private: MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, - const Settings & settings) const; + const Settings & settings, + const MergeTreeReaderSettings & reader_settings) const; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index d8f13e49b31..eaba247009b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -5,12 +5,13 @@ namespace DB { MergeTreeIndexReader::MergeTreeIndexReader( - MergeTreeIndexPtr index_, MergeTreeData::DataPartPtr part_, size_t marks_count_, const MarkRanges & all_mark_ranges_) + MergeTreeIndexPtr index_, MergeTreeData::DataPartPtr part_, size_t marks_count_, const MarkRanges & all_mark_ranges_, + MergeTreeReaderSettings settings) : index(index_), stream( part_->volume->getDisk(), part_->getFullRelativePath() + index->getFileName(), ".idx", marks_count_, all_mark_ranges_, - MergeTreeReaderSettings{}, nullptr, nullptr, + std::move(settings), nullptr, nullptr, part_->getFileSizeOrZero(index->getFileName() + ".idx"), &part_->index_granularity_info, ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE) diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.h b/src/Storages/MergeTree/MergeTreeIndexReader.h index 9b5b1c7fcb2..68d681458be 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -14,7 +14,8 @@ public: MergeTreeIndexPtr index_, MergeTreeData::DataPartPtr part_, size_t marks_count_, - const MarkRanges & all_mark_ranges_); + const MarkRanges & all_mark_ranges_, + MergeTreeReaderSettings settings); void seek(size_t mark); From 734be4f376a0f3adcdeff200cf8079712a725322 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jun 2020 22:46:02 +0300 Subject: [PATCH 2/3] Minor modification --- .../0_stateless/01343_min_bytes_to_use_mmap_io.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql index 9e6e3708c9a..9ff16ca60a7 100644 --- a/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql +++ b/tests/queries/0_stateless/01343_min_bytes_to_use_mmap_io.sql @@ -1,11 +1,11 @@ -DROP TABLE IF EXISTS test; -CREATE TABLE test (x String) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO test VALUES ('Hello, world'); +DROP TABLE IF EXISTS test_01343; +CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test_01343 VALUES ('Hello, world'); SET min_bytes_to_use_mmap_io = 1; -SELECT * FROM test; +SELECT * FROM test_01343; SYSTEM FLUSH LOGS; -SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1; +SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01343%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1; -DROP TABLE test; +DROP TABLE test_01343; From 5d08789602e2147ccb5636ed995d4a675d398943 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jun 2020 22:48:12 +0300 Subject: [PATCH 3/3] Added a test --- .../01344_min_bytes_to_use_mmap_io_index.reference | 2 ++ .../01344_min_bytes_to_use_mmap_io_index.sql | 11 +++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.reference create mode 100644 tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.reference b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.reference new file mode 100644 index 00000000000..ac3ce287de5 --- /dev/null +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.reference @@ -0,0 +1,2 @@ +Hello, world +2 diff --git a/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql new file mode 100644 index 00000000000..67baef7136d --- /dev/null +++ b/tests/queries/0_stateless/01344_min_bytes_to_use_mmap_io_index.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test_01344; +CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test_01344 VALUES ('Hello, world'); + +SET min_bytes_to_use_mmap_io = 1; +SELECT * FROM test_01344 WHERE x = 'Hello, world'; + +SYSTEM FLUSH LOGS; +SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01344 WHERE x = ''Hello, world''%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1; + +DROP TABLE test_01344;