Merge pull request #11955 from ClickHouse/revive-mmap-2

Allow using direct_io and mmap_io for secondary indices.
This commit is contained in:
alexey-milovidov 2020-06-26 03:16:56 +03:00 committed by GitHub
commit 2163716e45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 41 additions and 23 deletions

View File

@ -544,6 +544,14 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
if (minmax_idx_condition)
LOG_DEBUG(log, "MinMax index condition: {}", minmax_idx_condition->toString());
MergeTreeReaderSettings reader_settings =
{
.min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io,
.min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io,
.max_read_buffer_size = settings.max_read_buffer_size,
.save_marks_in_cache = true
};
/// PREWHERE
String prewhere_column;
if (select.prewhere())
@ -583,7 +591,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
for (const auto & index_and_condition : useful_indices)
ranges.ranges = filterMarksUsingIndex(
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings);
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings);
if (!ranges.ranges.empty())
{
@ -605,14 +613,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
Pipes res;
MergeTreeReaderSettings reader_settings =
{
.min_bytes_to_use_direct_io = settings.min_bytes_to_use_direct_io,
.min_bytes_to_use_mmap_io = settings.min_bytes_to_use_mmap_io,
.max_read_buffer_size = settings.max_read_buffer_size,
.save_marks_in_cache = true
};
/// Projection that is needed to drop columns which have appeared as a result of executing
/// some extra expressions, and to allow executing the same expressions later.
/// NOTE: It may lead to double computation of expressions.
@ -1405,7 +1405,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
MergeTreeIndexConditionPtr condition,
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings) const
const Settings & settings,
const MergeTreeReaderSettings & reader_settings) const
{
if (!part->volume->getDisk()->exists(part->getFullRelativePath() + index_helper->getFileName() + ".idx"))
{
@ -1428,9 +1429,10 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
size_t index_marks_count = (marks_count - final_mark + index_granularity - 1) / index_granularity;
MergeTreeIndexReader reader(
index_helper, part,
index_marks_count,
ranges);
index_helper, part,
index_marks_count,
ranges,
reader_settings);
MarkRanges res;

View File

@ -106,7 +106,8 @@ private:
MergeTreeIndexConditionPtr condition,
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings) const;
const Settings & settings,
const MergeTreeReaderSettings & reader_settings) const;
};
}

View File

@ -5,12 +5,13 @@ namespace DB
{
MergeTreeIndexReader::MergeTreeIndexReader(
MergeTreeIndexPtr index_, MergeTreeData::DataPartPtr part_, size_t marks_count_, const MarkRanges & all_mark_ranges_)
MergeTreeIndexPtr index_, MergeTreeData::DataPartPtr part_, size_t marks_count_, const MarkRanges & all_mark_ranges_,
MergeTreeReaderSettings settings)
: index(index_), stream(
part_->volume->getDisk(),
part_->getFullRelativePath() + index->getFileName(), ".idx", marks_count_,
all_mark_ranges_,
MergeTreeReaderSettings{}, nullptr, nullptr,
std::move(settings), nullptr, nullptr,
part_->getFileSizeOrZero(index->getFileName() + ".idx"),
&part_->index_granularity_info,
ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE)

View File

@ -14,7 +14,8 @@ public:
MergeTreeIndexPtr index_,
MergeTreeData::DataPartPtr part_,
size_t marks_count_,
const MarkRanges & all_mark_ranges_);
const MarkRanges & all_mark_ranges_,
MergeTreeReaderSettings settings);
void seek(size_t mark);

View File

@ -1,11 +1,11 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (x String) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO test VALUES ('Hello, world');
DROP TABLE IF EXISTS test_01343;
CREATE TABLE test_01343 (x String) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO test_01343 VALUES ('Hello, world');
SET min_bytes_to_use_mmap_io = 1;
SELECT * FROM test;
SELECT * FROM test_01343;
SYSTEM FLUSH LOGS;
SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1;
SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01343%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1;
DROP TABLE test;
DROP TABLE test_01343;

View File

@ -0,0 +1,2 @@
Hello, world
2

View File

@ -0,0 +1,11 @@
-- Checks that a SELECT filtering on a column with a secondary index creates
-- mmap-based read buffers once min_bytes_to_use_mmap_io is set.

-- Start from a clean state in case an earlier run left the table behind.
DROP TABLE IF EXISTS test_01344;
-- Single String column carrying a `set(10)` index with GRANULARITY 1.
CREATE TABLE test_01344 (x String, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO test_01344 VALUES ('Hello, world');
-- Smallest non-zero threshold; presumably makes every file read eligible for mmap I/O.
SET min_bytes_to_use_mmap_io = 1;
-- Filter on the indexed column so that the index participates in the read.
SELECT * FROM test_01344 WHERE x = 'Hello, world';
-- Flush logs before querying system.query_log for the SELECT above.
SYSTEM FLUSH LOGS;
-- Print the CreatedReadBufferMMap counter of the most recent matching query_log entry.
-- The LIKE pattern has no leading '%', so it matches only queries that START with the
-- SELECT text above; this statement itself starts with 'SELECT PE.Values' and is excluded.
-- NOTE(review): `type = 2` presumably selects "query finished" entries - confirm against
-- the system.query_log schema. The accompanying reference output appears to expect 2.
SELECT PE.Values FROM system.query_log ARRAY JOIN ProfileEvents AS PE WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query LIKE 'SELECT * FROM test_01344 WHERE x = ''Hello, world''%' AND PE.Names = 'CreatedReadBufferMMap' AND type = 2 ORDER BY event_time DESC LIMIT 1;
-- Clean up.
DROP TABLE test_01344;