Less getMark calls

This commit is contained in:
Amos Bird 2022-02-16 21:35:23 +08:00
parent d3bd8b5f93
commit f459e8fc95
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
4 changed files with 11 additions and 19 deletions

View File

@ -54,7 +54,7 @@ MergeTreeIndexReader::MergeTreeIndexReader(
std::move(settings));
version = index_format.version;
stream->adjustForRange(MarkRange(0, getLastMark(all_mark_ranges_)));
stream->adjustRightMark(getLastMark(all_mark_ranges_));
stream->seekToStart();
}

View File

@ -42,7 +42,8 @@ MergeTreeReaderStream::MergeTreeReaderStream(
{
size_t left_mark = mark_range.begin;
size_t right_mark = mark_range.end;
auto [_, mark_range_bytes] = getRightOffsetAndBytesRange(left_mark, right_mark);
size_t left_offset = left_mark < marks_count ? marks_loader.getMark(left_mark).offset_in_compressed_file : 0;
auto mark_range_bytes = getRightOffset(right_mark) - left_offset;
max_mark_range_bytes = std::max(max_mark_range_bytes, mark_range_bytes);
sum_mark_range_bytes += mark_range_bytes;
@ -108,18 +109,16 @@ MergeTreeReaderStream::MergeTreeReaderStream(
}
std::pair<size_t, size_t> MergeTreeReaderStream::getRightOffsetAndBytesRange(size_t left_mark, size_t right_mark_non_included)
size_t MergeTreeReaderStream::getRightOffset(size_t right_mark_non_included)
{
/// NOTE: if we are reading the whole file, then right_mark == marks_count
/// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
/// Special case, can happen in Collapsing/Replacing engines
if (marks_count == 0)
return std::make_pair(0, 0);
return 0;
assert(left_mark < marks_count);
assert(right_mark_non_included <= marks_count);
assert(left_mark <= right_mark_non_included);
size_t result_right_offset;
if (0 < right_mark_non_included && right_mark_non_included < marks_count)
@ -179,20 +178,13 @@ std::pair<size_t, size_t> MergeTreeReaderStream::getRightOffsetAndBytesRange(siz
}
}
else if (right_mark_non_included == 0)
{
result_right_offset = marks_loader.getMark(right_mark_non_included).offset_in_compressed_file;
}
else
{
result_right_offset = file_size;
}
size_t mark_range_bytes = result_right_offset - (left_mark < marks_count ? marks_loader.getMark(left_mark).offset_in_compressed_file : 0);
return std::make_pair(result_right_offset, mark_range_bytes);
return result_right_offset;
}
void MergeTreeReaderStream::seekToMark(size_t index)
{
MarkInCompressedFile mark = marks_loader.getMark(index);
@ -232,14 +224,14 @@ void MergeTreeReaderStream::seekToStart()
}
void MergeTreeReaderStream::adjustForRange(MarkRange range)
void MergeTreeReaderStream::adjustRightMark(size_t right_mark)
{
/**
* Note: this method is called multiple times for the same range of marks -- each time we
* read from stream, but we must update last_right_offset only if it is bigger than
* the last one to avoid redundantly cancelling prefetches.
*/
auto [right_offset, _] = getRightOffsetAndBytesRange(range.begin, range.end);
auto right_offset = getRightOffset(right_mark);
if (!right_offset)
{
if (last_right_offset && *last_right_offset == 0)

View File

@ -34,13 +34,13 @@ public:
* Does buffer need to know something about mark ranges bounds it is going to read?
* (In case of MergeTree* tables). Mostly needed for reading from remote fs.
*/
void adjustForRange(MarkRange range);
void adjustRightMark(size_t right_mark);
ReadBuffer * data_buffer;
CompressedReadBufferBase * compressed_data_buffer;
private:
std::pair<size_t, size_t> getRightOffsetAndBytesRange(size_t left_mark, size_t right_mark_non_included);
size_t getRightOffset(size_t right_mark_non_included);
DiskPtr disk;
std::string path_prefix;

View File

@ -212,7 +212,7 @@ static ReadBuffer * getStream(
return nullptr;
MergeTreeReaderStream & stream = *it->second;
stream.adjustForRange(MarkRange(seek_to_start ? 0 : from_mark, current_task_last_mark));
stream.adjustRightMark(current_task_last_mark);
if (seek_to_start)
stream.seekToStart();