#include "CachedInMemoryReadBufferFromFile.h"
/// NOTE(review): the targets of the next three includes were lost when this file was
/// extracted (only bare `#include` tokens survived). They are reconstructed here; confirm
/// against the build. SCOPE_EXIT below needs a scope-guard header.
#include <IO/SwapHelper.h>
#include <base/scope_guard.h>
#include <Common/logger_useful.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int UNEXPECTED_END_OF_FILE;
    extern const int CANNOT_SEEK_THROUGH_FILE;
    extern const int SEEK_POSITION_OUT_OF_BOUND;
}

/// Wraps `in_` so that reads are served in whole chunks obtained from `cache_`.
/// The base class is constructed without a buffer of its own and with the file size reported by `in_`.
/// Initially the whole file is readable (read_until_position == file size), and the offset
/// stored in the cache key is reset to 0 regardless of what the caller passed.
/// NOTE(review): the template argument of `std::unique_ptr` was lost in extraction;
/// `ReadBufferFromFileBase` is inferred from the calls made on `in` — confirm against the header.
CachedInMemoryReadBufferFromFile::CachedInMemoryReadBufferFromFile(
    FileChunkAddress cache_key_, PageCachePtr cache_, std::unique_ptr<ReadBufferFromFileBase> in_, const ReadSettings & settings_)
    : ReadBufferFromFileBase(0, nullptr, 0, in_->getFileSize()), cache_key(cache_key_), cache(cache_), settings(settings_), in(std::move(in_))
    , read_until_position(file_size.value())
{
    cache_key.offset = 0;
}

/// Returns the file name reported by the wrapped buffer.
String CachedInMemoryReadBufferFromFile::getFileName() const
{
    return in->getFileName();
}

/// Moves the read position to absolute offset `off`.
/// Throws CANNOT_SEEK_THROUGH_FILE if `whence` is not SEEK_SET, and SEEK_POSITION_OUT_OF_BOUND
/// if the offset is greater than the file size.
/// Returns `off`.
off_t CachedInMemoryReadBufferFromFile::seek(off_t off, int whence)
{
    if (whence != SEEK_SET)
        throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed.");

    size_t offset = static_cast<size_t>(off);
    if (offset > file_size.value())
        throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", off);

    /// The target lies inside the current working buffer: just move the cursor.
    if (offset >= file_offset_of_buffer_end - working_buffer.size() && offset <= file_offset_of_buffer_end)
    {
        pos = working_buffer.end() - (file_offset_of_buffer_end - offset);
        chassert(getPosition() == off);
        return off;
    }

    /// Otherwise drop the buffer and the chunk we hold; the next nextImpl() call
    /// picks the chunk that contains the new offset.
    resetWorkingBuffer();

    file_offset_of_buffer_end = offset;
    chunk.reset();

    chassert(getPosition() == off);
    return off;
}

/// Current logical position in the file: end-of-buffer offset minus the bytes not yet consumed.
off_t CachedInMemoryReadBufferFromFile::getPosition()
{
    return file_offset_of_buffer_end - available();
}

/// File offset that corresponds to the end of the working buffer.
size_t CachedInMemoryReadBufferFromFile::getFileOffsetOfBufferEnd() const
{
    return file_offset_of_buffer_end;
}

/// Sets the offset at which reading stops.
/// If the new limit is before the current position, the working buffer and the chunk are dropped.
/// If it falls inside the not-yet-consumed part of the working buffer, the buffer is truncated to it.
/// Otherwise only the limit changes.
void CachedInMemoryReadBufferFromFile::setReadUntilPosition(size_t position)
{
    read_until_position = position;
    if (position < static_cast<size_t>(getPosition()))
    {
        resetWorkingBuffer();
        chunk.reset();
    }
    else if (position < file_offset_of_buffer_end)
    {
        size_t diff = file_offset_of_buffer_end - position;
        working_buffer.resize(working_buffer.size() - diff);
        file_offset_of_buffer_end -= diff;
    }
}

/// Removes the read limit by setting it to the file size.
void CachedInMemoryReadBufferFromFile::setReadUntilEnd()
{
    setReadUntilPosition(file_size.value());
}

/// Makes the next piece of data available in the working buffer.
/// Returns false when file_offset_of_buffer_end has reached read_until_position.
/// Otherwise makes sure the chunk covering the current offset is held and populated
/// (filling it from `in` under the chunk's download mutex if needed), then exposes its data:
/// directly when no external buffer was supplied, or copied into internal_buffer when one was.
/// Throws UNEXPECTED_END_OF_FILE if `in` hits EOF before the chunk is fully read.
bool CachedInMemoryReadBufferFromFile::nextImpl()
{
    chassert(read_until_position <= file_size.value());
    if (file_offset_of_buffer_end >= read_until_position)
        return false;

    /// The chunk we hold has been consumed to its end; release it so that the next one is fetched.
    if (chunk.has_value() && file_offset_of_buffer_end >= cache_key.offset + cache->chunkSize())
    {
        chassert(file_offset_of_buffer_end == cache_key.offset + cache->chunkSize());
        chunk.reset();
    }

    if (!chunk.has_value())
    {
        /// Round the current offset down to a chunk boundary.
        cache_key.offset = file_offset_of_buffer_end / cache->chunkSize() * cache->chunkSize();
        chunk = cache->getOrSet(cache_key.hash(), settings.read_from_page_cache_if_exists_otherwise_bypass_cache, settings.page_cache_inject_eviction);

        /// The last chunk of the file may be shorter than the cache's chunk size.
        size_t chunk_size = std::min(cache->chunkSize(), file_size.value() - cache_key.offset);

        std::unique_lock download_lock(chunk->getChunk()->state.download_mutex);

        if (!chunk->isPrefixPopulated(chunk_size))
        {
            /// A few things could be improved here, which may or may not be worth the added complexity:
            ///  * If the next file chunk is in cache, use in->setReadUntilPosition() to limit the read to
            ///    just one chunk. More generally, look ahead in the cache to count how many next chunks
            ///    need to be downloaded. (Up to some limit? And avoid changing `in`'s until-position if
            ///    it's already reasonable; otherwise we'd increase it by one chunk every chunk, discarding
            ///    a half-completed HTTP request every time.)
            ///  * If only a subset of pages are missing from this chunk, download only them,
            ///    with some threshold for avoiding short seeks.
            ///    In particular, if a previous download failed in the middle of the chunk, we could
            ///    resume from that position instead of from the beginning of the chunk.
            ///    (It's also possible in principle that a proper subset of chunk's pages was reclaimed
            ///    by the OS. But, for performance purposes, we should completely ignore that, because
            ///    (a) PageCache normally uses 2 MiB transparent huge pages and has just one such page
            ///    per chunk, and (b) even with 4 KiB pages partial chunk eviction is extremely rare.)
            ///  * If our [position, read_until_position) covers only part of the chunk, we could download
            ///    just that part. (Which would be bad if someone else needs the rest of the chunk and has
            ///    to do a whole new HTTP request to get it. Unclear what the policy should be.)
            ///  * Instead of doing in->next() in a loop until we get the whole chunk, we could return the
            ///    results as soon as in->next() produces them.
            ///    (But this would make the download_mutex situation much more complex, similar to the
            ///    FileSegment::State::PARTIALLY_DOWNLOADED and FileSegment::setRemoteFileReader() stuff.)

            /// Temporarily point `in` at the chunk's memory; restore its own buffer on every exit path.
            Buffer prev_in_buffer = in->internalBuffer();
            SCOPE_EXIT({ in->set(prev_in_buffer.begin(), prev_in_buffer.size()); });

            /// Number of bytes of the chunk filled so far. Deliberately not named `pos`,
            /// which is the buffer's own cursor and is used further down in this function.
            size_t downloaded = 0;
            while (downloaded < chunk_size)
            {
                char * piece_start = chunk->getChunk()->data + downloaded;
                size_t piece_size = chunk_size - downloaded;
                in->set(piece_start, piece_size);
                if (downloaded == 0)
                    in->seek(cache_key.offset, SEEK_SET);
                else
                    chassert(!in->available());

                if (in->eof())
                    throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "File {} ended after {} bytes, but we expected {}",
                        getFileName(), cache_key.offset + downloaded, file_size.value());

                chassert(in->position() >= piece_start && in->buffer().end() <= piece_start + piece_size);
                chassert(in->getPosition() == static_cast<off_t>(cache_key.offset + downloaded));

                size_t n = in->available();
                chassert(n);
                /// `in` may have placed the data somewhere other than the start of the piece;
                /// shift it into place. The ranges may overlap, hence memmove.
                if (in->position() != piece_start)
                    memmove(piece_start, in->position(), n);
                in->position() += n;
                downloaded += n;
            }

            chunk->markPrefixPopulated(chunk_size);
        }
    }

    /// Expose the chunk's memory directly, cut at read_until_position, with the cursor at the current offset.
    nextimpl_working_buffer_offset = file_offset_of_buffer_end - cache_key.offset;
    working_buffer = Buffer(
        chunk->getChunk()->data,
        chunk->getChunk()->data + std::min(chunk->getChunk()->size, read_until_position - cache_key.offset));
    pos = working_buffer.begin() + nextimpl_working_buffer_offset;

    if (!internal_buffer.empty())
    {
        /// We were given an external buffer to read into. Copy the data into it.
        /// Would be nice to avoid this copy, somehow, maybe by making ReadBufferFromRemoteFSGather
        /// and AsynchronousBoundedReadBuffer explicitly aware of the page cache.
        size_t n = std::min(available(), internal_buffer.size());
        memcpy(internal_buffer.begin(), pos, n);
        working_buffer = Buffer(internal_buffer.begin(), internal_buffer.begin() + n);
        pos = working_buffer.begin();
        nextimpl_working_buffer_offset = 0;
    }

    file_offset_of_buffer_end += available();

    return true;
}

}