2020-01-22 16:17:25 +00:00
# pragma once
# include <IO/ReadBuffer.h>
2022-04-26 12:57:02 +00:00
# include <IO/WithFileSize.h>
2021-10-31 19:53:24 +00:00
# include <optional>
2020-01-22 16:17:25 +00:00
2020-01-27 19:17:22 +00:00
namespace DB
{
2020-01-22 16:17:25 +00:00
2022-02-12 22:20:05 +00:00
/// Error codes referenced by this header.
/// Only declared here (`extern`, no initializer); the definition lives in another translation unit.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}
2020-01-27 19:17:22 +00:00
class SeekableReadBuffer : public ReadBuffer
{
2020-01-22 16:17:25 +00:00
public :
SeekableReadBuffer ( Position ptr , size_t size )
: ReadBuffer ( ptr , size ) { }
SeekableReadBuffer ( Position ptr , size_t size , size_t offset )
: ReadBuffer ( ptr , size , offset ) { }
2020-01-27 19:51:48 +00:00
/**
* Shifts buffer current position to given offset .
* @ param off Offset .
* @ param whence Seek mode ( @ see SEEK_SET , @ see SEEK_CUR ) .
2021-07-12 21:07:33 +00:00
* @ return New position from the beginning of underlying buffer / file .
2023-03-13 19:29:59 +00:00
*
* What happens if you seek above the end of the file ? Implementation - defined .
2020-01-27 19:51:48 +00:00
*/
2020-01-27 18:44:30 +00:00
virtual off_t seek ( off_t off , int whence ) = 0 ;
2020-02-14 14:28:33 +00:00
2021-02-06 16:30:46 +00:00
/**
* Keep in mind that seekable buffer may encounter eof ( ) once and the working buffer
* may get into inconsistent state . Don ' t forget to reset it on the first nextImpl ( )
* after seek ( ) .
*/
2020-02-14 14:28:33 +00:00
/**
2020-02-19 19:26:33 +00:00
* @ return Offset from the begin of the underlying buffer / file corresponds to the buffer current position .
2020-02-14 14:28:33 +00:00
*/
virtual off_t getPosition ( ) = 0 ;
2022-02-12 22:20:05 +00:00
virtual String getInfoForLog ( ) { return " " ; }
2024-02-13 11:02:46 +00:00
/// NOTE: This method should be thread-safe against seek(), since it can be
/// used in CachedOnDiskReadBufferFromFile from multiple threads (because
/// it first releases the buffer, and then do logging, and so other thread
/// can already call seek() which will lead to data-race).
virtual size_t getFileOffsetOfBufferEnd ( ) const { throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " Method getFileOffsetOfBufferEnd() not implemented " ) ; }
2023-04-17 04:57:17 +00:00
/// If true, setReadUntilPosition() guarantees that eof will be reported at the given position.
2022-04-26 10:55:27 +00:00
virtual bool supportsRightBoundedReads ( ) const { return false ; }
2022-05-21 22:21:40 +00:00
2023-04-17 04:57:17 +00:00
/// Returns true if seek() actually works, false if seek() will always throw (or make subsequent
/// nextImpl() calls throw).
///
/// This is needed because:
/// * Sometimes there's no cheap way to know in advance whether the buffer is really seekable.
/// Specifically, HTTP read buffer needs to send a request to check whether the server
/// supports byte ranges.
/// * Sometimes when we create such buffer we don't know in advance whether we'll need it to be
/// seekable or not. So we don't want to pay the price for this check in advance.
2023-03-28 20:28:28 +00:00
virtual bool checkIfActuallySeekable ( ) { return true ; }
2023-05-05 03:11:51 +00:00
/// Unbuffered positional read.
/// Doesn't affect the buffer state (position, working_buffer, etc).
///
/// `progress_callback` may be called periodically during the read, reporting that to[0..m-1]
/// has been filled. If it returns true, reading is stopped, and readBigAt() returns bytes read
/// so far. Called only from inside readBigAt(), from the same thread, with increasing m.
///
/// Stops either after n bytes, or at end of file, or on exception. Returns number of bytes read.
/// If offset is past the end of file, may return 0 or throw exception.
///
/// Caller needs to be careful:
/// * supportsReadAt() must be checked (called and return true) before calling readBigAt().
/// Otherwise readBigAt() may crash.
/// * Thread safety: multiple readBigAt() calls may be performed in parallel.
/// But readBigAt() may not be called in parallel with any other methods
/// (e.g. next() or supportsReadAt()).
/// * Performance: there's no buffering. Each readBigAt() call typically translates into actual
/// IO operation (e.g. HTTP request). Don't use it for small adjacent reads.
2024-03-14 12:16:33 +00:00
virtual size_t readBigAt ( char * /*to*/ , size_t /*n*/ , size_t /*offset*/ , const std : : function < bool ( size_t m ) > & /*progress_callback*/ ) const
2023-05-05 03:11:51 +00:00
{ throw Exception ( ErrorCodes : : NOT_IMPLEMENTED , " Method readBigAt() not implemented " ) ; }
/// Checks if readBigAt() is allowed. May be slow, may throw (e.g. it may do an HTTP request or an fstat).
virtual bool supportsReadAt ( ) { return false ; }
2023-06-13 16:02:25 +00:00
/// We do some tricks to avoid seek cost. E.g we read more data and than ignore it (see remote_read_min_bytes_for_seek).
/// Sometimes however seek is basically free because underlying read buffer wasn't yet initialised (or re-initialised after reset).
2023-11-03 21:50:03 +00:00
virtual bool isSeekCheap ( ) { return false ; }
2023-10-13 22:26:59 +00:00
2023-10-31 21:45:42 +00:00
/// For tables that have an external storage (like S3) as their main storage we'd like to distinguish whether we're reading from this storage or from a local cache.
/// It allows to reuse all the optimisations done for reading from local tables when reading from cache.
2023-11-07 21:04:13 +00:00
virtual bool isContentCached ( [[maybe_unused]] size_t offset, [[maybe_unused]] size_t size ) { return false ; }
2023-03-24 01:34:24 +00:00
} ;
2021-10-31 19:53:24 +00:00
2023-05-05 03:11:51 +00:00
using SeekableReadBufferPtr = std : : shared_ptr < SeekableReadBuffer > ;
2023-03-24 01:34:24 +00:00
2023-01-10 00:32:37 +00:00
/// Wraps a reference to a SeekableReadBuffer into an unique pointer to SeekableReadBuffer.
/// This function is like wrapReadBufferReference() but for SeekableReadBuffer.
std : : unique_ptr < SeekableReadBuffer > wrapSeekableReadBufferReference ( SeekableReadBuffer & ref ) ;
std : : unique_ptr < SeekableReadBuffer > wrapSeekableReadBufferPointer ( SeekableReadBufferPtr ptr ) ;
2023-05-05 03:11:51 +00:00
/// Helper for implementing readBigAt().
/// Updates *out_bytes_copied after each call to the callback, as well as at the end.
///
/// @param istr               Input stream to copy from.
/// @param to                 Destination memory.
/// @param n                  Number of bytes requested (NOTE(review): presumably an upper bound — confirm in the definition).
/// @param progress_callback  Callback receiving a byte count and returning bool; see readBigAt() for the contract.
/// @param out_bytes_copied   Receives the running number of bytes copied.
/// @param out_cancelled      Optional (defaults to nullptr).
///                           NOTE(review): presumably reports whether the callback stopped the copy — confirm in the definition.
void copyFromIStreamWithProgressCallback(
    std::istream & istr,
    char * to,
    size_t n,
    const std::function<bool(size_t)> & progress_callback,
    size_t * out_bytes_copied,
    bool * out_cancelled = nullptr);
2023-05-05 03:11:51 +00:00
2020-01-22 16:17:25 +00:00
}