mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Unbreak reading from web servers that don't support HEAD requests
This commit is contained in:
parent
473f212c82
commit
bd426a7d6d
@ -91,6 +91,7 @@ namespace detail
|
||||
class ReadWriteBufferFromHTTPBase : public SeekableReadBuffer, public WithFileName, public WithFileSize
|
||||
{
|
||||
public:
|
||||
/// Information from HTTP response header.
|
||||
struct FileInfo
|
||||
{
|
||||
// nullopt if the server doesn't report it.
|
||||
@ -796,7 +797,24 @@ namespace detail
|
||||
/// Sends a HEAD request and returns the file information parsed from the response headers.
///
/// If the HEAD request fails with a 4xx status other than 429 (Too Many Requests), an empty
/// FileInfo is returned instead of propagating the error. Any other HTTPException is rethrown.
FileInfo getFileInfo()
{
    Poco::Net::HTTPResponse response;

    /// The HEAD request is issued exactly once, and only inside the try block: an unguarded
    /// call before it would throw past the fallback below and duplicate the request on success.
    try
    {
        getHeadResponse(response);
    }
    catch (HTTPException & e)
    {
        /// Maybe the web server doesn't support HEAD requests.
        /// E.g. webhdfs reports status 400.
        /// We should proceed in hopes that the actual GET request will succeed.
        /// (Unless the error is transient. Don't want to nondeterministically sometimes
        /// fall back to slow whole-file reads when HEAD is actually supported; that sounds
        /// like a nightmare to debug.)
        if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 &&
            e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS)
            return FileInfo{};

        throw;
    }

    return parseFileInfo(response, 0);
}
|
||||
|
||||
|
@ -46,25 +46,34 @@ public:
|
||||
|
||||
/// Default implementation: unconditionally throws an Exception with code NOT_IMPLEMENTED.
virtual size_t getFileOffsetOfBufferEnd() const
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented");
}
|
||||
|
||||
// If true, setReadUntilPosition() guarantees that eof will be reported at the given position.
|
||||
/// If true, setReadUntilPosition() guarantees that eof will be reported at the given position.
|
||||
/// Default implementation returns false.
virtual bool supportsRightBoundedReads() const
{
    return false;
}
|
||||
|
||||
/// Default implementation returns false.
virtual bool isIntegratedWithFilesystemCache() const
{
    return false;
}
|
||||
|
||||
// Returns true if seek() actually works, false if seek() will always throw (or make subsequent
|
||||
// nextImpl() calls throw).
|
||||
//
|
||||
// This is needed because:
|
||||
// * Sometimes there's no cheap way to know in advance whether the buffer is really seekable.
|
||||
// Specifically, HTTP read buffer needs to send a request to check whether the server
|
||||
// supports byte ranges.
|
||||
// * Sometimes when we create such buffer we don't know in advance whether we'll need it to be
|
||||
// seekable or not. So we don't want to pay the price for this check in advance.
|
||||
/// Returns true if seek() actually works, false if seek() will always throw (or make subsequent
|
||||
/// nextImpl() calls throw).
|
||||
///
|
||||
/// This is needed because:
|
||||
/// * Sometimes there's no cheap way to know in advance whether the buffer is really seekable.
|
||||
/// Specifically, HTTP read buffer needs to send a request to check whether the server
|
||||
/// supports byte ranges.
|
||||
/// * Sometimes when we create such buffer we don't know in advance whether we'll need it to be
|
||||
/// seekable or not. So we don't want to pay the price for this check in advance.
|
||||
/// Default implementation returns true without performing any check.
virtual bool checkIfActuallySeekable()
{
    return true;
}
|
||||
};
|
||||
|
||||
// Useful for reading in parallel.
|
||||
// The created read buffers may outlive the factory.
|
||||
/// Useful for reading in parallel.
|
||||
/// The created read buffers may outlive the factory.
|
||||
///
|
||||
/// There are 2 ways to use this:
|
||||
/// (1) Never call seek() or getFileSize(), read the file sequentially.
|
||||
/// For HTTP, this usually translates to just one HTTP request.
|
||||
/// (2) Call checkIfActuallySeekable(), then:
|
||||
/// a. If it returned false, go to (1). seek() and getFileSize() are not available (throw if called).
|
||||
/// b. If it returned true, seek() and getFileSize() are available, knock yourself out.
|
||||
/// For HTTP, checkIfActuallySeekable() sends a HEAD request and returns false if the web server
|
||||
/// doesn't support ranges (or doesn't support HEAD requests).
|
||||
class SeekableReadBufferFactory : public WithFileSize
|
||||
{
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user