diff --git a/src/Disks/ReadIndirectBufferFromWebServer.cpp b/src/Disks/ReadIndirectBufferFromWebServer.cpp index ab652ed189e..4ed93438693 100644 --- a/src/Disks/ReadIndirectBufferFromWebServer.cpp +++ b/src/Disks/ReadIndirectBufferFromWebServer.cpp @@ -37,10 +37,10 @@ std::unique_ptr ReadIndirectBufferFromWebServer::initialize() { Poco::URI uri(url); - ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; - headers.emplace_back(std::make_pair("Range", fmt::format("bytes={}-", offset))); - const auto & settings = context->getSettingsRef(); + read_settings.http_start_offset = offset; LOG_DEBUG(log, "Reading from offset: {}", offset); + + const auto & settings = context->getSettingsRef(); const auto & config = context->getConfigRef(); Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 20), 0}; @@ -56,8 +56,7 @@ std::unique_ptr ReadIndirectBufferFromWebServer::initialize() 0, Poco::Net::HTTPBasicCredentials{}, buf_size, - read_settings, - headers); + read_settings); } diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index a200ce31b51..04750072b34 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -72,6 +72,7 @@ struct ReadSettings size_t http_max_tries = 1; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 10000; + size_t http_start_offset = 0; ReadSettings adjustBufferSize(size_t file_size) const { diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 14946ffb2d5..3dd3edd251c 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -20,7 +20,6 @@ #include #include #include -#include #if !defined(ARCADIA_BUILD) # include @@ -108,15 +107,16 @@ namespace detail std::function next_callback; size_t buffer_size; - size_t bytes_read = 0; + /// Read from offset with range header if needed. size_t start_byte = 0; - bool with_partial_content = false; + /// Non-empty if content-length header was received. std::optional total_bytes_to_read; - /// Delayed exception in case retries with partial content are not satisfiable - std::optional exception; + /// Delayed exception in case retries with partial content are not satisfiable. + std::exception_ptr exception; + ReadSettings settings; Poco::Logger * log; @@ -137,7 +137,8 @@ namespace detail request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry)); } - if (bytes_read && with_partial_content) + bool with_partial_content = bytes_read && total_bytes_to_read; + if (with_partial_content) request.set("Range", fmt::format("bytes={}-", start_byte + bytes_read)); if (!credentials.getUsername().empty()) @@ -159,9 +160,9 @@ namespace detail if (with_partial_content && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) { - /// If we retries some request, throw error from that request. + /// If we retried some request, throw error from that request. if (exception) - exception->rethrow(); + std::rethrow_exception(exception); throw Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: {}", request.get("Range")); } @@ -200,36 +201,15 @@ namespace detail , http_header_entries {http_header_entries_} , remote_host_filter {remote_host_filter_} , buffer_size {buffer_size_} + , start_byte {settings_.http_start_offset} , settings {settings_} , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) { - /** - * Get first byte from `bytes=offset-`, `bytes=offset-end`. - * Now there are two places, where it can be set: 1. in DiskWeb (offset), 2. via config as part of named-collection. - * Other cases not supported. - */ - auto range_header = std::find_if(http_header_entries_.begin(), http_header_entries_.end(), - [&](const HTTPHeaderEntry & header) { return std::get<0>(header) == "Range"; }); - if (range_header != http_header_entries_.end()) - { - if (method != Poco::Net::HTTPRequest::HTTP_GET) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Headers are allowed only with GET request"); - auto range = std::get<1>(*range_header).substr(std::strlen("bytes=")); - UInt64 start; - auto parsed = tryParse(start, range); - if (parsed) - start_byte = start; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract start byte"); - } - - initialize(); } void initialize() { - Poco::Net::HTTPResponse response; istr = call(uri, response); @@ -244,14 +224,8 @@ namespace detail } /// If it is the very first initialization. - if (!bytes_read && !total_bytes_to_read) - { - /// If we do not know total size, disable retries in the middle of reading. - if (response.hasContentLength()) - total_bytes_to_read = response.getContentLength(); - else - with_partial_content = false; - } + if (!bytes_read && !total_bytes_to_read && response.hasContentLength()) + total_bytes_to_read = response.getContentLength(); try { @@ -298,15 +272,14 @@ namespace detail } catch (const Poco::Exception & e) { - if (bytes_read && !with_partial_content) + bool can_retry_request = !bytes_read || total_bytes_to_read.has_value(); + if (!can_retry_request) throw; LOG_ERROR(&Poco::Logger::get("ReadBufferFromHTTP"), "Error: {}, code: {}", e.what(), e.code()); + + exception = std::current_exception(); impl.reset(); - - exception.reset(); - exception.emplace(e); - sleepForMilliseconds(milliseconds_to_wait); milliseconds_to_wait *= 2; } @@ -315,7 +288,7 @@ namespace detail } if (!successful_read && exception) - exception->rethrow(); + std::rethrow_exception(exception); if (!result) return false; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 4d8f1d8c492..f1531279c2b 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -450,6 +450,8 @@ void registerStorageURL(StorageFactory & factory) for (const auto & [header, value] : configuration.headers) { auto value_literal = value.safeGet(); + if (header == "Range") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed"); headers.emplace_back(std::make_pair(header, value_literal)); }