mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Reduce memory usage for some formats
This commit is contained in:
parent
2940d9fd19
commit
45ea820297
@ -594,6 +594,7 @@
|
|||||||
M(624, BAD_FILE_TYPE) \
|
M(624, BAD_FILE_TYPE) \
|
||||||
M(625, IO_SETUP_ERROR) \
|
M(625, IO_SETUP_ERROR) \
|
||||||
M(626, HTTP_RANGE_NOT_SATISFIABLE) \
|
M(626, HTTP_RANGE_NOT_SATISFIABLE) \
|
||||||
|
M(627, FILE_SIZE_UNKNOWN) \
|
||||||
\
|
\
|
||||||
M(999, KEEPER_EXCEPTION) \
|
M(999, KEEPER_EXCEPTION) \
|
||||||
M(1000, POCO_EXCEPTION) \
|
M(1000, POCO_EXCEPTION) \
|
||||||
|
@ -261,6 +261,8 @@
|
|||||||
M(RemoteFSUnprefetchedReads, "Number of reads from unprefetched buffer") \
|
M(RemoteFSUnprefetchedReads, "Number of reads from unprefetched buffer") \
|
||||||
M(RemoteFSBuffers, "Number of buffers created for asynchronous reading from remote filesystem") \
|
M(RemoteFSBuffers, "Number of buffers created for asynchronous reading from remote filesystem") \
|
||||||
\
|
\
|
||||||
|
M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \
|
||||||
|
\
|
||||||
M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \
|
M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \
|
||||||
M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \
|
M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \
|
||||||
\
|
\
|
||||||
|
@ -511,6 +511,7 @@ class IColumn;
|
|||||||
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
|
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
|
||||||
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
|
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
|
||||||
M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \
|
M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \
|
||||||
|
M(UInt64, remote_read_min_bytes_for_seek, DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read ( url, s3 ) to do seek, instead for read with ignore", 0) \
|
||||||
\
|
\
|
||||||
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
||||||
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
|
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
|
||||||
@ -570,6 +571,7 @@ class IColumn;
|
|||||||
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
|
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
|
||||||
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
|
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
|
||||||
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
|
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
|
||||||
|
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
|
||||||
\
|
\
|
||||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
|
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
|
||||||
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
|
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
|
||||||
|
@ -88,6 +88,9 @@ void ReadBufferFromRemoteFSGather::initialize()
|
|||||||
{
|
{
|
||||||
current_buf = createImplementationBuffer(file_path, read_until_position);
|
current_buf = createImplementationBuffer(file_path, read_until_position);
|
||||||
current_buf_idx = i;
|
current_buf_idx = i;
|
||||||
|
|
||||||
|
if (auto * in = dynamic_cast<SeekableReadBufferWithSize *>(current_buf.get()))
|
||||||
|
in->setReadType(SeekableReadBufferWithSize::ReadType::DISK_READ);
|
||||||
}
|
}
|
||||||
|
|
||||||
current_buf->seek(current_buf_offset, SEEK_SET);
|
current_buf->seek(current_buf_offset, SEEK_SET);
|
||||||
|
@ -53,7 +53,7 @@ std::unique_ptr<ReadBuffer> ReadBufferFromWebServer::initialize()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
range = { .begin = static_cast<size_t>(offset) };
|
range = { .begin = static_cast<size_t>(offset), .end = std::nullopt };
|
||||||
LOG_DEBUG(log, "Reading from offset: {}", offset);
|
LOG_DEBUG(log, "Reading from offset: {}", offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,6 +112,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
|||||||
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
|
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
|
||||||
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
|
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
|
||||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||||
|
format_settings.seekable_read = settings.input_format_allow_seeks;
|
||||||
|
|
||||||
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
|
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
|
||||||
if (format_settings.schema.is_server)
|
if (format_settings.schema.is_server)
|
||||||
|
@ -30,6 +30,8 @@ struct FormatSettings
|
|||||||
bool null_as_default = true;
|
bool null_as_default = true;
|
||||||
bool decimal_trailing_zeros = false;
|
bool decimal_trailing_zeros = false;
|
||||||
|
|
||||||
|
bool seekable_read = true;
|
||||||
|
|
||||||
enum class DateTimeInputFormat
|
enum class DateTimeInputFormat
|
||||||
{
|
{
|
||||||
Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp.
|
Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp.
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include <aws/s3/S3Client.h>
|
#include <aws/s3/S3Client.h>
|
||||||
#include <aws/s3/model/GetObjectRequest.h>
|
#include <aws/s3/model/GetObjectRequest.h>
|
||||||
|
#include <aws/s3/model/HeadObjectRequest.h>
|
||||||
|
|
||||||
#include <base/logger_useful.h>
|
#include <base/logger_useful.h>
|
||||||
#include <base/sleep.h>
|
#include <base/sleep.h>
|
||||||
@ -20,6 +21,7 @@ namespace ProfileEvents
|
|||||||
extern const Event S3ReadMicroseconds;
|
extern const Event S3ReadMicroseconds;
|
||||||
extern const Event S3ReadBytes;
|
extern const Event S3ReadBytes;
|
||||||
extern const Event S3ReadRequestsErrors;
|
extern const Event S3ReadRequestsErrors;
|
||||||
|
extern const Event ReadBufferSeekCancelConnection;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -34,9 +36,14 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
|
|
||||||
ReadBufferFromS3::ReadBufferFromS3(
|
ReadBufferFromS3::ReadBufferFromS3(
|
||||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, const String & key_,
|
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||||
UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer_, size_t read_until_position_)
|
const String & bucket_,
|
||||||
: SeekableReadBuffer(nullptr, 0)
|
const String & key_,
|
||||||
|
UInt64 max_single_read_retries_,
|
||||||
|
const ReadSettings & settings_,
|
||||||
|
bool use_external_buffer_,
|
||||||
|
size_t read_until_position_)
|
||||||
|
: SeekableReadBufferWithSize(nullptr, 0)
|
||||||
, client_ptr(std::move(client_ptr_))
|
, client_ptr(std::move(client_ptr_))
|
||||||
, bucket(bucket_)
|
, bucket(bucket_)
|
||||||
, key(key_)
|
, key(key_)
|
||||||
@ -142,9 +149,12 @@ bool ReadBufferFromS3::nextImpl()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
||||||
{
|
{
|
||||||
if (impl)
|
bool restricted_seek = read_type == SeekableReadBufferWithSize::ReadType::DISK_READ;
|
||||||
|
|
||||||
|
if (impl && restricted_seek)
|
||||||
throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
||||||
|
|
||||||
if (whence != SEEK_SET)
|
if (whence != SEEK_SET)
|
||||||
@ -153,11 +163,57 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
|||||||
if (offset_ < 0)
|
if (offset_ < 0)
|
||||||
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
||||||
|
|
||||||
offset = offset_;
|
if (!restricted_seek)
|
||||||
|
{
|
||||||
|
if (!working_buffer.empty()
|
||||||
|
&& size_t(offset_) >= offset - working_buffer.size()
|
||||||
|
&& offset_ < offset)
|
||||||
|
{
|
||||||
|
pos = working_buffer.end() - (offset - offset_);
|
||||||
|
assert(pos >= working_buffer.begin());
|
||||||
|
assert(pos <= working_buffer.end());
|
||||||
|
|
||||||
|
return getPosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto position = getPosition();
|
||||||
|
if (offset_ > position)
|
||||||
|
{
|
||||||
|
size_t diff = offset_ - position;
|
||||||
|
if (diff < read_settings.remote_read_min_bytes_for_seek)
|
||||||
|
{
|
||||||
|
ignore(diff);
|
||||||
|
return offset_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = working_buffer.end();
|
||||||
|
if (impl)
|
||||||
|
{
|
||||||
|
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
|
||||||
|
impl.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = offset_;
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> ReadBufferFromS3::getTotalSize()
|
||||||
|
{
|
||||||
|
if (file_size)
|
||||||
|
return file_size;
|
||||||
|
|
||||||
|
Aws::S3::Model::HeadObjectRequest request;
|
||||||
|
request.SetBucket(bucket);
|
||||||
|
request.SetKey(key);
|
||||||
|
|
||||||
|
auto outcome = client_ptr->HeadObject(request);
|
||||||
|
auto head_result = outcome.GetResultWithOwnership();
|
||||||
|
file_size = head_result.GetContentLength();
|
||||||
|
return file_size;
|
||||||
|
}
|
||||||
|
|
||||||
off_t ReadBufferFromS3::getPosition()
|
off_t ReadBufferFromS3::getPosition()
|
||||||
{
|
{
|
||||||
return offset - available();
|
return offset - available();
|
||||||
|
@ -23,7 +23,7 @@ namespace DB
|
|||||||
/**
|
/**
|
||||||
* Perform S3 HTTP GET request and provide response to read.
|
* Perform S3 HTTP GET request and provide response to read.
|
||||||
*/
|
*/
|
||||||
class ReadBufferFromS3 : public SeekableReadBuffer
|
class ReadBufferFromS3 : public SeekableReadBufferWithSize
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||||
@ -49,13 +49,18 @@ public:
|
|||||||
bool nextImpl() override;
|
bool nextImpl() override;
|
||||||
|
|
||||||
off_t seek(off_t off, int whence) override;
|
off_t seek(off_t off, int whence) override;
|
||||||
|
|
||||||
off_t getPosition() override;
|
off_t getPosition() override;
|
||||||
|
|
||||||
|
std::optional<size_t> getTotalSize() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<ReadBuffer> initialize();
|
std::unique_ptr<ReadBuffer> initialize();
|
||||||
|
|
||||||
ReadSettings read_settings;
|
ReadSettings read_settings;
|
||||||
|
|
||||||
bool use_external_buffer;
|
bool use_external_buffer;
|
||||||
|
|
||||||
off_t read_until_position = 0;
|
off_t read_until_position = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -77,6 +77,8 @@ struct ReadSettings
|
|||||||
size_t remote_fs_read_max_backoff_ms = 10000;
|
size_t remote_fs_read_max_backoff_ms = 10000;
|
||||||
size_t remote_fs_read_backoff_max_tries = 4;
|
size_t remote_fs_read_backoff_max_tries = 4;
|
||||||
|
|
||||||
|
size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE;
|
||||||
|
|
||||||
size_t http_max_tries = 1;
|
size_t http_max_tries = 1;
|
||||||
size_t http_retry_initial_backoff_ms = 100;
|
size_t http_retry_initial_backoff_ms = 100;
|
||||||
size_t http_retry_max_backoff_ms = 1600;
|
size_t http_retry_max_backoff_ms = 1600;
|
||||||
|
@ -18,11 +18,15 @@
|
|||||||
#include <Poco/Version.h>
|
#include <Poco/Version.h>
|
||||||
#include <Common/DNSResolver.h>
|
#include <Common/DNSResolver.h>
|
||||||
#include <Common/RemoteHostFilter.h>
|
#include <Common/RemoteHostFilter.h>
|
||||||
|
#include <Common/config.h>
|
||||||
#include <base/logger_useful.h>
|
#include <base/logger_useful.h>
|
||||||
#include <Poco/URIStreamFactory.h>
|
#include <Poco/URIStreamFactory.h>
|
||||||
|
|
||||||
#include <Common/config.h>
|
|
||||||
|
|
||||||
|
namespace ProfileEvents
|
||||||
|
{
|
||||||
|
extern const Event ReadBufferSeekCancelConnection;
|
||||||
|
}
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -34,6 +38,8 @@ namespace ErrorCodes
|
|||||||
extern const int TOO_MANY_REDIRECTS;
|
extern const int TOO_MANY_REDIRECTS;
|
||||||
extern const int HTTP_RANGE_NOT_SATISFIABLE;
|
extern const int HTTP_RANGE_NOT_SATISFIABLE;
|
||||||
extern const int BAD_ARGUMENTS;
|
extern const int BAD_ARGUMENTS;
|
||||||
|
extern const int CANNOT_SEEK_THROUGH_FILE;
|
||||||
|
extern const int SEEK_POSITION_OUT_OF_BOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename SessionPtr>
|
template <typename SessionPtr>
|
||||||
@ -83,7 +89,7 @@ public:
|
|||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
template <typename UpdatableSessionPtr>
|
template <typename UpdatableSessionPtr>
|
||||||
class ReadWriteBufferFromHTTPBase : public ReadBuffer
|
class ReadWriteBufferFromHTTPBase : public SeekableReadBufferWithSize
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using HTTPHeaderEntry = std::tuple<std::string, std::string>;
|
using HTTPHeaderEntry = std::tuple<std::string, std::string>;
|
||||||
@ -114,7 +120,7 @@ namespace detail
|
|||||||
size_t buffer_size;
|
size_t buffer_size;
|
||||||
bool use_external_buffer;
|
bool use_external_buffer;
|
||||||
|
|
||||||
size_t bytes_read = 0;
|
size_t offset_from_begin_pos = 0;
|
||||||
Range read_range;
|
Range read_range;
|
||||||
|
|
||||||
/// Delayed exception in case retries with partial content are not satisfiable.
|
/// Delayed exception in case retries with partial content are not satisfiable.
|
||||||
@ -124,13 +130,13 @@ namespace detail
|
|||||||
ReadSettings settings;
|
ReadSettings settings;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
|
|
||||||
std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response)
|
std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
|
||||||
{
|
{
|
||||||
// With empty path poco will send "POST HTTP/1.1" its bug.
|
// With an empty path Poco sends a malformed request line ("POST  HTTP/1.1"), which is a Poco bug, so use "/" instead.
|
||||||
if (uri_.getPath().empty())
|
if (uri_.getPath().empty())
|
||||||
uri_.setPath("/");
|
uri_.setPath("/");
|
||||||
|
|
||||||
Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||||
request.setHost(uri_.getHost()); // use original, not resolved host name in header
|
request.setHost(uri_.getHost()); // use original, not resolved host name in header
|
||||||
|
|
||||||
if (out_stream_callback)
|
if (out_stream_callback)
|
||||||
@ -145,13 +151,13 @@ namespace detail
|
|||||||
* Add range header if we have some passed range (for disk web)
|
* Add range header if we have some passed range (for disk web)
|
||||||
* or if we want to retry GET request on purpose.
|
* or if we want to retry GET request on purpose.
|
||||||
*/
|
*/
|
||||||
bool with_partial_content = (read_range.begin || read_range.end) || retry_with_range_header;
|
bool with_partial_content = read_range.begin || read_range.end || retry_with_range_header;
|
||||||
if (with_partial_content)
|
if (with_partial_content)
|
||||||
{
|
{
|
||||||
if (read_range.end)
|
if (read_range.end)
|
||||||
request.set("Range", fmt::format("bytes={}-{}", read_range.begin + bytes_read, *read_range.end));
|
request.set("Range", fmt::format("bytes={}-{}", read_range.begin + offset_from_begin_pos, *read_range.end));
|
||||||
else
|
else
|
||||||
request.set("Range", fmt::format("bytes={}-", read_range.begin + bytes_read));
|
request.set("Range", fmt::format("bytes={}-", read_range.begin + offset_from_begin_pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!credentials.getUsername().empty())
|
if (!credentials.getUsername().empty())
|
||||||
@ -191,6 +197,46 @@ namespace detail
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
off_t getOffset() const
|
||||||
|
{
|
||||||
|
return read_range.begin + offset_from_begin_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> getTotalSize() override
|
||||||
|
{
|
||||||
|
if (read_range.end)
|
||||||
|
return *read_range.end - read_range.begin;
|
||||||
|
|
||||||
|
Poco::Net::HTTPResponse response;
|
||||||
|
for (size_t i = 0; i < 10; ++i)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
call(uri, response, Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||||
|
|
||||||
|
while (isRedirect(response.getStatus()))
|
||||||
|
{
|
||||||
|
Poco::URI uri_redirect(response.get("Location"));
|
||||||
|
remote_host_filter.checkURL(uri_redirect);
|
||||||
|
|
||||||
|
session->updateSession(uri_redirect);
|
||||||
|
|
||||||
|
istr = call(uri_redirect, response, method);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.hasContentLength())
|
||||||
|
read_range.end = read_range.begin + response.getContentLength();
|
||||||
|
|
||||||
|
return read_range.end;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using NextCallback = std::function<void(size_t)>;
|
using NextCallback = std::function<void(size_t)>;
|
||||||
using OutStreamCallback = std::function<void(std::ostream &)>;
|
using OutStreamCallback = std::function<void(std::ostream &)>;
|
||||||
@ -208,7 +254,7 @@ namespace detail
|
|||||||
const RemoteHostFilter & remote_host_filter_ = {},
|
const RemoteHostFilter & remote_host_filter_ = {},
|
||||||
bool delay_initialization = false,
|
bool delay_initialization = false,
|
||||||
bool use_external_buffer_ = false)
|
bool use_external_buffer_ = false)
|
||||||
: ReadBuffer(nullptr, 0)
|
: SeekableReadBufferWithSize(nullptr, 0)
|
||||||
, uri {uri_}
|
, uri {uri_}
|
||||||
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
|
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
|
||||||
, session {session_}
|
, session {session_}
|
||||||
@ -237,7 +283,7 @@ namespace detail
|
|||||||
void initialize()
|
void initialize()
|
||||||
{
|
{
|
||||||
Poco::Net::HTTPResponse response;
|
Poco::Net::HTTPResponse response;
|
||||||
istr = call(uri, response);
|
istr = call(uri, response, method);
|
||||||
|
|
||||||
while (isRedirect(response.getStatus()))
|
while (isRedirect(response.getStatus()))
|
||||||
{
|
{
|
||||||
@ -246,12 +292,11 @@ namespace detail
|
|||||||
|
|
||||||
session->updateSession(uri_redirect);
|
session->updateSession(uri_redirect);
|
||||||
|
|
||||||
istr = call(uri_redirect, response);
|
istr = call(uri_redirect, response, method);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!bytes_read && !read_range.end && response.hasContentLength())
|
if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
|
||||||
read_range.end = response.getContentLength();
|
read_range.end = response.getContentLength();
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
impl = std::make_unique<ReadBufferFromIStream>(*istr, buffer_size);
|
impl = std::make_unique<ReadBufferFromIStream>(*istr, buffer_size);
|
||||||
@ -281,9 +326,10 @@ namespace detail
|
|||||||
if (next_callback)
|
if (next_callback)
|
||||||
next_callback(count());
|
next_callback(count());
|
||||||
|
|
||||||
if (read_range.end && bytes_read == read_range.end.value())
|
if (read_range.end && static_cast<size_t>(getOffset()) == read_range.end.value())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
||||||
if (impl)
|
if (impl)
|
||||||
{
|
{
|
||||||
if (use_external_buffer)
|
if (use_external_buffer)
|
||||||
@ -342,14 +388,14 @@ namespace detail
|
|||||||
* Retry request unconditionally if nothing has beed read yet.
|
* Retry request unconditionally if nothing has been read yet.
|
||||||
* Otherwise if it is GET method retry with range header starting from bytes_read.
|
* Otherwise, if it is a GET method, retry with a range header starting from the current offset (read_range.begin + offset_from_begin_pos).
|
||||||
*/
|
*/
|
||||||
bool can_retry_request = !bytes_read || method == Poco::Net::HTTPRequest::HTTP_GET;
|
bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
|
||||||
if (!can_retry_request)
|
if (!can_retry_request)
|
||||||
throw;
|
throw;
|
||||||
|
|
||||||
LOG_ERROR(log,
|
LOG_ERROR(log,
|
||||||
"HTTP request to `{}` failed at try {}/{} with bytes read: {}. "
|
"HTTP request to `{}` failed at try {}/{} with bytes read: {}. "
|
||||||
"Error: {}. (Current backoff wait is {}/{} ms)",
|
"Error: {}. (Current backoff wait is {}/{} ms)",
|
||||||
uri.toString(), i, settings.http_max_tries, bytes_read, e.what(),
|
uri.toString(), i, settings.http_max_tries, offset_from_begin_pos, e.what(),
|
||||||
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
|
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
|
||||||
|
|
||||||
retry_with_range_header = true;
|
retry_with_range_header = true;
|
||||||
@ -372,9 +418,60 @@ namespace detail
|
|||||||
|
|
||||||
internal_buffer = impl->buffer();
|
internal_buffer = impl->buffer();
|
||||||
working_buffer = internal_buffer;
|
working_buffer = internal_buffer;
|
||||||
|
offset_from_begin_pos += working_buffer.size();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
off_t getPosition() override
|
||||||
|
{
|
||||||
|
return getOffset() - available();
|
||||||
|
}
|
||||||
|
|
||||||
|
off_t seek(off_t offset_, int whence) override
|
||||||
|
{
|
||||||
|
if (whence != SEEK_SET)
|
||||||
|
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
||||||
|
|
||||||
|
if (offset_ < 0)
|
||||||
|
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
||||||
|
|
||||||
|
auto current_offset = getOffset();
|
||||||
|
if (!working_buffer.empty()
|
||||||
|
&& size_t(offset_) >= current_offset - working_buffer.size()
|
||||||
|
&& offset_ < current_offset)
|
||||||
|
{
|
||||||
|
pos = working_buffer.end() - (current_offset - offset_);
|
||||||
|
assert(pos >= working_buffer.begin());
|
||||||
|
assert(pos <= working_buffer.end());
|
||||||
|
|
||||||
|
return getPosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto position = getPosition();
|
||||||
|
if (offset_ > position)
|
||||||
|
{
|
||||||
|
size_t diff = offset_ - position;
|
||||||
|
if (diff < settings.remote_read_min_bytes_for_seek)
|
||||||
|
{
|
||||||
|
ignore(diff);
|
||||||
|
return offset_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (impl)
|
||||||
|
{
|
||||||
|
|
||||||
|
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
|
||||||
|
impl.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = working_buffer.end();
|
||||||
|
read_range.begin = offset_;
|
||||||
|
offset_from_begin_pos = 0;
|
||||||
|
|
||||||
|
return offset_;
|
||||||
|
}
|
||||||
|
|
||||||
std::string getResponseCookie(const std::string & name, const std::string & def) const
|
std::string getResponseCookie(const std::string & name, const std::string & def) const
|
||||||
{
|
{
|
||||||
for (const auto & cookie : cookies)
|
for (const auto & cookie : cookies)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <IO/ReadBuffer.h>
|
#include <IO/ReadBuffer.h>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -34,4 +35,36 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
using SeekableReadBufferPtr = std::shared_ptr<SeekableReadBuffer>;
|
using SeekableReadBufferPtr = std::shared_ptr<SeekableReadBuffer>;
|
||||||
|
|
||||||
|
|
||||||
|
class SeekableReadBufferWithSize : public SeekableReadBuffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SeekableReadBufferWithSize(Position ptr, size_t size)
|
||||||
|
: SeekableReadBuffer(ptr, size) {}
|
||||||
|
SeekableReadBufferWithSize(Position ptr, size_t size, size_t offset)
|
||||||
|
: SeekableReadBuffer(ptr, size, offset) {}
|
||||||
|
|
||||||
|
/// set std::nullopt in case it is impossible to find out total size.
|
||||||
|
virtual std::optional<size_t> getTotalSize() = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Some buffers might have different seek restrictions according to where it is used.
|
||||||
|
* For example, ReadBufferFromS3 and ReadBufferFromWebServer, when used for reading
|
||||||
|
* from remote disks, require some additional invariants and restrictions, which
|
||||||
|
* are not needed in other cases.
|
||||||
|
*/
|
||||||
|
enum class ReadType
|
||||||
|
{
|
||||||
|
DEFAULT,
|
||||||
|
DISK_READ
|
||||||
|
};
|
||||||
|
|
||||||
|
void setReadType(ReadType type) { read_type = type; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
ReadType read_type = ReadType::DEFAULT;
|
||||||
|
std::optional<size_t> file_size;
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3083,6 +3083,8 @@ ReadSettings Context::getReadSettings() const
|
|||||||
res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms;
|
res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms;
|
||||||
res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries;
|
res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries;
|
||||||
|
|
||||||
|
res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek;
|
||||||
|
|
||||||
res.local_fs_buffer_size = settings.max_read_buffer_size;
|
res.local_fs_buffer_size = settings.max_read_buffer_size;
|
||||||
res.direct_io_threshold = settings.min_bytes_to_use_direct_io;
|
res.direct_io_threshold = settings.min_bytes_to_use_direct_io;
|
||||||
res.mmap_threshold = settings.min_bytes_to_use_mmap_io;
|
res.mmap_threshold = settings.min_bytes_to_use_mmap_io;
|
||||||
|
@ -94,7 +94,7 @@ void ArrowBlockInputFormat::prepareReader()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(*in));
|
auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(*in, format_settings));
|
||||||
if (!file_reader_status.ok())
|
if (!file_reader_status.ok())
|
||||||
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
|
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
|
||||||
"Error while opening a table: {}", file_reader_status.status().ToString());
|
"Error while opening a table: {}", file_reader_status.status().ToString());
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#if USE_ARROW || USE_ORC || USE_PARQUET
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
||||||
|
|
||||||
|
#include <Common/assert_cast.h>
|
||||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <IO/copyData.h>
|
#include <IO/copyData.h>
|
||||||
@ -19,6 +20,11 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int FILE_SIZE_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
ArrowBufferedOutputStream::ArrowBufferedOutputStream(WriteBuffer & out_) : out{out_}, is_open{true}
|
ArrowBufferedOutputStream::ArrowBufferedOutputStream(WriteBuffer & out_) : out{out_}, is_open{true}
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -46,9 +52,22 @@ RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(S
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_)
|
||||||
|
: in{in_}, is_open{true}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the size of the underlying file.
/// If no size is stored yet, it is requested from the buffer, provided the buffer is a
/// SeekableReadBufferWithSize, and remembered in `file_size`.
/// Throws FILE_SIZE_UNKNOWN if the size still cannot be determined.
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::GetSize()
{
    if (file_size)
        return arrow::Result<int64_t>(*file_size);

    if (auto * sized_in = dynamic_cast<SeekableReadBufferWithSize *>(&in))
        file_size = sized_in->getTotalSize();

    if (!file_size)
        throw Exception(ErrorCodes::FILE_SIZE_UNKNOWN, "Cannot find out size of file");

    return arrow::Result<int64_t>(*file_size);
}
|
||||||
|
|
||||||
arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
|
arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
|
||||||
@ -121,7 +140,7 @@ arrow::Status ArrowInputStreamFromReadBuffer::Close()
|
|||||||
return arrow::Status();
|
return arrow::Status();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in)
|
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in, const FormatSettings & settings)
|
||||||
{
|
{
|
||||||
if (auto * fd_in = dynamic_cast<ReadBufferFromFileDescriptor *>(&in))
|
if (auto * fd_in = dynamic_cast<ReadBufferFromFileDescriptor *>(&in))
|
||||||
{
|
{
|
||||||
@ -131,6 +150,11 @@ std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in)
|
|||||||
if (res == 0 && S_ISREG(stat.st_mode))
|
if (res == 0 && S_ISREG(stat.st_mode))
|
||||||
return std::make_shared<RandomAccessFileFromSeekableReadBuffer>(*fd_in, stat.st_size);
|
return std::make_shared<RandomAccessFileFromSeekableReadBuffer>(*fd_in, stat.st_size);
|
||||||
}
|
}
|
||||||
|
else if (auto * seekable_in = dynamic_cast<SeekableReadBufferWithSize *>(&in))
|
||||||
|
{
|
||||||
|
if (settings.seekable_read)
|
||||||
|
return std::make_shared<RandomAccessFileFromSeekableReadBuffer>(*seekable_in);
|
||||||
|
}
|
||||||
|
|
||||||
// fallback to loading the entire file in memory
|
// fallback to loading the entire file in memory
|
||||||
std::string file_data;
|
std::string file_data;
|
||||||
|
@ -4,14 +4,18 @@
|
|||||||
#if USE_ARROW || USE_ORC || USE_PARQUET
|
#if USE_ARROW || USE_ORC || USE_PARQUET
|
||||||
|
|
||||||
#include <arrow/io/interfaces.h>
|
#include <arrow/io/interfaces.h>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class ReadBuffer;
|
class ReadBuffer;
|
||||||
class SeekableReadBuffer;
|
|
||||||
class WriteBuffer;
|
class WriteBuffer;
|
||||||
|
|
||||||
|
class SeekableReadBuffer;
|
||||||
|
class SeekableReadBufferWithSize;
|
||||||
|
struct FormatSettings;
|
||||||
|
|
||||||
class ArrowBufferedOutputStream : public arrow::io::OutputStream
|
class ArrowBufferedOutputStream : public arrow::io::OutputStream
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -40,6 +44,8 @@ class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFil
|
|||||||
public:
|
public:
|
||||||
RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_);
|
RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_);
|
||||||
|
|
||||||
|
RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_);
|
||||||
|
|
||||||
arrow::Result<int64_t> GetSize() override;
|
arrow::Result<int64_t> GetSize() override;
|
||||||
|
|
||||||
arrow::Status Close() override;
|
arrow::Status Close() override;
|
||||||
@ -56,7 +62,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
SeekableReadBuffer & in;
|
SeekableReadBuffer & in;
|
||||||
off_t file_size;
|
std::optional<off_t> file_size;
|
||||||
bool is_open = false;
|
bool is_open = false;
|
||||||
|
|
||||||
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
|
ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer);
|
||||||
@ -80,7 +86,7 @@ private:
|
|||||||
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
|
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in);
|
std::shared_ptr<arrow::io::RandomAccessFile> asArrowFile(ReadBuffer & in, const FormatSettings & settings);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -518,13 +518,6 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
|||||||
|
|
||||||
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
|
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
|
||||||
{
|
{
|
||||||
Columns columns_list;
|
|
||||||
UInt64 num_rows = 0;
|
|
||||||
|
|
||||||
columns_list.reserve(header.rows());
|
|
||||||
|
|
||||||
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::ChunkedArray>>;
|
|
||||||
|
|
||||||
NameToColumnPtr name_to_column_ptr;
|
NameToColumnPtr name_to_column_ptr;
|
||||||
for (const auto& column_name : table->ColumnNames())
|
for (const auto& column_name : table->ColumnNames())
|
||||||
{
|
{
|
||||||
@ -532,6 +525,16 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arr
|
|||||||
name_to_column_ptr[column_name] = arrow_column;
|
name_to_column_ptr[column_name] = arrow_column;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
arrowColumnsToCHChunk(res, name_to_column_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr)
|
||||||
|
{
|
||||||
|
Columns columns_list;
|
||||||
|
UInt64 num_rows = 0;
|
||||||
|
|
||||||
|
columns_list.reserve(header.rows());
|
||||||
|
|
||||||
std::unordered_map<String, BlockPtr> nested_tables;
|
std::unordered_map<String, BlockPtr> nested_tables;
|
||||||
for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i)
|
for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i)
|
||||||
{
|
{
|
||||||
@ -587,7 +590,5 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arr
|
|||||||
|
|
||||||
res.setColumns(columns_list, num_rows);
|
res.setColumns(columns_list, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -19,6 +19,8 @@ class Chunk;
|
|||||||
class ArrowColumnToCHColumn
|
class ArrowColumnToCHColumn
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::ChunkedArray>>;
|
||||||
|
|
||||||
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
|
ArrowColumnToCHColumn(const Block & header_, const std::string & format_name_, bool import_nested_);
|
||||||
|
|
||||||
/// Constructor that create header by arrow schema. It will be useful for inserting
|
/// Constructor that create header by arrow schema. It will be useful for inserting
|
||||||
@ -27,6 +29,8 @@ public:
|
|||||||
|
|
||||||
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
||||||
|
|
||||||
|
void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Block header;
|
const Block header;
|
||||||
const std::string format_name;
|
const std::string format_name;
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
#include <IO/ReadBufferFromMemory.h>
|
#include <IO/ReadBufferFromMemory.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/copyData.h>
|
#include <IO/copyData.h>
|
||||||
#include <arrow/adapters/orc/adapter.h>
|
|
||||||
#include <arrow/io/memory.h>
|
#include <arrow/io/memory.h>
|
||||||
#include "ArrowBufferedStreams.h"
|
#include "ArrowBufferedStreams.h"
|
||||||
#include "ArrowColumnToCHColumn.h"
|
#include "ArrowColumnToCHColumn.h"
|
||||||
@ -39,23 +38,53 @@ Chunk ORCBlockInputFormat::generate()
|
|||||||
if (!file_reader)
|
if (!file_reader)
|
||||||
prepareReader();
|
prepareReader();
|
||||||
|
|
||||||
if (stripe_current >= stripe_total)
|
while (true)
|
||||||
return res;
|
{
|
||||||
|
if (!batch_reader)
|
||||||
|
{
|
||||||
|
arrow::Status reader_status = file_reader->NextStripeReader(
|
||||||
|
DBMS_DEFAULT_BUFFER_SIZE, include_indices, &batch_reader);
|
||||||
|
if (!reader_status.ok())
|
||||||
|
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||||
|
"Failed to create batch reader: {}",
|
||||||
|
reader_status.ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!batch_reader)
|
||||||
|
break;
|
||||||
|
|
||||||
std::shared_ptr<arrow::RecordBatch> batch_result;
|
std::shared_ptr<arrow::RecordBatch> batch_result;
|
||||||
arrow::Status batch_status = file_reader->ReadStripe(stripe_current, include_indices, &batch_result);
|
arrow::Status batch_status = batch_reader->ReadNext(&batch_result);
|
||||||
if (!batch_status.ok())
|
if (!batch_status.ok())
|
||||||
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||||
"Error while reading batch of ORC data: {}", batch_status.ToString());
|
"Error while reading batch of ORC data: {}",
|
||||||
|
batch_status.ToString());
|
||||||
|
|
||||||
|
if (!batch_result || !batch_result->num_rows())
|
||||||
|
{
|
||||||
|
batch_reader.reset();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto table_result = arrow::Table::FromRecordBatches({batch_result});
|
auto table_result = arrow::Table::FromRecordBatches({batch_result});
|
||||||
if (!table_result.ok())
|
if (!table_result.ok())
|
||||||
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||||
"Error while reading batch of ORC data: {}", table_result.status().ToString());
|
"Error while reading batch of ORC data: {}",
|
||||||
|
table_result.status().ToString());
|
||||||
|
|
||||||
++stripe_current;
|
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::ChunkedArray>>;
|
||||||
|
|
||||||
|
NameToColumnPtr name_to_column_ptr;
|
||||||
|
for (const auto& column_name : column_names)
|
||||||
|
{
|
||||||
|
arrow::ArrayVector vec = {batch_result->GetColumnByName(column_name)};
|
||||||
|
std::shared_ptr<arrow::ChunkedArray> arrow_column = std::make_shared<arrow::ChunkedArray>(vec);
|
||||||
|
name_to_column_ptr[column_name] = arrow_column;
|
||||||
|
}
|
||||||
|
arrow_column_to_ch_column->arrowColumnsToCHChunk(res, name_to_column_ptr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result);
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,7 +122,7 @@ static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
|||||||
|
|
||||||
void ORCBlockInputFormat::prepareReader()
|
void ORCBlockInputFormat::prepareReader()
|
||||||
{
|
{
|
||||||
THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in), arrow::default_memory_pool(), &file_reader));
|
THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings), arrow::default_memory_pool(), &file_reader));
|
||||||
stripe_total = file_reader->NumberOfStripes();
|
stripe_total = file_reader->NumberOfStripes();
|
||||||
stripe_current = 0;
|
stripe_current = 0;
|
||||||
|
|
||||||
@ -117,6 +146,7 @@ void ORCBlockInputFormat::prepareReader()
|
|||||||
const auto & name = schema->field(i)->name();
|
const auto & name = schema->field(i)->name();
|
||||||
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
|
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
|
||||||
{
|
{
|
||||||
|
column_names.push_back(name);
|
||||||
for (int j = 0; j != indexes_count; ++j)
|
for (int j = 0; j != indexes_count; ++j)
|
||||||
include_indices.push_back(index + j);
|
include_indices.push_back(index + j);
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,12 @@
|
|||||||
#include <Processors/Formats/IInputFormat.h>
|
#include <Processors/Formats/IInputFormat.h>
|
||||||
#include <Formats/FormatSettings.h>
|
#include <Formats/FormatSettings.h>
|
||||||
|
|
||||||
namespace arrow::adapters::orc { class ORCFileReader; }
|
#include <arrow/adapters/orc/adapter.h>
|
||||||
|
|
||||||
|
namespace arrow::adapters::orc
|
||||||
|
{
|
||||||
|
class ORCFileReader;
|
||||||
|
}
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -30,8 +35,12 @@ private:
|
|||||||
|
|
||||||
std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
|
std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
|
||||||
|
|
||||||
|
std::shared_ptr<arrow::RecordBatchReader> batch_reader;
|
||||||
|
|
||||||
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
|
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
|
||||||
|
|
||||||
|
std::vector<String> column_names;
|
||||||
|
|
||||||
int stripe_total = 0;
|
int stripe_total = 0;
|
||||||
|
|
||||||
int stripe_current = 0;
|
int stripe_current = 0;
|
||||||
|
@ -93,7 +93,7 @@ static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
|||||||
|
|
||||||
void ParquetBlockInputFormat::prepareReader()
|
void ParquetBlockInputFormat::prepareReader()
|
||||||
{
|
{
|
||||||
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in), arrow::default_memory_pool(), &file_reader));
|
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings), arrow::default_memory_pool(), &file_reader));
|
||||||
row_group_total = file_reader->num_row_groups();
|
row_group_total = file_reader->num_row_groups();
|
||||||
row_group_current = 0;
|
row_group_current = 0;
|
||||||
|
|
||||||
|
@ -74,6 +74,14 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
|
|||||||
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Fail to seek HDFS file: {}, error: {}", hdfs_uri, std::string(hdfsGetLastError()));
|
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Fail to seek HDFS file: {}, error: {}", hdfs_uri, std::string(hdfsGetLastError()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> getTotalSize() const
|
||||||
|
{
|
||||||
|
auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str());
|
||||||
|
if (!file_info)
|
||||||
|
return std::nullopt;
|
||||||
|
return file_info->mSize;
|
||||||
|
}
|
||||||
|
|
||||||
bool nextImpl() override
|
bool nextImpl() override
|
||||||
{
|
{
|
||||||
if (!initialized)
|
if (!initialized)
|
||||||
@ -115,19 +123,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
off_t seek(off_t offset_, int whence) override
|
off_t seek(off_t offset_, int) override
|
||||||
{
|
{
|
||||||
if (initialized)
|
|
||||||
throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
|
||||||
|
|
||||||
if (whence != SEEK_SET)
|
|
||||||
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
|
||||||
|
|
||||||
if (offset_ < 0)
|
|
||||||
throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", std::to_string(offset_));
|
|
||||||
|
|
||||||
offset = offset_;
|
offset = offset_;
|
||||||
|
initialize();
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -145,11 +144,15 @@ ReadBufferFromHDFS::ReadBufferFromHDFS(
|
|||||||
const String & hdfs_file_path_,
|
const String & hdfs_file_path_,
|
||||||
const Poco::Util::AbstractConfiguration & config_,
|
const Poco::Util::AbstractConfiguration & config_,
|
||||||
size_t buf_size_, size_t read_until_position_)
|
size_t buf_size_, size_t read_until_position_)
|
||||||
: SeekableReadBuffer(nullptr, 0)
|
: SeekableReadBufferWithSize(nullptr, 0)
|
||||||
, impl(std::make_unique<ReadBufferFromHDFSImpl>(hdfs_uri_, hdfs_file_path_, config_, buf_size_, read_until_position_))
|
, impl(std::make_unique<ReadBufferFromHDFSImpl>(hdfs_uri_, hdfs_file_path_, config_, buf_size_, read_until_position_))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<size_t> ReadBufferFromHDFS::getTotalSize()
|
||||||
|
{
|
||||||
|
return impl->getTotalSize();
|
||||||
|
}
|
||||||
|
|
||||||
bool ReadBufferFromHDFS::nextImpl()
|
bool ReadBufferFromHDFS::nextImpl()
|
||||||
{
|
{
|
||||||
@ -163,9 +166,29 @@ bool ReadBufferFromHDFS::nextImpl()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
off_t ReadBufferFromHDFS::seek(off_t off, int whence)
|
off_t ReadBufferFromHDFS::seek(off_t offset_, int whence)
|
||||||
{
|
{
|
||||||
return impl->seek(off, whence);
|
if (whence != SEEK_SET)
|
||||||
|
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
||||||
|
|
||||||
|
if (offset_ < 0)
|
||||||
|
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
||||||
|
|
||||||
|
if (!working_buffer.empty()
|
||||||
|
&& size_t(offset_) >= impl->getPosition() - working_buffer.size()
|
||||||
|
&& offset_ < impl->getPosition())
|
||||||
|
{
|
||||||
|
pos = working_buffer.end() - (impl->getPosition() - offset_);
|
||||||
|
assert(pos >= working_buffer.begin());
|
||||||
|
assert(pos <= working_buffer.end());
|
||||||
|
|
||||||
|
return getPosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
pos = working_buffer.end();
|
||||||
|
impl->initialize();
|
||||||
|
impl->seek(offset_, whence);
|
||||||
|
return impl->getPosition();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ namespace DB
|
|||||||
/** Accepts HDFS path to file and opens it.
|
/** Accepts HDFS path to file and opens it.
|
||||||
* Closes file by himself (thus "owns" a file descriptor).
|
* Closes file by himself (thus "owns" a file descriptor).
|
||||||
*/
|
*/
|
||||||
class ReadBufferFromHDFS : public SeekableReadBuffer
|
class ReadBufferFromHDFS : public SeekableReadBufferWithSize
|
||||||
{
|
{
|
||||||
struct ReadBufferFromHDFSImpl;
|
struct ReadBufferFromHDFSImpl;
|
||||||
|
|
||||||
@ -37,6 +37,8 @@ public:
|
|||||||
|
|
||||||
off_t getPosition() override;
|
off_t getPosition() override;
|
||||||
|
|
||||||
|
std::optional<size_t> getTotalSize() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<ReadBufferFromHDFSImpl> impl;
|
std::unique_ptr<ReadBufferFromHDFSImpl> impl;
|
||||||
};
|
};
|
||||||
|
@ -1,10 +1,6 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from helpers.cluster import ClickHouseCluster
|
from helpers.cluster import ClickHouseCluster
|
||||||
|
|
||||||
from helpers.network import PartitionManager
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
|
|
||||||
cluster = ClickHouseCluster(__file__)
|
cluster = ClickHouseCluster(__file__)
|
||||||
node1 = cluster.add_instance('node1', main_configs=['configs/named_collections.xml'], with_zookeeper=False, with_hdfs=True)
|
node1 = cluster.add_instance('node1', main_configs=['configs/named_collections.xml'], with_zookeeper=False, with_hdfs=True)
|
||||||
|
|
||||||
@ -99,61 +95,3 @@ def test_predefined_connection_configuration(started_cluster):
|
|||||||
result = node1.query("SET max_http_get_redirects=1; select * from url(url1, url='http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', format='TSV', structure='id UInt32, name String, weight Float64')")
|
result = node1.query("SET max_http_get_redirects=1; select * from url(url1, url='http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', format='TSV', structure='id UInt32, name String, weight Float64')")
|
||||||
assert(result == "1\tMark\t72.53\n")
|
assert(result == "1\tMark\t72.53\n")
|
||||||
node1.query("drop table WebHDFSStorageWithRedirect")
|
node1.query("drop table WebHDFSStorageWithRedirect")
|
||||||
|
|
||||||
|
|
||||||
result = ''
|
|
||||||
def test_url_reconnect_at_start(started_cluster):
|
|
||||||
hdfs_api = started_cluster.hdfs_api
|
|
||||||
|
|
||||||
with PartitionManager() as pm:
|
|
||||||
node1.query(
|
|
||||||
"insert into table function hdfs('hdfs://hdfs1:9000/storage_big', 'TSV', 'id Int32') select number from numbers(500000)")
|
|
||||||
|
|
||||||
pm._add_rule({'probability': 1, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
|
|
||||||
def select():
|
|
||||||
global result
|
|
||||||
print("reading")
|
|
||||||
result = node1.query(
|
|
||||||
"select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32') settings http_max_tries = 20, http_retry_max_backoff_ms=10000")
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
thread = threading.Thread(target=select)
|
|
||||||
thread.start()
|
|
||||||
time.sleep(1)
|
|
||||||
print("delete rule")
|
|
||||||
pm._delete_rule({'probability': 1, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
|
|
||||||
thread.join()
|
|
||||||
#assert node1.contains_in_log("Error: Timeout, code:")
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
result = ''
|
|
||||||
def test_url_reconnect_in_the_middle(started_cluster):
|
|
||||||
hdfs_api = started_cluster.hdfs_api
|
|
||||||
|
|
||||||
with PartitionManager() as pm:
|
|
||||||
node1.query(
|
|
||||||
"insert into table function hdfs('hdfs://hdfs1:9000/storage_big2', 'TSV', 'id Int32') select number from numbers(10000000)")
|
|
||||||
|
|
||||||
def select():
|
|
||||||
global result
|
|
||||||
print("reading")
|
|
||||||
result = node1.query(
|
|
||||||
"select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big2?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32')")
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
thread = threading.Thread(target=select)
|
|
||||||
print("add rule")
|
|
||||||
pm._add_rule({'probability': 0.3, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
thread.start()
|
|
||||||
time.sleep(0.5)
|
|
||||||
pm._add_rule({'probability': 1, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
time.sleep(3)
|
|
||||||
print("delete rule")
|
|
||||||
pm._delete_rule({'probability': 0.3, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
pm._delete_rule({'probability': 1, 'destination': node1.ip_address, 'source_port': 50075, 'action': 'DROP'})
|
|
||||||
|
|
||||||
thread.join()
|
|
||||||
assert node1.contains_in_log("Error: Timeout, code:")
|
|
||||||
print(result)
|
|
||||||
|
@ -257,6 +257,22 @@ def test_truncate_table(started_cluster):
|
|||||||
node1.query("drop table test_truncate")
|
node1.query("drop table test_truncate")
|
||||||
|
|
||||||
|
|
||||||
|
def test_seekable_formats(started_cluster):
|
||||||
|
hdfs_api = started_cluster.hdfs_api
|
||||||
|
|
||||||
|
table_function = f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')"
|
||||||
|
node1.query(f"insert into table function {table_function} SELECT number, randomString(1000) FROM numbers(5000000)")
|
||||||
|
|
||||||
|
result = node1.query(f"SELECT count() FROM {table_function}")
|
||||||
|
assert(int(result) == 5000000)
|
||||||
|
|
||||||
|
table_function = f"hdfs('hdfs://hdfs1:9000/orc', 'ORC', 'a Int32, b String')"
|
||||||
|
node1.query(f"insert into table function {table_function} SELECT number, randomString(1000) FROM numbers(5000000)")
|
||||||
|
|
||||||
|
result = node1.query(f"SELECT count() FROM {table_function}")
|
||||||
|
assert(int(result) == 5000000)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
cluster.start()
|
cluster.start()
|
||||||
input("Cluster created, press any key to destroy...")
|
input("Cluster created, press any key to destroy...")
|
||||||
|
@ -5,5 +5,15 @@
|
|||||||
<access_key_id>minio</access_key_id>
|
<access_key_id>minio</access_key_id>
|
||||||
<secret_access_key>minio123</secret_access_key>
|
<secret_access_key>minio123</secret_access_key>
|
||||||
</s3_conf1>
|
</s3_conf1>
|
||||||
|
<s3_parquet>
|
||||||
|
<url>http://minio1:9001/root/test_parquet</url>
|
||||||
|
<access_key_id>minio</access_key_id>
|
||||||
|
<secret_access_key>minio123</secret_access_key>
|
||||||
|
</s3_parquet>
|
||||||
|
<s3_orc>
|
||||||
|
<url>http://minio1:9001/root/test_orc</url>
|
||||||
|
<access_key_id>minio</access_key_id>
|
||||||
|
<secret_access_key>minio123</secret_access_key>
|
||||||
|
</s3_orc>
|
||||||
</named_collections>
|
</named_collections>
|
||||||
</clickhouse>
|
</clickhouse>
|
||||||
|
@ -750,3 +750,20 @@ def test_predefined_connection_configuration(started_cluster):
|
|||||||
|
|
||||||
result = instance.query("SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')")
|
result = instance.query("SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')")
|
||||||
assert result == instance.query("SELECT number FROM numbers(10)")
|
assert result == instance.query("SELECT number FROM numbers(10)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_seekable_formats(started_cluster):
|
||||||
|
bucket = started_cluster.minio_bucket
|
||||||
|
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
|
||||||
|
|
||||||
|
table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')"
|
||||||
|
instance.query(f"insert into table function {table_function} SELECT number, randomString(1000) FROM numbers(5000000)")
|
||||||
|
|
||||||
|
result = instance.query(f"SELECT count() FROM {table_function}")
|
||||||
|
assert(int(result) == 5000000)
|
||||||
|
|
||||||
|
table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')"
|
||||||
|
instance.query(f"insert into table function {table_function} SELECT number, randomString(1000) FROM numbers(5000000)")
|
||||||
|
|
||||||
|
result = instance.query(f"SELECT count() FROM {table_function}")
|
||||||
|
assert(int(result) == 5000000)
|
||||||
|
Loading…
Reference in New Issue
Block a user