2019-12-06 14:37:21 +00:00
|
|
|
#include <Common/config.h>
|
|
|
|
|
|
|
|
#if USE_AWS_S3
|
2019-05-31 10:58:43 +00:00
|
|
|
|
2020-01-28 13:05:37 +00:00
|
|
|
# include <IO/ReadBufferFromIStream.h>
|
|
|
|
# include <IO/ReadBufferFromS3.h>
|
2020-07-10 09:32:34 +00:00
|
|
|
# include <Common/Stopwatch.h>
|
2019-09-19 10:25:31 +00:00
|
|
|
|
2020-01-28 13:05:37 +00:00
|
|
|
# include <aws/s3/S3Client.h>
|
|
|
|
# include <aws/s3/model/GetObjectRequest.h>
|
|
|
|
# include <common/logger_useful.h>
|
2019-05-31 10:58:43 +00:00
|
|
|
|
2020-01-28 13:05:37 +00:00
|
|
|
# include <utility>
|
2020-01-22 16:17:25 +00:00
|
|
|
|
2021-04-13 19:11:58 +00:00
|
|
|
|
2020-07-10 09:32:34 +00:00
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
|
|
|
extern const Event S3ReadMicroseconds;
|
|
|
|
extern const Event S3ReadBytes;
|
2021-04-12 22:25:19 +00:00
|
|
|
extern const Event S3ReadRequestsErrors;
|
2020-07-10 09:32:34 +00:00
|
|
|
}
|
|
|
|
|
2019-05-31 10:58:43 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2019-12-03 16:23:24 +00:00
|
|
|
/// Error codes used by ReadBufferFromS3 in this file. They are only declared here.
namespace ErrorCodes
{
    /// Thrown by initialize() when the GetObject request does not succeed.
    extern const int S3_ERROR;

    /// Thrown by seek() when reading has already started or `whence` is not SEEK_SET.
    extern const int CANNOT_SEEK_THROUGH_FILE;

    /// Thrown by seek() for a negative offset.
    extern const int SEEK_POSITION_OUT_OF_BOUND;
}
|
2019-05-31 18:14:39 +00:00
|
|
|
|
|
|
|
|
2020-01-22 16:17:25 +00:00
|
|
|
/// Remembers where and how to read an S3 object. The constructor itself sends no request:
/// it only stores its arguments.
///
/// @param client_ptr_               S3 client used for GetObject requests (shared ownership, moved in).
/// @param bucket_                   Bucket name, copied.
/// @param key_                      Object key, copied.
/// @param max_single_read_retries_  Upper bound of the attempt loop in nextImpl().
/// @param buffer_size_              Buffer size passed to the ReadBufferFromIStream created in initialize().
ReadBufferFromS3::ReadBufferFromS3(
    std::shared_ptr<Aws::S3::S3Client> client_ptr_,
    const String & bucket_,
    const String & key_,
    UInt64 max_single_read_retries_,
    size_t buffer_size_)
    : SeekableReadBuffer(nullptr, 0) /// The base class is constructed with a null pointer and zero size.
    , client_ptr(std::move(client_ptr_))
    , bucket(bucket_)
    , key(key_)
    , max_single_read_retries(max_single_read_retries_)
    , buffer_size(buffer_size_)
{
}
|
|
|
|
|
|
|
|
bool ReadBufferFromS3::nextImpl()
|
|
|
|
{
|
2020-07-15 11:15:12 +00:00
|
|
|
Stopwatch watch;
|
2021-04-12 22:25:19 +00:00
|
|
|
bool next_result = false;
|
2021-05-19 21:42:25 +00:00
|
|
|
auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100);
|
2021-04-12 22:25:19 +00:00
|
|
|
|
2021-06-12 11:35:34 +00:00
|
|
|
impl = initialize();
|
|
|
|
|
2021-05-19 21:42:25 +00:00
|
|
|
for (size_t attempt = 0; attempt < max_single_read_retries; ++attempt)
|
2021-04-12 22:25:19 +00:00
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
next_result = impl->next();
|
2021-04-13 19:11:58 +00:00
|
|
|
/// FIXME. 1. Poco `istream` cannot read less than buffer_size or this state is being discarded during
|
|
|
|
/// istream <-> iostream conversion. `gcount` always contains 0,
|
|
|
|
/// that's why we always have error "Cannot read from istream at offset 0".
|
|
|
|
|
2021-04-12 22:25:19 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
catch (const Exception & e)
|
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors, 1);
|
|
|
|
|
2021-04-29 06:29:06 +00:00
|
|
|
LOG_INFO(log, "Caught exception while reading S3 object. Bucket: {}, Key: {}, Offset: {}, Attempt: {}, Message: {}",
|
2021-04-13 09:09:03 +00:00
|
|
|
bucket, key, getPosition(), attempt, e.message());
|
|
|
|
|
2021-04-12 22:25:19 +00:00
|
|
|
impl.reset();
|
2021-05-19 21:42:25 +00:00
|
|
|
impl = initialize();
|
2021-04-12 22:25:19 +00:00
|
|
|
}
|
2021-04-22 01:25:40 +00:00
|
|
|
|
2021-06-03 23:46:21 +00:00
|
|
|
std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds);
|
|
|
|
sleep_time_with_backoff_milliseconds *= 2;
|
2021-04-12 22:25:19 +00:00
|
|
|
}
|
|
|
|
|
2020-07-10 09:32:34 +00:00
|
|
|
watch.stop();
|
|
|
|
ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds());
|
2021-04-12 22:25:19 +00:00
|
|
|
if (!next_result)
|
2020-01-22 16:17:25 +00:00
|
|
|
return false;
|
2021-06-02 15:03:25 +00:00
|
|
|
|
|
|
|
working_buffer = internal_buffer = impl->buffer();
|
|
|
|
pos = working_buffer.begin();
|
2020-07-10 09:32:34 +00:00
|
|
|
|
|
|
|
ProfileEvents::increment(ProfileEvents::S3ReadBytes, internal_buffer.size());
|
|
|
|
|
2021-06-02 15:03:25 +00:00
|
|
|
offset += working_buffer.size();
|
|
|
|
|
2020-01-22 16:17:25 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-01-28 12:48:01 +00:00
|
|
|
off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
2020-01-27 19:17:22 +00:00
|
|
|
{
|
2021-06-07 10:49:34 +00:00
|
|
|
if (impl)
|
|
|
|
throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
|
|
|
|
2020-01-28 13:05:37 +00:00
|
|
|
if (whence != SEEK_SET)
|
|
|
|
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
2020-01-28 12:48:01 +00:00
|
|
|
|
2020-01-28 13:05:37 +00:00
|
|
|
if (offset_ < 0)
|
|
|
|
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
|
2020-01-28 12:48:01 +00:00
|
|
|
|
|
|
|
offset = offset_;
|
2020-01-22 16:17:25 +00:00
|
|
|
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
2020-02-14 14:28:33 +00:00
|
|
|
off_t ReadBufferFromS3::getPosition()
|
|
|
|
{
|
2021-06-02 15:03:25 +00:00
|
|
|
return offset - available();
|
2020-02-14 14:28:33 +00:00
|
|
|
}
|
|
|
|
|
2020-01-27 19:17:22 +00:00
|
|
|
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
|
|
|
{
|
2021-06-02 15:03:25 +00:00
|
|
|
LOG_TRACE(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2019-12-03 16:23:24 +00:00
|
|
|
Aws::S3::Model::GetObjectRequest req;
|
|
|
|
req.SetBucket(bucket);
|
|
|
|
req.SetKey(key);
|
2021-06-02 15:03:25 +00:00
|
|
|
req.SetRange(fmt::format("bytes={}-", offset));
|
2019-05-31 18:14:39 +00:00
|
|
|
|
2019-12-03 16:23:24 +00:00
|
|
|
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
|
2019-05-31 18:14:39 +00:00
|
|
|
|
2019-12-06 14:48:56 +00:00
|
|
|
if (outcome.IsSuccess())
|
|
|
|
{
|
2019-12-03 16:23:24 +00:00
|
|
|
read_result = outcome.GetResultWithOwnership();
|
2020-01-22 16:17:25 +00:00
|
|
|
return std::make_unique<ReadBufferFromIStream>(read_result.GetBody(), buffer_size);
|
2019-12-03 16:23:24 +00:00
|
|
|
}
|
2019-12-06 14:48:56 +00:00
|
|
|
else
|
2019-12-03 16:23:24 +00:00
|
|
|
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
2019-05-31 18:14:39 +00:00
|
|
|
}
|
|
|
|
|
2019-05-31 10:58:43 +00:00
|
|
|
}
|
2019-12-06 14:37:21 +00:00
|
|
|
|
2019-12-09 12:36:06 +00:00
|
|
|
#endif
|