AWS S3 SDK integration.

This commit is contained in:
Pavel Kovalenko 2019-12-03 19:23:24 +03:00 committed by Pavel Kovalenko
parent 3855540e45
commit a9e2327ec0
23 changed files with 364 additions and 345 deletions

12
.gitmodules vendored
View File

@ -107,3 +107,15 @@
[submodule "contrib/sparsehash-c11"] [submodule "contrib/sparsehash-c11"]
path = contrib/sparsehash-c11 path = contrib/sparsehash-c11
url = https://github.com/sparsehash/sparsehash-c11.git url = https://github.com/sparsehash/sparsehash-c11.git
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/aws/aws-sdk-cpp.git
[submodule "aws-c-event-stream"]
path = contrib/aws-c-event-stream
url = https://github.com/awslabs/aws-c-event-stream.git
[submodule "aws-c-common"]
path = contrib/aws-c-common
url = https://github.com/awslabs/aws-c-common.git
[submodule "aws-checksums"]
path = contrib/aws-checksums
url = https://github.com/awslabs/aws-checksums.git

View File

@ -325,6 +325,7 @@ include (cmake/find/brotli.cmake)
include (cmake/find/protobuf.cmake) include (cmake/find/protobuf.cmake)
include (cmake/find/pdqsort.cmake) include (cmake/find/pdqsort.cmake)
include (cmake/find/hdfs3.cmake) # uses protobuf include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/s3.cmake)
include (cmake/find/consistent-hashing.cmake) include (cmake/find/consistent-hashing.cmake)
include (cmake/find/base64.cmake) include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake) include (cmake/find/parquet.cmake)

19
cmake/find/s3.cmake Normal file
View File

@ -0,0 +1,19 @@
option (USE_AWS_S3 "Set to FALSE to use system libbrotli library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3")
if (USE_AWS_S3)
message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_AWS_S3 0)
endif ()
set (MISSING_AWS_S3 1)
endif ()
if (USE_AWS_S3 AND NOT MISSING_AWS_S3)
set(AWS_S3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3/include")
set(AWS_S3_CORE_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core/include")
set(AWS_S3_LIBRARY aws_s3)
set(USE_INTERNAL_AWS_S3_LIBRARY 1)
set(USE_AWS_S3 1)
endif ()
message (STATUS "Using aws_s3=${USE_AWS_S3}: ${AWS_S3_INCLUDE_DIR} : ${AWS_S3_LIBRARY}")

View File

@ -312,6 +312,10 @@ if (USE_INTERNAL_HDFS3_LIBRARY)
add_subdirectory(libhdfs3-cmake) add_subdirectory(libhdfs3-cmake)
endif () endif ()
if (USE_INTERNAL_AWS_S3_LIBRARY)
add_subdirectory(aws-s3-cmake)
endif ()
if (USE_BASE64) if (USE_BASE64)
add_subdirectory (base64-cmake) add_subdirectory (base64-cmake)
endif() endif()

1
contrib/aws vendored Submodule

@ -0,0 +1 @@
Subproject commit 5666c94dc90adf1aa8cb66527b322af1ec85bcf6

1
contrib/aws-c-common vendored Submodule

@ -0,0 +1 @@
Subproject commit 6cd01c101e233691adb27d7a55b267436673940a

1
contrib/aws-c-event-stream vendored Submodule

@ -0,0 +1 @@
Subproject commit 3bc33662f9ccff4f4cbcf9509cc78c26e022fde0

1
contrib/aws-checksums vendored Submodule

@ -0,0 +1 @@
Subproject commit d601f7b4949f6fd64a84e88a126359b734aa56d8

View File

@ -0,0 +1,100 @@
SET(AWS_S3_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3)
SET(AWS_CORE_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core)
SET(AWS_CHECKSUMS_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-checksums)
SET(AWS_COMMON_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-c-common)
SET(AWS_EVENT_STREAM_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream)
file(GLOB AWS_CORE_SOURCES
"${AWS_CORE_LIBRARY_DIR}/source/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/auth/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/client/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/http/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/http/standard/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/http/curl/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/config/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/external/cjson/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/external/tinyxml2/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/internal/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/monitoring/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/net/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/linux-shared/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/platform/linux-shared/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/base64/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/event/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/openssl/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/factory/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/json/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/logging/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/stl/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/stream/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/threading/*.cpp"
"${AWS_CORE_LIBRARY_DIR}/source/utils/xml/*.cpp"
)
file(GLOB AWS_S3_SOURCES
"${AWS_S3_LIBRARY_DIR}/source/*.cpp"
)
file(GLOB AWS_S3_MODEL_SOURCES
"${AWS_S3_LIBRARY_DIR}/source/model/*.cpp"
)
file(GLOB AWS_EVENT_STREAM_SOURCES
"${AWS_EVENT_STREAM_LIBRARY_DIR}/source/*.c"
)
file(GLOB AWS_COMMON_SOURCES
"${AWS_COMMON_LIBRARY_DIR}/source/*.c"
"${AWS_COMMON_LIBRARY_DIR}/source/posix/*.c"
)
file(GLOB AWS_CHECKSUMS_SOURCES
"${AWS_CHECKSUMS_LIBRARY_DIR}/source/*.c"
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/intel/*.c"
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/arm/*.c"
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/visualc/*.c"
)
file(GLOB S3_UNIFIED_SRC
${AWS_EVENT_STREAM_SOURCES}
${AWS_COMMON_SOURCES}
${AWS_CHECKSUMS_SOURCES}
${AWS_S3_SOURCES}
${AWS_S3_MODEL_SOURCES}
${AWS_CORE_SOURCES}
)
set(S3_INCLUDES
"${CMAKE_CURRENT_SOURCE_DIR}/include/"
"${AWS_COMMON_LIBRARY_DIR}/include/"
"${AWS_CHECKSUMS_LIBRARY_DIR}/include/"
"${AWS_EVENT_STREAM_LIBRARY_DIR}/include/"
"${AWS_S3_LIBRARY_DIR}/include/"
"${AWS_CORE_LIBRARY_DIR}/include/"
)
add_library(aws_s3 ${S3_UNIFIED_SRC})
OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF)
configure_file("${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h.in" "${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h")
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1")
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7")
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231")
set(OPENSSL_USE_STATIC_LIBS TRUE)
find_package(OpenSSL REQUIRED)
set(CURL_LIBRARY "-lcurl")
add_definitions(-DCURL_STATICLIB)
find_package(CURL REQUIRED)
add_definitions(-DENABLE_OPENSSL_ENCRYPTION)
add_definitions(-DENABLE_CURL_CLIENT)
target_include_directories(aws_s3 PRIVATE ${S3_INCLUDES})
target_include_directories(aws_s3 PRIVATE ${CURL_INCLUDE_DIR})
target_link_libraries(aws_s3 OpenSSL::Crypto)
target_link_libraries(aws_s3 ${CURL_LIBRARIES})

View File

@ -421,6 +421,12 @@ if (USE_HDFS)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
endif() endif()
if (USE_AWS_S3)
target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
endif()
if (USE_BROTLI) if (USE_BROTLI)
target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY}) target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})

View File

@ -465,6 +465,7 @@ namespace ErrorCodes
extern const int UNKNOWN_DICTIONARY = 488; extern const int UNKNOWN_DICTIONARY = 488;
extern const int INCORRECT_DICTIONARY_DEFINITION = 489; extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
extern const int CANNOT_FORMAT_DATETIME = 490; extern const int CANNOT_FORMAT_DATETIME = 490;
extern const int S3_ERROR = 491;
extern const int KEEPER_EXCEPTION = 999; extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000; extern const int POCO_EXCEPTION = 1000;

View File

@ -1,65 +1,40 @@
#include <IO/ReadBufferFromS3.h> #include <IO/ReadBufferFromS3.h>
#include <IO/ReadBufferFromIStream.h> #include <IO/ReadBufferFromIStream.h>
#include <IO/S3Common.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <aws/s3/model/GetObjectRequest.h>
namespace DB namespace DB
{ {
const int DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT = 2; namespace ErrorCodes
ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
const String & access_key_id_,
const String & secret_access_key_,
const ConnectionTimeouts & timeouts)
: ReadBuffer(nullptr, 0)
, uri {uri_}
, session {makeHTTPSession(uri_, timeouts)}
{ {
Poco::Net::HTTPResponse response; extern const int S3_ERROR;
std::unique_ptr<Poco::Net::HTTPRequest> request;
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT; ++i)
{
// With empty path poco will send "POST HTTP/1.1" its bug.
if (uri.getPath().empty())
uri.setPath("/");
request = std::make_unique<Poco::Net::HTTPRequest>(
Poco::Net::HTTPRequest::HTTP_GET,
uri.getPathAndQuery(),
Poco::Net::HTTPRequest::HTTP_1_1);
request->setHost(uri.getHost()); // use original, not resolved host name in header
S3Helper::authenticateRequest(*request, access_key_id_, secret_access_key_);
LOG_TRACE((&Logger::get("ReadBufferFromS3")), "Sending request to " << uri.toString());
session->sendRequest(*request);
istr = &session->receiveResponse(response);
// Handle 307 Temporary Redirect in order to allow request redirection
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
break;
auto location_iterator = response.find("Location");
if (location_iterator == response.end())
break;
uri = location_iterator->second;
session = makeHTTPSession(uri, timeouts);
}
assertResponseIsOk(*request, response, *istr);
impl = std::make_unique<ReadBufferFromIStream>(*istr, DBMS_DEFAULT_BUFFER_SIZE);
} }
ReadBufferFromS3::ReadBufferFromS3(const std::shared_ptr<Aws::S3::S3Client> & client_ptr,
const String & bucket,
const String & key,
size_t buffer_size_): ReadBuffer(nullptr, 0)
{
Aws::S3::Model::GetObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
if (outcome.IsSuccess()) {
read_result = outcome.GetResultWithOwnership();
impl = std::make_unique<ReadBufferFromIStream>(read_result.GetBody(), buffer_size_);
}
else {
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
}
}
bool ReadBufferFromS3::nextImpl() bool ReadBufferFromS3::nextImpl()
{ {
if (!impl->next()) if (!impl->next())

View File

@ -7,7 +7,7 @@
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include <Poco/Net/HTTPBasicCredentials.h> #include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/URI.h> #include <Poco/URI.h>
#include <aws/s3/S3Client.h>
namespace DB namespace DB
{ {
@ -15,17 +15,18 @@ namespace DB
*/ */
class ReadBufferFromS3 : public ReadBuffer class ReadBufferFromS3 : public ReadBuffer
{ {
private:
Logger * log = &Logger::get("ReadBufferFromS3");
Aws::S3::Model::GetObjectResult read_result;
protected: protected:
Poco::URI uri;
HTTPSessionPtr session;
std::istream * istr; /// owned by session
std::unique_ptr<ReadBuffer> impl; std::unique_ptr<ReadBuffer> impl;
public: public:
explicit ReadBufferFromS3(const Poco::URI & uri_, explicit ReadBufferFromS3(const std::shared_ptr<Aws::S3::S3Client> & client_ptr,
const String & access_key_id_, const String & bucket,
const String & secret_access_key_, const String & key,
const ConnectionTimeouts & timeouts = {}); size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
bool nextImpl() override; bool nextImpl() override;
}; };

View File

@ -3,12 +3,8 @@
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <iterator> #include <iterator>
#include <sstream> #include <aws/core/Aws.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
#include <Poco/Base64Encoder.h>
#include <Poco/HMACEngine.h>
#include <Poco/SHA1Engine.h>
#include <Poco/URI.h>
namespace DB namespace DB
@ -16,45 +12,59 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int CANNOT_FORMAT_DATETIME; extern const int BAD_ARGUMENTS;
} }
void S3Helper::authenticateRequest(
Poco::Net::HTTPRequest & request, static std::mutex aws_init_lock;
static Aws::SDKOptions aws_options;
static std::atomic<bool> aws_initialized(false);
static const std::regex S3_URL_REGEX(R"((https?://.*)/(.*)/(.*))");
static void initializeAwsAPI() {
std::lock_guard<std::mutex> lock(aws_init_lock);
if (!aws_initialized.load()) {
Aws::InitAPI(aws_options);
aws_initialized.store(true);
}
}
std::shared_ptr<Aws::S3::S3Client> S3Helper::createS3Client(const String & endpoint_url,
const String & access_key_id, const String & access_key_id,
const String & secret_access_key) const String & secret_access_key)
{ {
/// See https://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html initializeAwsAPI();
if (access_key_id.empty()) Aws::Client::ClientConfiguration cfg;
return; cfg.endpointOverride = endpoint_url;
cfg.scheme = Aws::Http::Scheme::HTTP;
/// Limitations: auto cred_provider = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(access_key_id, secret_access_key);
/// 1. Virtual hosted-style requests are not supported (e.g. `http://johnsmith.net.s3.amazonaws.com/homepage.html`).
/// 2. AMZ headers are not supported (TODO).
if (!request.has("Date")) return std::make_shared<Aws::S3::S3Client>(
{ std::move(cred_provider),
WriteBufferFromOwnString out; std::move(cfg),
writeDateTimeTextRFC1123(time(nullptr), out, DateLUT::instance("UTC")); Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
request.set("Date", out.str()); false
);
}
S3Endpoint S3Helper::parseS3EndpointFromUrl(const String & url) {
std::smatch match;
if (std::regex_search(url, match, S3_URL_REGEX) && match.size() > 1) {
S3Endpoint endpoint;
endpoint.endpoint_url = match.str(1);
endpoint.bucket = match.str(2);
endpoint.key = match.str(3);
return endpoint;
} }
else
String string_to_sign = request.getMethod() + "\n" throw Exception("Failed to parse S3 Storage URL. It should contain endpoint url, bucket and file. "
+ request.get("Content-MD5", "") + "\n" "Regex is (https?://.*)/(.*)/(.*)", ErrorCodes::BAD_ARGUMENTS);
+ request.get("Content-Type", "") + "\n"
+ request.get("Date") + "\n"
+ Poco::URI(request.getURI()).getPathAndQuery();
Poco::HMACEngine<Poco::SHA1Engine> engine(secret_access_key);
engine.update(string_to_sign);
auto digest = engine.digest();
std::ostringstream signature;
Poco::Base64Encoder encoder(signature);
std::copy(digest.begin(), digest.end(), std::ostream_iterator<char>(encoder));
encoder.close();
request.set("Authorization", "AWS " + access_key_id + ":" + signature.str());
} }
} }

View File

@ -1,19 +1,29 @@
#pragma once #pragma once
#include <regex>
#include <Core/Types.h> #include <Core/Types.h>
#include <Poco/Net/HTTPRequest.h> #include <Poco/Net/HTTPRequest.h>
#include <aws/s3/S3Client.h>
namespace DB namespace DB
{ {
namespace S3Helper struct S3Endpoint {
{ String endpoint_url;
void authenticateRequest( String bucket;
Poco::Net::HTTPRequest & request, String key;
const String & access_key_id,
const String & secret_access_key);
}; };
namespace S3Helper
{
S3Endpoint parseS3EndpointFromUrl(const String & url);
std::shared_ptr<Aws::S3::S3Client> createS3Client(const String & endpoint_url,
const String & access_key_id,
const String & secret_access_key);
}
} }

View File

@ -1,22 +1,18 @@
#include <IO/WriteBufferFromS3.h> #include <IO/WriteBufferFromS3.h>
#include <IO/S3Common.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Poco/DOM/AutoPtr.h>
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/NodeList.h>
#include <Poco/SAX/InputSource.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <aws/s3/model/CreateMultipartUploadRequest.h>
#include <aws/s3/model/UploadPartRequest.h>
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
#include <utility>
namespace DB namespace DB
{ {
const int DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT = 2;
// S3 protocol does not allow to have multipart upload with more than 10000 parts. // S3 protocol does not allow to have multipart upload with more than 10000 parts.
// In case server does not return an error on exceeding that number, we print a warning // In case server does not return an error on exceeding that number, we print a warning
// because custom S3 implementation may allow relaxed requirements on that. // because custom S3 implementation may allow relaxed requirements on that.
@ -25,28 +21,26 @@ const int S3_WARN_MAX_PARTS = 10000;
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int INCORRECT_DATA; extern const int S3_ERROR;
} }
WriteBufferFromS3::WriteBufferFromS3( WriteBufferFromS3::WriteBufferFromS3(
const Poco::URI & uri_, std::shared_ptr<Aws::S3::S3Client> client_ptr_,
const String & access_key_id_, const String & bucket_,
const String & secret_access_key_, const String & key_,
size_t minimum_upload_part_size_, size_t minimum_upload_part_size_,
const ConnectionTimeouts & timeouts_) size_t buffer_size_
: BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) )
, uri {uri_} : BufferWithOwnMemory<WriteBuffer>(buffer_size_, nullptr, 0)
, access_key_id {access_key_id_} , bucket(bucket_)
, secret_access_key {secret_access_key_} , key(key_)
, client_ptr(std::move(client_ptr_))
, minimum_upload_part_size {minimum_upload_part_size_} , minimum_upload_part_size {minimum_upload_part_size_}
, timeouts {timeouts_}
, temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)} , temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)}
, last_part_size {0} , last_part_size {0}
{ {
initiate(); initiate();
/// FIXME: Implement rest of S3 authorization.
} }
@ -96,184 +90,72 @@ WriteBufferFromS3::~WriteBufferFromS3()
void WriteBufferFromS3::initiate() void WriteBufferFromS3::initiate()
{ {
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadInitiate.html Aws::S3::Model::CreateMultipartUploadRequest req;
Poco::Net::HTTPResponse response; req.SetBucket(bucket);
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr; req.SetKey(key);
HTTPSessionPtr session;
std::istream * istr = nullptr; /// owned by session
Poco::URI initiate_uri = uri;
initiate_uri.setRawQuery("uploads");
for (auto & param: uri.getQueryParameters())
{
initiate_uri.addQueryParameter(param.first, param.second);
}
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i) auto outcome = client_ptr->CreateMultipartUpload(req);
{
session = makeHTTPSession(initiate_uri, timeouts);
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_POST, initiate_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request_ptr->setHost(initiate_uri.getHost()); // use original, not resolved host name in header
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key); if (outcome.IsSuccess()) {
upload_id = outcome.GetResult().GetUploadId();
request_ptr->setContentLength(0); LOG_DEBUG(log, "Multipart upload initiated. Upload id = " + upload_id);
} else {
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << initiate_uri.toString()); throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
session->sendRequest(*request_ptr);
istr = &session->receiveResponse(response);
// Handle 307 Temporary Redirect in order to allow request redirection
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
break;
auto location_iterator = response.find("Location");
if (location_iterator == response.end())
break;
initiate_uri = location_iterator->second;
}
assertResponseIsOk(*request_ptr, response, *istr);
Poco::XML::InputSource src(*istr);
Poco::XML::DOMParser parser;
Poco::AutoPtr<Poco::XML::Document> document = parser.parse(&src);
Poco::AutoPtr<Poco::XML::NodeList> nodes = document->getElementsByTagName("UploadId");
if (nodes->length() != 1)
{
throw Exception("Incorrect XML in response, no upload id", ErrorCodes::INCORRECT_DATA);
}
upload_id = nodes->item(0)->innerText();
if (upload_id.empty())
{
throw Exception("Incorrect XML in response, empty upload id", ErrorCodes::INCORRECT_DATA);
} }
} }
void WriteBufferFromS3::writePart(const String & data) void WriteBufferFromS3::writePart(const String & data)
{ {
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html
Poco::Net::HTTPResponse response;
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr;
HTTPSessionPtr session;
std::istream * istr = nullptr; /// owned by session
Poco::URI part_uri = uri;
part_uri.addQueryParameter("partNumber", std::to_string(part_tags.size() + 1));
part_uri.addQueryParameter("uploadId", upload_id);
if (part_tags.size() == S3_WARN_MAX_PARTS) if (part_tags.size() == S3_WARN_MAX_PARTS)
{ {
// Don't throw exception here by ourselves but leave the decision to take by S3 server. // Don't throw exception here by ourselves but leave the decision to take by S3 server.
LOG_WARNING(&Logger::get("WriteBufferFromS3"), "Maximum part number in S3 protocol has reached (too many parts). Server may not accept this whole upload."); LOG_WARNING(log, "Maximum part number in S3 protocol has reached (too many parts). Server may not accept this whole upload.");
} }
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i) Aws::S3::Model::UploadPartRequest req;
{
session = makeHTTPSession(part_uri, timeouts);
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_PUT, part_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request_ptr->setHost(part_uri.getHost()); // use original, not resolved host name in header
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key); req.SetBucket(bucket);
req.SetKey(key);
req.SetPartNumber(part_tags.size() + 1);
req.SetUploadId(upload_id);
req.SetContentLength(data.size());
req.SetBody(std::make_shared<Aws::StringStream>(data));
request_ptr->setExpectContinue(true); auto outcome = client_ptr->UploadPart(req);
request_ptr->setContentLength(data.size()); if (outcome.IsSuccess()) {
auto etag = outcome.GetResult().GetETag();
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << part_uri.toString()); part_tags.push_back(etag);
LOG_DEBUG(log, "Write part " + std::to_string(part_tags.size()) + " finished. Upload id = " + upload_id + ". Etag = " + etag);
std::ostream & ostr = session->sendRequest(*request_ptr); } else {
if (session->peekResponse(response)) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
{
// Received 100-continue.
ostr << data;
}
istr = &session->receiveResponse(response);
// Handle 307 Temporary Redirect in order to allow request redirection
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
break;
auto location_iterator = response.find("Location");
if (location_iterator == response.end())
break;
part_uri = location_iterator->second;
} }
assertResponseIsOk(*request_ptr, response, *istr);
auto etag_iterator = response.find("ETag");
if (etag_iterator == response.end())
{
throw Exception("Incorrect response, no ETag", ErrorCodes::INCORRECT_DATA);
}
part_tags.push_back(etag_iterator->second);
} }
void WriteBufferFromS3::complete() void WriteBufferFromS3::complete()
{ {
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html Aws::S3::Model::CompleteMultipartUploadRequest req;
Poco::Net::HTTPResponse response; req.SetBucket(bucket);
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr; req.SetKey(key);
HTTPSessionPtr session; req.SetUploadId(upload_id);
std::istream * istr = nullptr; /// owned by session
Poco::URI complete_uri = uri;
complete_uri.addQueryParameter("uploadId", upload_id);
String data; Aws::S3::Model::CompletedMultipartUpload multipart_upload;
WriteBufferFromString buffer(data); for (size_t i = 0; i < part_tags.size(); i++) {
writeString("<CompleteMultipartUpload>", buffer); Aws::S3::Model::CompletedPart part;
for (size_t i = 0; i < part_tags.size(); ++i) multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
{
writeString("<Part><PartNumber>", buffer);
writeIntText(i + 1, buffer);
writeString("</PartNumber><ETag>", buffer);
writeString(part_tags[i], buffer);
writeString("</ETag></Part>", buffer);
} }
writeString("</CompleteMultipartUpload>", buffer);
buffer.finish();
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i) req.SetMultipartUpload(multipart_upload);
{
session = makeHTTPSession(complete_uri, timeouts);
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_POST, complete_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request_ptr->setHost(complete_uri.getHost()); // use original, not resolved host name in header
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key); auto outcome = client_ptr->CompleteMultipartUpload(req);
request_ptr->setExpectContinue(true); if (outcome.IsSuccess()) {
LOG_DEBUG(log, "Multipart upload completed. Upload_id = " + upload_id);
request_ptr->setContentLength(data.size()); } else {
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << complete_uri.toString());
std::ostream & ostr = session->sendRequest(*request_ptr);
if (session->peekResponse(response))
{
// Received 100-continue.
ostr << data;
}
istr = &session->receiveResponse(response);
// Handle 307 Temporary Redirect in order to allow request redirection
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
break;
auto location_iterator = response.find("Location");
if (location_iterator == response.end())
break;
complete_uri = location_iterator->second;
} }
assertResponseIsOk(*request_ptr, response, *istr);
} }
} }

View File

@ -8,9 +8,7 @@
#include <IO/BufferWithOwnMemory.h> #include <IO/BufferWithOwnMemory.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <Poco/Net/HTTPBasicCredentials.h> #include <aws/s3/S3Client.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h>
namespace DB namespace DB
@ -20,11 +18,10 @@ namespace DB
class WriteBufferFromS3 : public BufferWithOwnMemory<WriteBuffer> class WriteBufferFromS3 : public BufferWithOwnMemory<WriteBuffer>
{ {
private: private:
Poco::URI uri; String bucket;
String access_key_id; String key;
String secret_access_key; std::shared_ptr<Aws::S3::S3Client> client_ptr;
size_t minimum_upload_part_size; size_t minimum_upload_part_size;
ConnectionTimeouts timeouts;
String buffer_string; String buffer_string;
std::unique_ptr<WriteBufferFromString> temporary_buffer; std::unique_ptr<WriteBufferFromString> temporary_buffer;
size_t last_part_size; size_t last_part_size;
@ -34,12 +31,14 @@ private:
String upload_id; String upload_id;
std::vector<String> part_tags; std::vector<String> part_tags;
Logger * log = &Logger::get("WriteBufferFromS3");
public: public:
explicit WriteBufferFromS3(const Poco::URI & uri, explicit WriteBufferFromS3(std::shared_ptr<Aws::S3::S3Client> client_ptr_,
const String & access_key_id, const String & bucket_,
const String & secret_access_key, const String & key_,
size_t minimum_upload_part_size_, size_t minimum_upload_part_size_,
const ConnectionTimeouts & timeouts = {}); size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
void nextImpl() override; void nextImpl() override;

View File

@ -1,3 +1,4 @@
#include <IO/S3Common.h>
#include <Storages/StorageFactory.h> #include <Storages/StorageFactory.h>
#include <Storages/StorageS3.h> #include <Storages/StorageS3.h>
@ -16,7 +17,8 @@
#include <DataStreams/IBlockInputStream.h> #include <DataStreams/IBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h> #include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Poco/Net/HTTPRequest.h> #include <aws/core/client/ClientConfiguration.h>
#include <aws/s3/S3Client.h>
namespace DB namespace DB
@ -26,24 +28,25 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
} }
namespace namespace
{ {
class StorageS3BlockInputStream : public IBlockInputStream class StorageS3BlockInputStream : public IBlockInputStream
{ {
public: public:
StorageS3BlockInputStream(const Poco::URI & uri, StorageS3BlockInputStream(
const String & access_key_id,
const String & secret_access_key,
const String & format, const String & format,
const String & name_, const String & name_,
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
const ConnectionTimeouts & timeouts, const CompressionMethod compression_method,
const CompressionMethod compression_method) const std::shared_ptr<Aws::S3::S3Client> & client,
const String & bucket,
const String & key)
: name(name_) : name(name_)
{ {
read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, uri, access_key_id, secret_access_key, timeouts); read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, client, bucket, key);
reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
} }
@ -81,24 +84,18 @@ namespace
class StorageS3BlockOutputStream : public IBlockOutputStream class StorageS3BlockOutputStream : public IBlockOutputStream
{ {
public: public:
StorageS3BlockOutputStream(const Poco::URI & uri, StorageS3BlockOutputStream(
const String & access_key_id,
const String & secret_access_key,
const String & format, const String & format,
UInt64 min_upload_part_size, UInt64 min_upload_part_size,
const Block & sample_block_, const Block & sample_block_,
const Context & context, const Context & context,
const ConnectionTimeouts & timeouts, const CompressionMethod compression_method,
const CompressionMethod compression_method) const std::shared_ptr<Aws::S3::S3Client> & client,
const String & bucket,
const String & key)
: sample_block(sample_block_) : sample_block(sample_block_)
{ {
write_buf = getWriteBuffer<WriteBufferFromS3>( write_buf = getWriteBuffer<WriteBufferFromS3>(compression_method, client, bucket, key, min_upload_part_size);
compression_method,
uri,
access_key_id,
secret_access_key,
min_upload_part_size,
timeouts);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
} }
@ -132,8 +129,7 @@ namespace
} }
StorageS3::StorageS3( StorageS3::StorageS3(const S3Endpoint & endpoint_,
const Poco::URI & uri_,
const String & access_key_id_, const String & access_key_id_,
const String & secret_access_key_, const String & secret_access_key_,
const std::string & database_name_, const std::string & database_name_,
@ -145,15 +141,14 @@ StorageS3::StorageS3(
Context & context_, Context & context_,
const String & compression_method_ = "") const String & compression_method_ = "")
: IStorage(columns_) : IStorage(columns_)
, uri(uri_) , endpoint(endpoint_)
, access_key_id(access_key_id_)
, secret_access_key(secret_access_key_)
, context_global(context_) , context_global(context_)
, format_name(format_name_) , format_name(format_name_)
, database_name(database_name_) , database_name(database_name_)
, table_name(table_name_) , table_name(table_name_)
, min_upload_part_size(min_upload_part_size_) , min_upload_part_size(min_upload_part_size_)
, compression_method(compression_method_) , compression_method(compression_method_)
, client(S3Helper::createS3Client(endpoint_.endpoint_url, access_key_id_, secret_access_key_))
{ {
setColumns(columns_); setColumns(columns_);
setConstraints(constraints_); setConstraints(constraints_);
@ -169,16 +164,15 @@ BlockInputStreams StorageS3::read(
unsigned /*num_streams*/) unsigned /*num_streams*/)
{ {
BlockInputStreamPtr block_input = std::make_shared<StorageS3BlockInputStream>( BlockInputStreamPtr block_input = std::make_shared<StorageS3BlockInputStream>(
uri,
access_key_id,
secret_access_key,
format_name, format_name,
getName(), getName(),
getHeaderBlock(column_names), getHeaderBlock(column_names),
context, context,
max_block_size, max_block_size,
ConnectionTimeouts::getHTTPTimeouts(context), IStorage::chooseCompressionMethod(endpoint.endpoint_url, compression_method),
IStorage::chooseCompressionMethod(uri.toString(), compression_method)); client,
endpoint.bucket,
endpoint.key);
auto column_defaults = getColumns().getDefaults(); auto column_defaults = getColumns().getDefaults();
if (column_defaults.empty()) if (column_defaults.empty())
@ -195,15 +189,9 @@ void StorageS3::rename(const String & /*new_path_to_db*/, const String & new_dat
BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/) BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/)
{ {
return std::make_shared<StorageS3BlockOutputStream>( return std::make_shared<StorageS3BlockOutputStream>(
uri, format_name, min_upload_part_size, getSampleBlock(), context_global,
access_key_id, IStorage::chooseCompressionMethod(endpoint.endpoint_url, compression_method),
secret_access_key, client, endpoint.bucket, endpoint.key);
format_name,
min_upload_part_size,
getSampleBlock(),
context_global,
ConnectionTimeouts::getHTTPTimeouts(context_global),
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
} }
void registerStorageS3(StorageFactory & factory) void registerStorageS3(StorageFactory & factory)
@ -220,7 +208,7 @@ void registerStorageS3(StorageFactory & factory)
engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.local_context); engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.local_context);
String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>(); String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
Poco::URI uri(url); S3Endpoint endpoint = S3Helper::parseS3EndpointFromUrl(url);
String format_name = engine_args[engine_args.size() - 1]->as<ASTLiteral &>().value.safeGet<String>(); String format_name = engine_args[engine_args.size() - 1]->as<ASTLiteral &>().value.safeGet<String>();
@ -240,7 +228,8 @@ void registerStorageS3(StorageFactory & factory)
else else
compression_method = "auto"; compression_method = "auto";
return StorageS3::create(uri, access_key_id, secret_access_key, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context); return StorageS3::create(endpoint, access_key_id, secret_access_key, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context);
}); });
} }
} }

View File

@ -4,10 +4,12 @@
#include <Poco/URI.h> #include <Poco/URI.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <ext/shared_ptr_helper.h> #include <ext/shared_ptr_helper.h>
#include <aws/s3/S3Client.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
namespace DB namespace DB
{ {
/** /**
* This class represents table engine for external S3 urls. * This class represents table engine for external S3 urls.
* It sends HTTP GET to server when select is called and * It sends HTTP GET to server when select is called and
@ -16,8 +18,7 @@ namespace DB
class StorageS3 : public ext::shared_ptr_helper<StorageS3>, public IStorage class StorageS3 : public ext::shared_ptr_helper<StorageS3>, public IStorage
{ {
public: public:
StorageS3( StorageS3(const S3Endpoint & endpoint,
const Poco::URI & uri_,
const String & access_key_id, const String & access_key_id,
const String & secret_access_key, const String & secret_access_key,
const String & database_name_, const String & database_name_,
@ -57,9 +58,7 @@ public:
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override; void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
private: private:
Poco::URI uri; S3Endpoint endpoint;
String access_key_id;
String secret_access_key;
const Context & context_global; const Context & context_global;
String format_name; String format_name;
@ -67,6 +66,7 @@ private:
String table_name; String table_name;
UInt64 min_upload_part_size; UInt64 min_upload_part_size;
String compression_method; String compression_method;
std::shared_ptr<Aws::S3::S3Client> client;
}; };
} }

View File

@ -1,10 +1,10 @@
#include <IO/S3Common.h>
#include <Storages/StorageS3.h> #include <Storages/StorageS3.h>
#include <Interpreters/evaluateConstantExpression.h> #include <Interpreters/evaluateConstantExpression.h>
#include <TableFunctions/TableFunctionFactory.h> #include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionS3.h> #include <TableFunctions/TableFunctionS3.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Poco/URI.h> #include "parseColumnsListForTableFunction.h"
namespace DB namespace DB
{ {
@ -76,9 +76,9 @@ StoragePtr TableFunctionS3::getStorage(
const std::string & table_name, const std::string & table_name,
const String & compression_method) const const String & compression_method) const
{ {
Poco::URI uri(source); S3Endpoint endpoint = S3Helper::parseS3EndpointFromUrl(source);
UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size; UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size;
return StorageS3::create(uri, access_key_id, secret_access_key, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method); return StorageS3::create(endpoint, access_key_id, secret_access_key, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method);
} }
void registerTableFunctionS3(TableFunctionFactory & factory) void registerTableFunctionS3(TableFunctionFactory & factory)

View File

@ -435,6 +435,9 @@ class ClickHouseCluster:
logging.info("Trying to connect to Minio...") logging.info("Trying to connect to Minio...")
self.wait_minio_to_start() self.wait_minio_to_start()
# TODO: Remove after image update by separate commit.
subprocess.check_output(['docker', 'build', '-t', 'yandex/clickhouse-integration-test', '/ClickHouse/docker/test/integration/'], stderr=subprocess.STDOUT)
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate'] clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd))) logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
subprocess_check_call(clickhouse_start_cmd) subprocess_check_call(clickhouse_start_cmd)

View File

@ -111,9 +111,9 @@ def run_query(instance, query, stdin=None, settings=None):
# Test simple put. # Test simple put.
@pytest.mark.parametrize("maybe_auth,positive", [ @pytest.mark.parametrize("maybe_auth,positive", [
("",True), ("", True),
("'minio','minio123',",True), ("'minio','minio123',", True),
("'wrongid','wrongkey',",False) ("'wrongid','wrongkey',", False)
]) ])
def test_put(cluster, maybe_auth, positive): def test_put(cluster, maybe_auth, positive):
# type: (ClickHouseCluster) -> None # type: (ClickHouseCluster) -> None
@ -130,7 +130,8 @@ def test_put(cluster, maybe_auth, positive):
try: try:
run_query(instance, put_query) run_query(instance, put_query)
except helpers.client.QueryRuntimeException: except helpers.client.QueryRuntimeException:
assert not positive if positive:
raise
else: else:
assert positive assert positive
assert values_csv == get_s3_file_content(cluster, bucket, filename) assert values_csv == get_s3_file_content(cluster, bucket, filename)
@ -138,9 +139,9 @@ def test_put(cluster, maybe_auth, positive):
# Test put values in CSV format. # Test put values in CSV format.
@pytest.mark.parametrize("maybe_auth,positive", [ @pytest.mark.parametrize("maybe_auth,positive", [
("",True), ("", True),
("'minio','minio123',",True), ("'minio','minio123',", True),
("'wrongid','wrongkey',",False) ("'wrongid','wrongkey',", False)
]) ])
def test_put_csv(cluster, maybe_auth, positive): def test_put_csv(cluster, maybe_auth, positive):
# type: (ClickHouseCluster) -> None # type: (ClickHouseCluster) -> None
@ -156,7 +157,8 @@ def test_put_csv(cluster, maybe_auth, positive):
try: try:
run_query(instance, put_query, stdin=csv_data) run_query(instance, put_query, stdin=csv_data)
except helpers.client.QueryRuntimeException: except helpers.client.QueryRuntimeException:
assert not positive if positive:
raise
else: else:
assert positive assert positive
assert csv_data == get_s3_file_content(cluster, bucket, filename) assert csv_data == get_s3_file_content(cluster, bucket, filename)
@ -191,9 +193,9 @@ def test_put_get_with_redirect(cluster):
# Test multipart put. # Test multipart put.
@pytest.mark.parametrize("maybe_auth,positive", [ @pytest.mark.parametrize("maybe_auth,positive", [
("",True), ("", True),
("'minio','minio123',",True), # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
("'wrongid','wrongkey',",False) ("'wrongid','wrongkey',", False)
]) ])
def test_multipart_put(cluster, maybe_auth, positive): def test_multipart_put(cluster, maybe_auth, positive):
# type: (ClickHouseCluster) -> None # type: (ClickHouseCluster) -> None
@ -222,13 +224,14 @@ def test_multipart_put(cluster, maybe_auth, positive):
try: try:
run_query(instance, put_query, stdin=csv_data, settings={'s3_min_upload_part_size': min_part_size_bytes}) run_query(instance, put_query, stdin=csv_data, settings={'s3_min_upload_part_size': min_part_size_bytes})
except helpers.client.QueryRuntimeException: except helpers.client.QueryRuntimeException:
assert not positive if positive:
raise
else: else:
assert positive assert positive
# Use Nginx access logs to count number of parts uploaded to Minio. # Use Nginx access logs to count number of parts uploaded to Minio.
nginx_logs = get_nginx_access_logs() nginx_logs = get_nginx_access_logs()
uploaded_parts = filter(lambda log_line: log_line.find(filename) >= 0 and log_line.find("PUT") >= 0, nginx_logs) uploaded_parts = filter(lambda log_line: log_line.find(filename) >= 0 and log_line.find("PUT") >= 0, nginx_logs)
assert uploaded_parts > 1 assert len(uploaded_parts) > 1
assert csv_data == get_s3_file_content(cluster, bucket, filename) assert csv_data == get_s3_file_content(cluster, bucket, filename)

View File

@ -4,7 +4,7 @@ FROM ubuntu:18.04
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
RUN apt-get update \ RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 curl libcurl4 libcurl4-openssl-dev \
&& rm -rf \ && rm -rf \
/var/lib/apt/lists/* \ /var/lib/apt/lists/* \
/var/cache/debconf \ /var/cache/debconf \