mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
AWS S3 SDK integration.
This commit is contained in:
parent
3855540e45
commit
a9e2327ec0
12
.gitmodules
vendored
12
.gitmodules
vendored
@ -107,3 +107,15 @@
|
|||||||
[submodule "contrib/sparsehash-c11"]
|
[submodule "contrib/sparsehash-c11"]
|
||||||
path = contrib/sparsehash-c11
|
path = contrib/sparsehash-c11
|
||||||
url = https://github.com/sparsehash/sparsehash-c11.git
|
url = https://github.com/sparsehash/sparsehash-c11.git
|
||||||
|
[submodule "contrib/aws"]
|
||||||
|
path = contrib/aws
|
||||||
|
url = https://github.com/aws/aws-sdk-cpp.git
|
||||||
|
[submodule "aws-c-event-stream"]
|
||||||
|
path = contrib/aws-c-event-stream
|
||||||
|
url = https://github.com/awslabs/aws-c-event-stream.git
|
||||||
|
[submodule "aws-c-common"]
|
||||||
|
path = contrib/aws-c-common
|
||||||
|
url = https://github.com/awslabs/aws-c-common.git
|
||||||
|
[submodule "aws-checksums"]
|
||||||
|
path = contrib/aws-checksums
|
||||||
|
url = https://github.com/awslabs/aws-checksums.git
|
||||||
|
@ -325,6 +325,7 @@ include (cmake/find/brotli.cmake)
|
|||||||
include (cmake/find/protobuf.cmake)
|
include (cmake/find/protobuf.cmake)
|
||||||
include (cmake/find/pdqsort.cmake)
|
include (cmake/find/pdqsort.cmake)
|
||||||
include (cmake/find/hdfs3.cmake) # uses protobuf
|
include (cmake/find/hdfs3.cmake) # uses protobuf
|
||||||
|
include (cmake/find/s3.cmake)
|
||||||
include (cmake/find/consistent-hashing.cmake)
|
include (cmake/find/consistent-hashing.cmake)
|
||||||
include (cmake/find/base64.cmake)
|
include (cmake/find/base64.cmake)
|
||||||
include (cmake/find/parquet.cmake)
|
include (cmake/find/parquet.cmake)
|
||||||
|
19
cmake/find/s3.cmake
Normal file
19
cmake/find/s3.cmake
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
option (USE_AWS_S3 "Set to FALSE to use system libbrotli library instead of bundled" ${NOT_UNBUNDLED})
|
||||||
|
|
||||||
|
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3")
|
||||||
|
if (USE_AWS_S3)
|
||||||
|
message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init --recursive")
|
||||||
|
set (USE_AWS_S3 0)
|
||||||
|
endif ()
|
||||||
|
set (MISSING_AWS_S3 1)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
if (USE_AWS_S3 AND NOT MISSING_AWS_S3)
|
||||||
|
set(AWS_S3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3/include")
|
||||||
|
set(AWS_S3_CORE_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core/include")
|
||||||
|
set(AWS_S3_LIBRARY aws_s3)
|
||||||
|
set(USE_INTERNAL_AWS_S3_LIBRARY 1)
|
||||||
|
set(USE_AWS_S3 1)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
message (STATUS "Using aws_s3=${USE_AWS_S3}: ${AWS_S3_INCLUDE_DIR} : ${AWS_S3_LIBRARY}")
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -312,6 +312,10 @@ if (USE_INTERNAL_HDFS3_LIBRARY)
|
|||||||
add_subdirectory(libhdfs3-cmake)
|
add_subdirectory(libhdfs3-cmake)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (USE_INTERNAL_AWS_S3_LIBRARY)
|
||||||
|
add_subdirectory(aws-s3-cmake)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (USE_BASE64)
|
if (USE_BASE64)
|
||||||
add_subdirectory (base64-cmake)
|
add_subdirectory (base64-cmake)
|
||||||
endif()
|
endif()
|
||||||
|
1
contrib/aws
vendored
Submodule
1
contrib/aws
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 5666c94dc90adf1aa8cb66527b322af1ec85bcf6
|
1
contrib/aws-c-common
vendored
Submodule
1
contrib/aws-c-common
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 6cd01c101e233691adb27d7a55b267436673940a
|
1
contrib/aws-c-event-stream
vendored
Submodule
1
contrib/aws-c-event-stream
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 3bc33662f9ccff4f4cbcf9509cc78c26e022fde0
|
1
contrib/aws-checksums
vendored
Submodule
1
contrib/aws-checksums
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit d601f7b4949f6fd64a84e88a126359b734aa56d8
|
100
contrib/aws-s3-cmake/CMakeLists.txt
Normal file
100
contrib/aws-s3-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
SET(AWS_S3_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3)
|
||||||
|
SET(AWS_CORE_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-core)
|
||||||
|
SET(AWS_CHECKSUMS_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-checksums)
|
||||||
|
SET(AWS_COMMON_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-c-common)
|
||||||
|
SET(AWS_EVENT_STREAM_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/aws-c-event-stream)
|
||||||
|
|
||||||
|
file(GLOB AWS_CORE_SOURCES
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/auth/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/client/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/http/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/http/standard/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/http/curl/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/config/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/external/cjson/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/external/tinyxml2/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/internal/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/monitoring/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/net/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/linux-shared/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/platform/linux-shared/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/base64/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/event/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/openssl/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/crypto/factory/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/json/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/logging/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/memory/stl/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/stream/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/threading/*.cpp"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/source/utils/xml/*.cpp"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB AWS_S3_SOURCES
|
||||||
|
"${AWS_S3_LIBRARY_DIR}/source/*.cpp"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB AWS_S3_MODEL_SOURCES
|
||||||
|
"${AWS_S3_LIBRARY_DIR}/source/model/*.cpp"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB AWS_EVENT_STREAM_SOURCES
|
||||||
|
"${AWS_EVENT_STREAM_LIBRARY_DIR}/source/*.c"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB AWS_COMMON_SOURCES
|
||||||
|
"${AWS_COMMON_LIBRARY_DIR}/source/*.c"
|
||||||
|
"${AWS_COMMON_LIBRARY_DIR}/source/posix/*.c"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB AWS_CHECKSUMS_SOURCES
|
||||||
|
"${AWS_CHECKSUMS_LIBRARY_DIR}/source/*.c"
|
||||||
|
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/intel/*.c"
|
||||||
|
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/arm/*.c"
|
||||||
|
# "${AWS_CHECKSUMS_LIBRARY_DIR}/source/visualc/*.c"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(GLOB S3_UNIFIED_SRC
|
||||||
|
${AWS_EVENT_STREAM_SOURCES}
|
||||||
|
${AWS_COMMON_SOURCES}
|
||||||
|
${AWS_CHECKSUMS_SOURCES}
|
||||||
|
${AWS_S3_SOURCES}
|
||||||
|
${AWS_S3_MODEL_SOURCES}
|
||||||
|
${AWS_CORE_SOURCES}
|
||||||
|
)
|
||||||
|
|
||||||
|
set(S3_INCLUDES
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/include/"
|
||||||
|
"${AWS_COMMON_LIBRARY_DIR}/include/"
|
||||||
|
"${AWS_CHECKSUMS_LIBRARY_DIR}/include/"
|
||||||
|
"${AWS_EVENT_STREAM_LIBRARY_DIR}/include/"
|
||||||
|
"${AWS_S3_LIBRARY_DIR}/include/"
|
||||||
|
"${AWS_CORE_LIBRARY_DIR}/include/"
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(aws_s3 ${S3_UNIFIED_SRC})
|
||||||
|
|
||||||
|
OPTION(USE_AWS_MEMORY_MANAGEMENT "Aws memory management" OFF)
|
||||||
|
configure_file("${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h.in" "${AWS_CORE_LIBRARY_DIR}/include/aws/core/SDKConfig.h")
|
||||||
|
|
||||||
|
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MAJOR=1")
|
||||||
|
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_MINOR=7")
|
||||||
|
target_compile_definitions(aws_s3 PUBLIC "AWS_SDK_VERSION_PATCH=231")
|
||||||
|
|
||||||
|
set(OPENSSL_USE_STATIC_LIBS TRUE)
|
||||||
|
find_package(OpenSSL REQUIRED)
|
||||||
|
|
||||||
|
set(CURL_LIBRARY "-lcurl")
|
||||||
|
add_definitions(-DCURL_STATICLIB)
|
||||||
|
find_package(CURL REQUIRED)
|
||||||
|
add_definitions(-DENABLE_OPENSSL_ENCRYPTION)
|
||||||
|
add_definitions(-DENABLE_CURL_CLIENT)
|
||||||
|
|
||||||
|
target_include_directories(aws_s3 PRIVATE ${S3_INCLUDES})
|
||||||
|
target_include_directories(aws_s3 PRIVATE ${CURL_INCLUDE_DIR})
|
||||||
|
target_link_libraries(aws_s3 OpenSSL::Crypto)
|
||||||
|
target_link_libraries(aws_s3 ${CURL_LIBRARIES})
|
@ -421,6 +421,12 @@ if (USE_HDFS)
|
|||||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (USE_AWS_S3)
|
||||||
|
target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY})
|
||||||
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR})
|
||||||
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
|
||||||
|
endif()
|
||||||
|
|
||||||
if (USE_BROTLI)
|
if (USE_BROTLI)
|
||||||
target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY})
|
target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY})
|
||||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
|
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
|
||||||
|
@ -465,6 +465,7 @@ namespace ErrorCodes
|
|||||||
extern const int UNKNOWN_DICTIONARY = 488;
|
extern const int UNKNOWN_DICTIONARY = 488;
|
||||||
extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
|
extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
|
||||||
extern const int CANNOT_FORMAT_DATETIME = 490;
|
extern const int CANNOT_FORMAT_DATETIME = 490;
|
||||||
|
extern const int S3_ERROR = 491;
|
||||||
|
|
||||||
extern const int KEEPER_EXCEPTION = 999;
|
extern const int KEEPER_EXCEPTION = 999;
|
||||||
extern const int POCO_EXCEPTION = 1000;
|
extern const int POCO_EXCEPTION = 1000;
|
||||||
|
@ -1,65 +1,40 @@
|
|||||||
#include <IO/ReadBufferFromS3.h>
|
#include <IO/ReadBufferFromS3.h>
|
||||||
|
|
||||||
#include <IO/ReadBufferFromIStream.h>
|
#include <IO/ReadBufferFromIStream.h>
|
||||||
#include <IO/S3Common.h>
|
|
||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
|
#include <aws/s3/model/GetObjectRequest.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
const int DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT = 2;
|
namespace ErrorCodes
|
||||||
|
|
||||||
ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
|
|
||||||
const String & access_key_id_,
|
|
||||||
const String & secret_access_key_,
|
|
||||||
const ConnectionTimeouts & timeouts)
|
|
||||||
: ReadBuffer(nullptr, 0)
|
|
||||||
, uri {uri_}
|
|
||||||
, session {makeHTTPSession(uri_, timeouts)}
|
|
||||||
{
|
{
|
||||||
Poco::Net::HTTPResponse response;
|
extern const int S3_ERROR;
|
||||||
std::unique_ptr<Poco::Net::HTTPRequest> request;
|
|
||||||
|
|
||||||
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT; ++i)
|
|
||||||
{
|
|
||||||
// With empty path poco will send "POST HTTP/1.1" its bug.
|
|
||||||
if (uri.getPath().empty())
|
|
||||||
uri.setPath("/");
|
|
||||||
|
|
||||||
request = std::make_unique<Poco::Net::HTTPRequest>(
|
|
||||||
Poco::Net::HTTPRequest::HTTP_GET,
|
|
||||||
uri.getPathAndQuery(),
|
|
||||||
Poco::Net::HTTPRequest::HTTP_1_1);
|
|
||||||
request->setHost(uri.getHost()); // use original, not resolved host name in header
|
|
||||||
|
|
||||||
S3Helper::authenticateRequest(*request, access_key_id_, secret_access_key_);
|
|
||||||
|
|
||||||
LOG_TRACE((&Logger::get("ReadBufferFromS3")), "Sending request to " << uri.toString());
|
|
||||||
|
|
||||||
session->sendRequest(*request);
|
|
||||||
|
|
||||||
istr = &session->receiveResponse(response);
|
|
||||||
|
|
||||||
// Handle 307 Temporary Redirect in order to allow request redirection
|
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
|
|
||||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto location_iterator = response.find("Location");
|
|
||||||
if (location_iterator == response.end())
|
|
||||||
break;
|
|
||||||
|
|
||||||
uri = location_iterator->second;
|
|
||||||
session = makeHTTPSession(uri, timeouts);
|
|
||||||
}
|
|
||||||
|
|
||||||
assertResponseIsOk(*request, response, *istr);
|
|
||||||
impl = std::make_unique<ReadBufferFromIStream>(*istr, DBMS_DEFAULT_BUFFER_SIZE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ReadBufferFromS3::ReadBufferFromS3(const std::shared_ptr<Aws::S3::S3Client> & client_ptr,
|
||||||
|
const String & bucket,
|
||||||
|
const String & key,
|
||||||
|
size_t buffer_size_): ReadBuffer(nullptr, 0)
|
||||||
|
{
|
||||||
|
Aws::S3::Model::GetObjectRequest req;
|
||||||
|
req.SetBucket(bucket);
|
||||||
|
req.SetKey(key);
|
||||||
|
|
||||||
|
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
|
||||||
|
|
||||||
|
if (outcome.IsSuccess()) {
|
||||||
|
read_result = outcome.GetResultWithOwnership();
|
||||||
|
impl = std::make_unique<ReadBufferFromIStream>(read_result.GetBody(), buffer_size_);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ReadBufferFromS3::nextImpl()
|
bool ReadBufferFromS3::nextImpl()
|
||||||
{
|
{
|
||||||
if (!impl->next())
|
if (!impl->next())
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include <IO/ReadBuffer.h>
|
#include <IO/ReadBuffer.h>
|
||||||
#include <Poco/Net/HTTPBasicCredentials.h>
|
#include <Poco/Net/HTTPBasicCredentials.h>
|
||||||
#include <Poco/URI.h>
|
#include <Poco/URI.h>
|
||||||
|
#include <aws/s3/S3Client.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -15,17 +15,18 @@ namespace DB
|
|||||||
*/
|
*/
|
||||||
class ReadBufferFromS3 : public ReadBuffer
|
class ReadBufferFromS3 : public ReadBuffer
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
Logger * log = &Logger::get("ReadBufferFromS3");
|
||||||
|
Aws::S3::Model::GetObjectResult read_result;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Poco::URI uri;
|
|
||||||
HTTPSessionPtr session;
|
|
||||||
std::istream * istr; /// owned by session
|
|
||||||
std::unique_ptr<ReadBuffer> impl;
|
std::unique_ptr<ReadBuffer> impl;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit ReadBufferFromS3(const Poco::URI & uri_,
|
explicit ReadBufferFromS3(const std::shared_ptr<Aws::S3::S3Client> & client_ptr,
|
||||||
const String & access_key_id_,
|
const String & bucket,
|
||||||
const String & secret_access_key_,
|
const String & key,
|
||||||
const ConnectionTimeouts & timeouts = {});
|
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
|
||||||
|
|
||||||
bool nextImpl() override;
|
bool nextImpl() override;
|
||||||
};
|
};
|
||||||
|
@ -3,12 +3,8 @@
|
|||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <sstream>
|
#include <aws/core/Aws.h>
|
||||||
|
#include <aws/core/auth/AWSCredentialsProvider.h>
|
||||||
#include <Poco/Base64Encoder.h>
|
|
||||||
#include <Poco/HMACEngine.h>
|
|
||||||
#include <Poco/SHA1Engine.h>
|
|
||||||
#include <Poco/URI.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -16,45 +12,59 @@ namespace DB
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int CANNOT_FORMAT_DATETIME;
|
extern const int BAD_ARGUMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
void S3Helper::authenticateRequest(
|
|
||||||
Poco::Net::HTTPRequest & request,
|
static std::mutex aws_init_lock;
|
||||||
|
static Aws::SDKOptions aws_options;
|
||||||
|
static std::atomic<bool> aws_initialized(false);
|
||||||
|
|
||||||
|
static const std::regex S3_URL_REGEX(R"((https?://.*)/(.*)/(.*))");
|
||||||
|
|
||||||
|
|
||||||
|
static void initializeAwsAPI() {
|
||||||
|
std::lock_guard<std::mutex> lock(aws_init_lock);
|
||||||
|
|
||||||
|
if (!aws_initialized.load()) {
|
||||||
|
Aws::InitAPI(aws_options);
|
||||||
|
aws_initialized.store(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Aws::S3::S3Client> S3Helper::createS3Client(const String & endpoint_url,
|
||||||
const String & access_key_id,
|
const String & access_key_id,
|
||||||
const String & secret_access_key)
|
const String & secret_access_key)
|
||||||
{
|
{
|
||||||
/// See https://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html
|
initializeAwsAPI();
|
||||||
|
|
||||||
if (access_key_id.empty())
|
Aws::Client::ClientConfiguration cfg;
|
||||||
return;
|
cfg.endpointOverride = endpoint_url;
|
||||||
|
cfg.scheme = Aws::Http::Scheme::HTTP;
|
||||||
|
|
||||||
/// Limitations:
|
auto cred_provider = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(access_key_id, secret_access_key);
|
||||||
/// 1. Virtual hosted-style requests are not supported (e.g. `http://johnsmith.net.s3.amazonaws.com/homepage.html`).
|
|
||||||
/// 2. AMZ headers are not supported (TODO).
|
|
||||||
|
|
||||||
if (!request.has("Date"))
|
return std::make_shared<Aws::S3::S3Client>(
|
||||||
{
|
std::move(cred_provider),
|
||||||
WriteBufferFromOwnString out;
|
std::move(cfg),
|
||||||
writeDateTimeTextRFC1123(time(nullptr), out, DateLUT::instance("UTC"));
|
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
|
||||||
request.set("Date", out.str());
|
false
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
S3Endpoint S3Helper::parseS3EndpointFromUrl(const String & url) {
|
||||||
|
std::smatch match;
|
||||||
|
if (std::regex_search(url, match, S3_URL_REGEX) && match.size() > 1) {
|
||||||
|
S3Endpoint endpoint;
|
||||||
|
endpoint.endpoint_url = match.str(1);
|
||||||
|
endpoint.bucket = match.str(2);
|
||||||
|
endpoint.key = match.str(3);
|
||||||
|
return endpoint;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
String string_to_sign = request.getMethod() + "\n"
|
throw Exception("Failed to parse S3 Storage URL. It should contain endpoint url, bucket and file. "
|
||||||
+ request.get("Content-MD5", "") + "\n"
|
"Regex is (https?://.*)/(.*)/(.*)", ErrorCodes::BAD_ARGUMENTS);
|
||||||
+ request.get("Content-Type", "") + "\n"
|
|
||||||
+ request.get("Date") + "\n"
|
|
||||||
+ Poco::URI(request.getURI()).getPathAndQuery();
|
|
||||||
|
|
||||||
Poco::HMACEngine<Poco::SHA1Engine> engine(secret_access_key);
|
|
||||||
engine.update(string_to_sign);
|
|
||||||
auto digest = engine.digest();
|
|
||||||
std::ostringstream signature;
|
|
||||||
Poco::Base64Encoder encoder(signature);
|
|
||||||
std::copy(digest.begin(), digest.end(), std::ostream_iterator<char>(encoder));
|
|
||||||
encoder.close();
|
|
||||||
|
|
||||||
request.set("Authorization", "AWS " + access_key_id + ":" + signature.str());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,19 +1,29 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
#include <Core/Types.h>
|
#include <Core/Types.h>
|
||||||
|
|
||||||
#include <Poco/Net/HTTPRequest.h>
|
#include <Poco/Net/HTTPRequest.h>
|
||||||
|
#include <aws/s3/S3Client.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace S3Helper
|
struct S3Endpoint {
|
||||||
{
|
String endpoint_url;
|
||||||
void authenticateRequest(
|
String bucket;
|
||||||
Poco::Net::HTTPRequest & request,
|
String key;
|
||||||
const String & access_key_id,
|
|
||||||
const String & secret_access_key);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
namespace S3Helper
|
||||||
|
{
|
||||||
|
S3Endpoint parseS3EndpointFromUrl(const String & url);
|
||||||
|
|
||||||
|
std::shared_ptr<Aws::S3::S3Client> createS3Client(const String & endpoint_url,
|
||||||
|
const String & access_key_id,
|
||||||
|
const String & secret_access_key);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,22 +1,18 @@
|
|||||||
#include <IO/WriteBufferFromS3.h>
|
#include <IO/WriteBufferFromS3.h>
|
||||||
|
|
||||||
#include <IO/S3Common.h>
|
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
#include <Poco/DOM/AutoPtr.h>
|
|
||||||
#include <Poco/DOM/DOMParser.h>
|
|
||||||
#include <Poco/DOM/Document.h>
|
|
||||||
#include <Poco/DOM/NodeList.h>
|
|
||||||
#include <Poco/SAX/InputSource.h>
|
|
||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
|
#include <aws/s3/model/CreateMultipartUploadRequest.h>
|
||||||
|
#include <aws/s3/model/UploadPartRequest.h>
|
||||||
|
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
const int DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT = 2;
|
|
||||||
|
|
||||||
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
|
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
|
||||||
// In case server does not return an error on exceeding that number, we print a warning
|
// In case server does not return an error on exceeding that number, we print a warning
|
||||||
// because custom S3 implementation may allow relaxed requirements on that.
|
// because custom S3 implementation may allow relaxed requirements on that.
|
||||||
@ -25,28 +21,26 @@ const int S3_WARN_MAX_PARTS = 10000;
|
|||||||
|
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int INCORRECT_DATA;
|
extern const int S3_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
WriteBufferFromS3::WriteBufferFromS3(
|
WriteBufferFromS3::WriteBufferFromS3(
|
||||||
const Poco::URI & uri_,
|
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||||
const String & access_key_id_,
|
const String & bucket_,
|
||||||
const String & secret_access_key_,
|
const String & key_,
|
||||||
size_t minimum_upload_part_size_,
|
size_t minimum_upload_part_size_,
|
||||||
const ConnectionTimeouts & timeouts_)
|
size_t buffer_size_
|
||||||
: BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
|
)
|
||||||
, uri {uri_}
|
: BufferWithOwnMemory<WriteBuffer>(buffer_size_, nullptr, 0)
|
||||||
, access_key_id {access_key_id_}
|
, bucket(bucket_)
|
||||||
, secret_access_key {secret_access_key_}
|
, key(key_)
|
||||||
|
, client_ptr(std::move(client_ptr_))
|
||||||
, minimum_upload_part_size {minimum_upload_part_size_}
|
, minimum_upload_part_size {minimum_upload_part_size_}
|
||||||
, timeouts {timeouts_}
|
|
||||||
, temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)}
|
, temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)}
|
||||||
, last_part_size {0}
|
, last_part_size {0}
|
||||||
{
|
{
|
||||||
initiate();
|
initiate();
|
||||||
|
|
||||||
/// FIXME: Implement rest of S3 authorization.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -96,184 +90,72 @@ WriteBufferFromS3::~WriteBufferFromS3()
|
|||||||
|
|
||||||
void WriteBufferFromS3::initiate()
|
void WriteBufferFromS3::initiate()
|
||||||
{
|
{
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadInitiate.html
|
Aws::S3::Model::CreateMultipartUploadRequest req;
|
||||||
Poco::Net::HTTPResponse response;
|
req.SetBucket(bucket);
|
||||||
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr;
|
req.SetKey(key);
|
||||||
HTTPSessionPtr session;
|
|
||||||
std::istream * istr = nullptr; /// owned by session
|
|
||||||
Poco::URI initiate_uri = uri;
|
|
||||||
initiate_uri.setRawQuery("uploads");
|
|
||||||
for (auto & param: uri.getQueryParameters())
|
|
||||||
{
|
|
||||||
initiate_uri.addQueryParameter(param.first, param.second);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i)
|
auto outcome = client_ptr->CreateMultipartUpload(req);
|
||||||
{
|
|
||||||
session = makeHTTPSession(initiate_uri, timeouts);
|
|
||||||
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_POST, initiate_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
|
||||||
request_ptr->setHost(initiate_uri.getHost()); // use original, not resolved host name in header
|
|
||||||
|
|
||||||
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key);
|
if (outcome.IsSuccess()) {
|
||||||
|
upload_id = outcome.GetResult().GetUploadId();
|
||||||
request_ptr->setContentLength(0);
|
LOG_DEBUG(log, "Multipart upload initiated. Upload id = " + upload_id);
|
||||||
|
} else {
|
||||||
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << initiate_uri.toString());
|
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||||
|
|
||||||
session->sendRequest(*request_ptr);
|
|
||||||
|
|
||||||
istr = &session->receiveResponse(response);
|
|
||||||
|
|
||||||
// Handle 307 Temporary Redirect in order to allow request redirection
|
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
|
|
||||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto location_iterator = response.find("Location");
|
|
||||||
if (location_iterator == response.end())
|
|
||||||
break;
|
|
||||||
|
|
||||||
initiate_uri = location_iterator->second;
|
|
||||||
}
|
|
||||||
assertResponseIsOk(*request_ptr, response, *istr);
|
|
||||||
|
|
||||||
Poco::XML::InputSource src(*istr);
|
|
||||||
Poco::XML::DOMParser parser;
|
|
||||||
Poco::AutoPtr<Poco::XML::Document> document = parser.parse(&src);
|
|
||||||
Poco::AutoPtr<Poco::XML::NodeList> nodes = document->getElementsByTagName("UploadId");
|
|
||||||
if (nodes->length() != 1)
|
|
||||||
{
|
|
||||||
throw Exception("Incorrect XML in response, no upload id", ErrorCodes::INCORRECT_DATA);
|
|
||||||
}
|
|
||||||
upload_id = nodes->item(0)->innerText();
|
|
||||||
if (upload_id.empty())
|
|
||||||
{
|
|
||||||
throw Exception("Incorrect XML in response, empty upload id", ErrorCodes::INCORRECT_DATA);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void WriteBufferFromS3::writePart(const String & data)
|
void WriteBufferFromS3::writePart(const String & data)
|
||||||
{
|
{
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html
|
|
||||||
Poco::Net::HTTPResponse response;
|
|
||||||
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr;
|
|
||||||
HTTPSessionPtr session;
|
|
||||||
std::istream * istr = nullptr; /// owned by session
|
|
||||||
Poco::URI part_uri = uri;
|
|
||||||
part_uri.addQueryParameter("partNumber", std::to_string(part_tags.size() + 1));
|
|
||||||
part_uri.addQueryParameter("uploadId", upload_id);
|
|
||||||
|
|
||||||
if (part_tags.size() == S3_WARN_MAX_PARTS)
|
if (part_tags.size() == S3_WARN_MAX_PARTS)
|
||||||
{
|
{
|
||||||
// Don't throw exception here by ourselves but leave the decision to take by S3 server.
|
// Don't throw exception here by ourselves but leave the decision to take by S3 server.
|
||||||
LOG_WARNING(&Logger::get("WriteBufferFromS3"), "Maximum part number in S3 protocol has reached (too many parts). Server may not accept this whole upload.");
|
LOG_WARNING(log, "Maximum part number in S3 protocol has reached (too many parts). Server may not accept this whole upload.");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i)
|
Aws::S3::Model::UploadPartRequest req;
|
||||||
{
|
|
||||||
session = makeHTTPSession(part_uri, timeouts);
|
|
||||||
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_PUT, part_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
|
||||||
request_ptr->setHost(part_uri.getHost()); // use original, not resolved host name in header
|
|
||||||
|
|
||||||
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key);
|
req.SetBucket(bucket);
|
||||||
|
req.SetKey(key);
|
||||||
|
req.SetPartNumber(part_tags.size() + 1);
|
||||||
|
req.SetUploadId(upload_id);
|
||||||
|
req.SetContentLength(data.size());
|
||||||
|
req.SetBody(std::make_shared<Aws::StringStream>(data));
|
||||||
|
|
||||||
request_ptr->setExpectContinue(true);
|
auto outcome = client_ptr->UploadPart(req);
|
||||||
|
|
||||||
request_ptr->setContentLength(data.size());
|
if (outcome.IsSuccess()) {
|
||||||
|
auto etag = outcome.GetResult().GetETag();
|
||||||
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << part_uri.toString());
|
part_tags.push_back(etag);
|
||||||
|
LOG_DEBUG(log, "Write part " + std::to_string(part_tags.size()) + " finished. Upload id = " + upload_id + ". Etag = " + etag);
|
||||||
std::ostream & ostr = session->sendRequest(*request_ptr);
|
} else {
|
||||||
if (session->peekResponse(response))
|
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||||
{
|
|
||||||
// Received 100-continue.
|
|
||||||
ostr << data;
|
|
||||||
}
|
|
||||||
|
|
||||||
istr = &session->receiveResponse(response);
|
|
||||||
|
|
||||||
// Handle 307 Temporary Redirect in order to allow request redirection
|
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
|
|
||||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto location_iterator = response.find("Location");
|
|
||||||
if (location_iterator == response.end())
|
|
||||||
break;
|
|
||||||
|
|
||||||
part_uri = location_iterator->second;
|
|
||||||
}
|
}
|
||||||
assertResponseIsOk(*request_ptr, response, *istr);
|
|
||||||
|
|
||||||
auto etag_iterator = response.find("ETag");
|
|
||||||
if (etag_iterator == response.end())
|
|
||||||
{
|
|
||||||
throw Exception("Incorrect response, no ETag", ErrorCodes::INCORRECT_DATA);
|
|
||||||
}
|
|
||||||
part_tags.push_back(etag_iterator->second);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void WriteBufferFromS3::complete()
|
void WriteBufferFromS3::complete()
|
||||||
{
|
{
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html
|
Aws::S3::Model::CompleteMultipartUploadRequest req;
|
||||||
Poco::Net::HTTPResponse response;
|
req.SetBucket(bucket);
|
||||||
std::unique_ptr<Poco::Net::HTTPRequest> request_ptr;
|
req.SetKey(key);
|
||||||
HTTPSessionPtr session;
|
req.SetUploadId(upload_id);
|
||||||
std::istream * istr = nullptr; /// owned by session
|
|
||||||
Poco::URI complete_uri = uri;
|
|
||||||
complete_uri.addQueryParameter("uploadId", upload_id);
|
|
||||||
|
|
||||||
String data;
|
Aws::S3::Model::CompletedMultipartUpload multipart_upload;
|
||||||
WriteBufferFromString buffer(data);
|
for (size_t i = 0; i < part_tags.size(); i++) {
|
||||||
writeString("<CompleteMultipartUpload>", buffer);
|
Aws::S3::Model::CompletedPart part;
|
||||||
for (size_t i = 0; i < part_tags.size(); ++i)
|
multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
|
||||||
{
|
|
||||||
writeString("<Part><PartNumber>", buffer);
|
|
||||||
writeIntText(i + 1, buffer);
|
|
||||||
writeString("</PartNumber><ETag>", buffer);
|
|
||||||
writeString(part_tags[i], buffer);
|
|
||||||
writeString("</ETag></Part>", buffer);
|
|
||||||
}
|
}
|
||||||
writeString("</CompleteMultipartUpload>", buffer);
|
|
||||||
buffer.finish();
|
|
||||||
|
|
||||||
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i)
|
req.SetMultipartUpload(multipart_upload);
|
||||||
{
|
|
||||||
session = makeHTTPSession(complete_uri, timeouts);
|
|
||||||
request_ptr = std::make_unique<Poco::Net::HTTPRequest>(Poco::Net::HTTPRequest::HTTP_POST, complete_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
|
||||||
request_ptr->setHost(complete_uri.getHost()); // use original, not resolved host name in header
|
|
||||||
|
|
||||||
S3Helper::authenticateRequest(*request_ptr, access_key_id, secret_access_key);
|
auto outcome = client_ptr->CompleteMultipartUpload(req);
|
||||||
|
|
||||||
request_ptr->setExpectContinue(true);
|
if (outcome.IsSuccess()) {
|
||||||
|
LOG_DEBUG(log, "Multipart upload completed. Upload_id = " + upload_id);
|
||||||
request_ptr->setContentLength(data.size());
|
} else {
|
||||||
|
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||||
LOG_TRACE((&Logger::get("WriteBufferFromS3")), "Sending request to " << complete_uri.toString());
|
|
||||||
|
|
||||||
std::ostream & ostr = session->sendRequest(*request_ptr);
|
|
||||||
if (session->peekResponse(response))
|
|
||||||
{
|
|
||||||
// Received 100-continue.
|
|
||||||
ostr << data;
|
|
||||||
}
|
|
||||||
|
|
||||||
istr = &session->receiveResponse(response);
|
|
||||||
|
|
||||||
// Handle 307 Temporary Redirect in order to allow request redirection
|
|
||||||
// See https://docs.aws.amazon.com/AmazonS3/latest/dev/Redirects.html
|
|
||||||
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto location_iterator = response.find("Location");
|
|
||||||
if (location_iterator == response.end())
|
|
||||||
break;
|
|
||||||
|
|
||||||
complete_uri = location_iterator->second;
|
|
||||||
}
|
}
|
||||||
assertResponseIsOk(*request_ptr, response, *istr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -8,9 +8,7 @@
|
|||||||
#include <IO/BufferWithOwnMemory.h>
|
#include <IO/BufferWithOwnMemory.h>
|
||||||
#include <IO/WriteBuffer.h>
|
#include <IO/WriteBuffer.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <Poco/Net/HTTPBasicCredentials.h>
|
#include <aws/s3/S3Client.h>
|
||||||
#include <Poco/Net/HTTPRequest.h>
|
|
||||||
#include <Poco/URI.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -20,11 +18,10 @@ namespace DB
|
|||||||
class WriteBufferFromS3 : public BufferWithOwnMemory<WriteBuffer>
|
class WriteBufferFromS3 : public BufferWithOwnMemory<WriteBuffer>
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
Poco::URI uri;
|
String bucket;
|
||||||
String access_key_id;
|
String key;
|
||||||
String secret_access_key;
|
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||||
size_t minimum_upload_part_size;
|
size_t minimum_upload_part_size;
|
||||||
ConnectionTimeouts timeouts;
|
|
||||||
String buffer_string;
|
String buffer_string;
|
||||||
std::unique_ptr<WriteBufferFromString> temporary_buffer;
|
std::unique_ptr<WriteBufferFromString> temporary_buffer;
|
||||||
size_t last_part_size;
|
size_t last_part_size;
|
||||||
@ -34,12 +31,14 @@ private:
|
|||||||
String upload_id;
|
String upload_id;
|
||||||
std::vector<String> part_tags;
|
std::vector<String> part_tags;
|
||||||
|
|
||||||
|
Logger * log = &Logger::get("WriteBufferFromS3");
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit WriteBufferFromS3(const Poco::URI & uri,
|
explicit WriteBufferFromS3(std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||||
const String & access_key_id,
|
const String & bucket_,
|
||||||
const String & secret_access_key,
|
const String & key_,
|
||||||
size_t minimum_upload_part_size_,
|
size_t minimum_upload_part_size_,
|
||||||
const ConnectionTimeouts & timeouts = {});
|
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);
|
||||||
|
|
||||||
void nextImpl() override;
|
void nextImpl() override;
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#include <IO/S3Common.h>
|
||||||
#include <Storages/StorageFactory.h>
|
#include <Storages/StorageFactory.h>
|
||||||
#include <Storages/StorageS3.h>
|
#include <Storages/StorageS3.h>
|
||||||
|
|
||||||
@ -16,7 +17,8 @@
|
|||||||
#include <DataStreams/IBlockInputStream.h>
|
#include <DataStreams/IBlockInputStream.h>
|
||||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||||
|
|
||||||
#include <Poco/Net/HTTPRequest.h>
|
#include <aws/core/client/ClientConfiguration.h>
|
||||||
|
#include <aws/s3/S3Client.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -26,24 +28,25 @@ namespace ErrorCodes
|
|||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
class StorageS3BlockInputStream : public IBlockInputStream
|
class StorageS3BlockInputStream : public IBlockInputStream
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
StorageS3BlockInputStream(const Poco::URI & uri,
|
StorageS3BlockInputStream(
|
||||||
const String & access_key_id,
|
|
||||||
const String & secret_access_key,
|
|
||||||
const String & format,
|
const String & format,
|
||||||
const String & name_,
|
const String & name_,
|
||||||
const Block & sample_block,
|
const Block & sample_block,
|
||||||
const Context & context,
|
const Context & context,
|
||||||
UInt64 max_block_size,
|
UInt64 max_block_size,
|
||||||
const ConnectionTimeouts & timeouts,
|
const CompressionMethod compression_method,
|
||||||
const CompressionMethod compression_method)
|
const std::shared_ptr<Aws::S3::S3Client> & client,
|
||||||
|
const String & bucket,
|
||||||
|
const String & key)
|
||||||
: name(name_)
|
: name(name_)
|
||||||
{
|
{
|
||||||
read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, uri, access_key_id, secret_access_key, timeouts);
|
read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, client, bucket, key);
|
||||||
reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
|
reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,24 +84,18 @@ namespace
|
|||||||
class StorageS3BlockOutputStream : public IBlockOutputStream
|
class StorageS3BlockOutputStream : public IBlockOutputStream
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
StorageS3BlockOutputStream(const Poco::URI & uri,
|
StorageS3BlockOutputStream(
|
||||||
const String & access_key_id,
|
|
||||||
const String & secret_access_key,
|
|
||||||
const String & format,
|
const String & format,
|
||||||
UInt64 min_upload_part_size,
|
UInt64 min_upload_part_size,
|
||||||
const Block & sample_block_,
|
const Block & sample_block_,
|
||||||
const Context & context,
|
const Context & context,
|
||||||
const ConnectionTimeouts & timeouts,
|
const CompressionMethod compression_method,
|
||||||
const CompressionMethod compression_method)
|
const std::shared_ptr<Aws::S3::S3Client> & client,
|
||||||
|
const String & bucket,
|
||||||
|
const String & key)
|
||||||
: sample_block(sample_block_)
|
: sample_block(sample_block_)
|
||||||
{
|
{
|
||||||
write_buf = getWriteBuffer<WriteBufferFromS3>(
|
write_buf = getWriteBuffer<WriteBufferFromS3>(compression_method, client, bucket, key, min_upload_part_size);
|
||||||
compression_method,
|
|
||||||
uri,
|
|
||||||
access_key_id,
|
|
||||||
secret_access_key,
|
|
||||||
min_upload_part_size,
|
|
||||||
timeouts);
|
|
||||||
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
|
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,8 +129,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
StorageS3::StorageS3(
|
StorageS3::StorageS3(const S3Endpoint & endpoint_,
|
||||||
const Poco::URI & uri_,
|
|
||||||
const String & access_key_id_,
|
const String & access_key_id_,
|
||||||
const String & secret_access_key_,
|
const String & secret_access_key_,
|
||||||
const std::string & database_name_,
|
const std::string & database_name_,
|
||||||
@ -145,15 +141,14 @@ StorageS3::StorageS3(
|
|||||||
Context & context_,
|
Context & context_,
|
||||||
const String & compression_method_ = "")
|
const String & compression_method_ = "")
|
||||||
: IStorage(columns_)
|
: IStorage(columns_)
|
||||||
, uri(uri_)
|
, endpoint(endpoint_)
|
||||||
, access_key_id(access_key_id_)
|
|
||||||
, secret_access_key(secret_access_key_)
|
|
||||||
, context_global(context_)
|
, context_global(context_)
|
||||||
, format_name(format_name_)
|
, format_name(format_name_)
|
||||||
, database_name(database_name_)
|
, database_name(database_name_)
|
||||||
, table_name(table_name_)
|
, table_name(table_name_)
|
||||||
, min_upload_part_size(min_upload_part_size_)
|
, min_upload_part_size(min_upload_part_size_)
|
||||||
, compression_method(compression_method_)
|
, compression_method(compression_method_)
|
||||||
|
, client(S3Helper::createS3Client(endpoint_.endpoint_url, access_key_id_, secret_access_key_))
|
||||||
{
|
{
|
||||||
setColumns(columns_);
|
setColumns(columns_);
|
||||||
setConstraints(constraints_);
|
setConstraints(constraints_);
|
||||||
@ -169,16 +164,15 @@ BlockInputStreams StorageS3::read(
|
|||||||
unsigned /*num_streams*/)
|
unsigned /*num_streams*/)
|
||||||
{
|
{
|
||||||
BlockInputStreamPtr block_input = std::make_shared<StorageS3BlockInputStream>(
|
BlockInputStreamPtr block_input = std::make_shared<StorageS3BlockInputStream>(
|
||||||
uri,
|
|
||||||
access_key_id,
|
|
||||||
secret_access_key,
|
|
||||||
format_name,
|
format_name,
|
||||||
getName(),
|
getName(),
|
||||||
getHeaderBlock(column_names),
|
getHeaderBlock(column_names),
|
||||||
context,
|
context,
|
||||||
max_block_size,
|
max_block_size,
|
||||||
ConnectionTimeouts::getHTTPTimeouts(context),
|
IStorage::chooseCompressionMethod(endpoint.endpoint_url, compression_method),
|
||||||
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
|
client,
|
||||||
|
endpoint.bucket,
|
||||||
|
endpoint.key);
|
||||||
|
|
||||||
auto column_defaults = getColumns().getDefaults();
|
auto column_defaults = getColumns().getDefaults();
|
||||||
if (column_defaults.empty())
|
if (column_defaults.empty())
|
||||||
@ -195,15 +189,9 @@ void StorageS3::rename(const String & /*new_path_to_db*/, const String & new_dat
|
|||||||
BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/)
|
BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/)
|
||||||
{
|
{
|
||||||
return std::make_shared<StorageS3BlockOutputStream>(
|
return std::make_shared<StorageS3BlockOutputStream>(
|
||||||
uri,
|
format_name, min_upload_part_size, getSampleBlock(), context_global,
|
||||||
access_key_id,
|
IStorage::chooseCompressionMethod(endpoint.endpoint_url, compression_method),
|
||||||
secret_access_key,
|
client, endpoint.bucket, endpoint.key);
|
||||||
format_name,
|
|
||||||
min_upload_part_size,
|
|
||||||
getSampleBlock(),
|
|
||||||
context_global,
|
|
||||||
ConnectionTimeouts::getHTTPTimeouts(context_global),
|
|
||||||
IStorage::chooseCompressionMethod(uri.toString(), compression_method));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerStorageS3(StorageFactory & factory)
|
void registerStorageS3(StorageFactory & factory)
|
||||||
@ -220,7 +208,7 @@ void registerStorageS3(StorageFactory & factory)
|
|||||||
engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.local_context);
|
engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.local_context);
|
||||||
|
|
||||||
String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
|
String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
|
||||||
Poco::URI uri(url);
|
S3Endpoint endpoint = S3Helper::parseS3EndpointFromUrl(url);
|
||||||
|
|
||||||
String format_name = engine_args[engine_args.size() - 1]->as<ASTLiteral &>().value.safeGet<String>();
|
String format_name = engine_args[engine_args.size() - 1]->as<ASTLiteral &>().value.safeGet<String>();
|
||||||
|
|
||||||
@ -240,7 +228,8 @@ void registerStorageS3(StorageFactory & factory)
|
|||||||
else
|
else
|
||||||
compression_method = "auto";
|
compression_method = "auto";
|
||||||
|
|
||||||
return StorageS3::create(uri, access_key_id, secret_access_key, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context);
|
return StorageS3::create(endpoint, access_key_id, secret_access_key, args.database_name, args.table_name, format_name, min_upload_part_size, args.columns, args.constraints, args.context);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,12 @@
|
|||||||
#include <Poco/URI.h>
|
#include <Poco/URI.h>
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
#include <ext/shared_ptr_helper.h>
|
#include <ext/shared_ptr_helper.h>
|
||||||
|
#include <aws/s3/S3Client.h>
|
||||||
|
#include <aws/core/auth/AWSCredentialsProvider.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class represents table engine for external S3 urls.
|
* This class represents table engine for external S3 urls.
|
||||||
* It sends HTTP GET to server when select is called and
|
* It sends HTTP GET to server when select is called and
|
||||||
@ -16,8 +18,7 @@ namespace DB
|
|||||||
class StorageS3 : public ext::shared_ptr_helper<StorageS3>, public IStorage
|
class StorageS3 : public ext::shared_ptr_helper<StorageS3>, public IStorage
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
StorageS3(
|
StorageS3(const S3Endpoint & endpoint,
|
||||||
const Poco::URI & uri_,
|
|
||||||
const String & access_key_id,
|
const String & access_key_id,
|
||||||
const String & secret_access_key,
|
const String & secret_access_key,
|
||||||
const String & database_name_,
|
const String & database_name_,
|
||||||
@ -57,9 +58,7 @@ public:
|
|||||||
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
|
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name, TableStructureWriteLockHolder &) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Poco::URI uri;
|
S3Endpoint endpoint;
|
||||||
String access_key_id;
|
|
||||||
String secret_access_key;
|
|
||||||
const Context & context_global;
|
const Context & context_global;
|
||||||
|
|
||||||
String format_name;
|
String format_name;
|
||||||
@ -67,6 +66,7 @@ private:
|
|||||||
String table_name;
|
String table_name;
|
||||||
UInt64 min_upload_part_size;
|
UInt64 min_upload_part_size;
|
||||||
String compression_method;
|
String compression_method;
|
||||||
|
std::shared_ptr<Aws::S3::S3Client> client;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
|
#include <IO/S3Common.h>
|
||||||
#include <Storages/StorageS3.h>
|
#include <Storages/StorageS3.h>
|
||||||
#include <Interpreters/evaluateConstantExpression.h>
|
#include <Interpreters/evaluateConstantExpression.h>
|
||||||
#include <TableFunctions/TableFunctionFactory.h>
|
#include <TableFunctions/TableFunctionFactory.h>
|
||||||
#include <TableFunctions/TableFunctionS3.h>
|
#include <TableFunctions/TableFunctionS3.h>
|
||||||
#include <TableFunctions/parseColumnsListForTableFunction.h>
|
|
||||||
#include <Parsers/ASTLiteral.h>
|
#include <Parsers/ASTLiteral.h>
|
||||||
#include <Poco/URI.h>
|
#include "parseColumnsListForTableFunction.h"
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -76,9 +76,9 @@ StoragePtr TableFunctionS3::getStorage(
|
|||||||
const std::string & table_name,
|
const std::string & table_name,
|
||||||
const String & compression_method) const
|
const String & compression_method) const
|
||||||
{
|
{
|
||||||
Poco::URI uri(source);
|
S3Endpoint endpoint = S3Helper::parseS3EndpointFromUrl(source);
|
||||||
UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size;
|
UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size;
|
||||||
return StorageS3::create(uri, access_key_id, secret_access_key, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method);
|
return StorageS3::create(endpoint, access_key_id, secret_access_key, getDatabaseName(), table_name, format, min_upload_part_size, columns, ConstraintsDescription{}, global_context, compression_method);
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerTableFunctionS3(TableFunctionFactory & factory)
|
void registerTableFunctionS3(TableFunctionFactory & factory)
|
||||||
|
@ -435,6 +435,9 @@ class ClickHouseCluster:
|
|||||||
logging.info("Trying to connect to Minio...")
|
logging.info("Trying to connect to Minio...")
|
||||||
self.wait_minio_to_start()
|
self.wait_minio_to_start()
|
||||||
|
|
||||||
|
# TODO: Remove after image update by separate commit.
|
||||||
|
subprocess.check_output(['docker', 'build', '-t', 'yandex/clickhouse-integration-test', '/ClickHouse/docker/test/integration/'], stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
|
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
|
||||||
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
|
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
|
||||||
subprocess_check_call(clickhouse_start_cmd)
|
subprocess_check_call(clickhouse_start_cmd)
|
||||||
|
@ -111,9 +111,9 @@ def run_query(instance, query, stdin=None, settings=None):
|
|||||||
|
|
||||||
# Test simple put.
|
# Test simple put.
|
||||||
@pytest.mark.parametrize("maybe_auth,positive", [
|
@pytest.mark.parametrize("maybe_auth,positive", [
|
||||||
("",True),
|
("", True),
|
||||||
("'minio','minio123',",True),
|
("'minio','minio123',", True),
|
||||||
("'wrongid','wrongkey',",False)
|
("'wrongid','wrongkey',", False)
|
||||||
])
|
])
|
||||||
def test_put(cluster, maybe_auth, positive):
|
def test_put(cluster, maybe_auth, positive):
|
||||||
# type: (ClickHouseCluster) -> None
|
# type: (ClickHouseCluster) -> None
|
||||||
@ -130,7 +130,8 @@ def test_put(cluster, maybe_auth, positive):
|
|||||||
try:
|
try:
|
||||||
run_query(instance, put_query)
|
run_query(instance, put_query)
|
||||||
except helpers.client.QueryRuntimeException:
|
except helpers.client.QueryRuntimeException:
|
||||||
assert not positive
|
if positive:
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
assert positive
|
assert positive
|
||||||
assert values_csv == get_s3_file_content(cluster, bucket, filename)
|
assert values_csv == get_s3_file_content(cluster, bucket, filename)
|
||||||
@ -138,9 +139,9 @@ def test_put(cluster, maybe_auth, positive):
|
|||||||
|
|
||||||
# Test put values in CSV format.
|
# Test put values in CSV format.
|
||||||
@pytest.mark.parametrize("maybe_auth,positive", [
|
@pytest.mark.parametrize("maybe_auth,positive", [
|
||||||
("",True),
|
("", True),
|
||||||
("'minio','minio123',",True),
|
("'minio','minio123',", True),
|
||||||
("'wrongid','wrongkey',",False)
|
("'wrongid','wrongkey',", False)
|
||||||
])
|
])
|
||||||
def test_put_csv(cluster, maybe_auth, positive):
|
def test_put_csv(cluster, maybe_auth, positive):
|
||||||
# type: (ClickHouseCluster) -> None
|
# type: (ClickHouseCluster) -> None
|
||||||
@ -156,7 +157,8 @@ def test_put_csv(cluster, maybe_auth, positive):
|
|||||||
try:
|
try:
|
||||||
run_query(instance, put_query, stdin=csv_data)
|
run_query(instance, put_query, stdin=csv_data)
|
||||||
except helpers.client.QueryRuntimeException:
|
except helpers.client.QueryRuntimeException:
|
||||||
assert not positive
|
if positive:
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
assert positive
|
assert positive
|
||||||
assert csv_data == get_s3_file_content(cluster, bucket, filename)
|
assert csv_data == get_s3_file_content(cluster, bucket, filename)
|
||||||
@ -191,9 +193,9 @@ def test_put_get_with_redirect(cluster):
|
|||||||
|
|
||||||
# Test multipart put.
|
# Test multipart put.
|
||||||
@pytest.mark.parametrize("maybe_auth,positive", [
|
@pytest.mark.parametrize("maybe_auth,positive", [
|
||||||
("",True),
|
("", True),
|
||||||
("'minio','minio123',",True),
|
# ("'minio','minio123',",True), Redirect with credentials not working with nginx.
|
||||||
("'wrongid','wrongkey',",False)
|
("'wrongid','wrongkey',", False)
|
||||||
])
|
])
|
||||||
def test_multipart_put(cluster, maybe_auth, positive):
|
def test_multipart_put(cluster, maybe_auth, positive):
|
||||||
# type: (ClickHouseCluster) -> None
|
# type: (ClickHouseCluster) -> None
|
||||||
@ -222,13 +224,14 @@ def test_multipart_put(cluster, maybe_auth, positive):
|
|||||||
try:
|
try:
|
||||||
run_query(instance, put_query, stdin=csv_data, settings={'s3_min_upload_part_size': min_part_size_bytes})
|
run_query(instance, put_query, stdin=csv_data, settings={'s3_min_upload_part_size': min_part_size_bytes})
|
||||||
except helpers.client.QueryRuntimeException:
|
except helpers.client.QueryRuntimeException:
|
||||||
assert not positive
|
if positive:
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
assert positive
|
assert positive
|
||||||
|
|
||||||
# Use Nginx access logs to count number of parts uploaded to Minio.
|
# Use Nginx access logs to count number of parts uploaded to Minio.
|
||||||
nginx_logs = get_nginx_access_logs()
|
nginx_logs = get_nginx_access_logs()
|
||||||
uploaded_parts = filter(lambda log_line: log_line.find(filename) >= 0 and log_line.find("PUT") >= 0, nginx_logs)
|
uploaded_parts = filter(lambda log_line: log_line.find(filename) >= 0 and log_line.find("PUT") >= 0, nginx_logs)
|
||||||
assert uploaded_parts > 1
|
assert len(uploaded_parts) > 1
|
||||||
|
|
||||||
assert csv_data == get_s3_file_content(cluster, bucket, filename)
|
assert csv_data == get_s3_file_content(cluster, bucket, filename)
|
||||||
|
@ -4,7 +4,7 @@ FROM ubuntu:18.04
|
|||||||
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
|
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
|
||||||
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 \
|
&& env DEBIAN_FRONTEND=noninteractive apt-get -y install tzdata python llvm-6.0 llvm-6.0-dev libreadline-dev libicu-dev bsdutils llvm-8 curl libcurl4 libcurl4-openssl-dev \
|
||||||
&& rm -rf \
|
&& rm -rf \
|
||||||
/var/lib/apt/lists/* \
|
/var/lib/apt/lists/* \
|
||||||
/var/cache/debconf \
|
/var/cache/debconf \
|
||||||
|
Loading…
Reference in New Issue
Block a user