Virtual hosted-style support of S3 URI.

This commit is contained in:
Pavel Kovalenko 2020-04-01 22:56:40 +03:00 committed by Pavel Kovalenko
parent f60337a765
commit 6f255c4309
2 changed files with 81 additions and 38 deletions

View File

@ -2,15 +2,15 @@
#if USE_AWS_S3
#include <IO/S3Common.h>
#include <IO/WriteBufferFromString.h>
# include <IO/S3Common.h>
# include <IO/WriteBufferFromString.h>
#include <regex>
#include <aws/s3/S3Client.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
#include <aws/core/utils/logging/LogSystemInterface.h>
#include <aws/core/utils/logging/LogMacros.h>
#include <common/logger_useful.h>
# include <aws/core/auth/AWSCredentialsProvider.h>
# include <aws/core/utils/logging/LogMacros.h>
# include <aws/core/utils/logging/LogSystemInterface.h>
# include <aws/s3/S3Client.h>
# include <re2/re2.h>
# include <common/logger_useful.h>
namespace
@ -57,7 +57,6 @@ private:
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -106,46 +105,37 @@ namespace S3
/// Builds an S3::URI from a generic Poco::URI, filling `uri`, `endpoint`, `bucket` and `key`.
/// Throws Exception with ErrorCodes::BAD_ARGUMENTS when a host, bucket or key cannot be derived.
/// NOTE(review): this listing appears to interleave the pre- and post-commit lines of the hunk
/// (the +/- diff markers are missing): both the std::regex and the RE2 parsing paths are present and
/// some throw statements are doubled. Confirm against the real file which lines are actually in effect.
URI::URI(const Poco::URI & uri_)
{
static const std::regex bucket_key_pattern("([^/]+)/(.*)"); /// TODO std::regex is discouraged
/// Matched against the whole authority: an optional "<prefix>." group followed by "s3", then '.' or '-',
/// then the rest of the host. The optional group (including its trailing dot) is the only capture.
static const RE2 virtual_hosted_style_pattern("(.+\\.)?s3[.-][a-z0-9-.]+");
/// Matched against the path: "<non-slash characters>/<rest>"; capture 1 is used as bucket, capture 2 as key.
static const RE2 path_style_pattern("([^/]+)/(.*)");
uri = uri_;
// s3://*
/// With the "s3"/"S3" scheme the authority is taken as the bucket and the path (minus its first character) as the key.
if (uri.getScheme() == "s3" || uri.getScheme() == "S3")
{
bucket = uri.getAuthority();
if (bucket.empty())
throw Exception ("Invalid S3 URI: no bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
const auto & path = uri.getPath();
// s3://bucket or s3://bucket/
if (path.length() <= 1)
throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
key = path.substr(1);
return;
}
/// NOTE(review): two throw lines follow a single `if`; presumably the old and the new wording of the same error.
if (uri.getHost().empty())
throw Exception("Invalid S3 URI: no host: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
throw Exception("Invalid S3 URI host: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
/// The endpoint is the scheme plus the authority, without any path.
endpoint = uri.getScheme() + "://" + uri.getAuthority();
// Parse bucket and key from path.
std::smatch match;
std::regex_search(uri.getPath(), match, bucket_key_pattern);
if (!match.empty())
/// Virtual hosted-style: the bucket is taken from the authority, the key from the path.
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket))
{
bucket = match.str(1);
if (bucket.empty())
throw Exception ("Invalid S3 URI: no bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
/// The captured prefix ends with the '.' that precedes "s3"; drop that last character.
if (!bucket.empty())
bucket = bucket.substr(0, bucket.length() - 1);
/// Only bucket names of 3 to 63 characters are accepted.
if (bucket.length() < 3 || bucket.length() > 63)
throw Exception("Invalid S3 URI bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
key = match.str(2);
if (key.empty())
throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
/// The key is the path without its first character.
/// NOTE(review): if getPath() can return an empty string here, substr(1) would throw std::out_of_range
/// rather than the BAD_ARGUMENTS Exception - confirm and guard if needed.
key = uri.getPath().substr(1);
if (key.empty() || key == "/")
throw Exception("Invalid S3 URI key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
}
/// Path-style: bucket and key are both captured from the path; the same bucket length and key checks apply.
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
{
if (bucket.length() < 3 || bucket.length() > 63)
throw Exception("Invalid S3 URI bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
if (key.empty() || key == "/")
throw Exception("Invalid S3 URI key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
}
/// Neither pattern matched.
/// NOTE(review): again two throw lines, presumably the old and the new message of the same error.
else
throw Exception("Invalid S3 URI: no bucket or key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
throw Exception("Invalid S3 URI bucket or key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
}
}

View File

@ -0,0 +1,53 @@
#include <gtest/gtest.h>
#if USE_AWS_S3
#include <IO/S3Common.h>
namespace
{
using namespace DB;
/// Fixture for value-parameterized tests whose parameter is a std::string.
struct S3UriTest : testing::TestWithParam<std::string>
{
};
/// Checks that well-formed URLs are split into the expected endpoint, bucket and key.
TEST(S3UriTest, validPatterns)
{
    /// One input URL together with the parts S3::URI is expected to expose for it.
    struct Sample
    {
        const char * url;
        const char * endpoint;
        const char * bucket;
        const char * key;
    };

    const Sample samples[] = {
        /// Bucket name is part of the host name.
        {"https://jokserfn.s3.yandexcloud.net/data", "https://jokserfn.s3.yandexcloud.net", "jokserfn", "data"},
        /// Bucket name is the first segment of the path.
        {"https://storage.yandexcloud.net/jokserfn/data", "https://storage.yandexcloud.net", "jokserfn", "data"},
    };

    for (const auto & sample : samples)
    {
        const Poco::URI source(sample.url);
        S3::URI uri(source);

        /// ASSERT_* aborts the whole test on the first mismatch, so later samples are not evaluated.
        ASSERT_EQ(sample.endpoint, uri.endpoint);
        ASSERT_EQ(sample.bucket, uri.bucket);
        ASSERT_EQ(sample.key, uri.key);
    }
}
/// Every parameter of the suite must be rejected: building an S3::URI from it has to throw.
TEST_P(S3UriTest, invalidPatterns)
{
    /// Both constructions stay inside the assertion, so a throw from either one satisfies it.
    ASSERT_ANY_THROW({
        const Poco::URI candidate(GetParam());
        S3::URI rejected(candidate);
    });
}
/// Instantiates S3UriTest under the prefix "S3" with the URL strings listed below;
/// each one becomes a parameter of the TEST_P cases defined for S3UriTest.
INSTANTIATE_TEST_SUITE_P(
S3,
S3UriTest,
testing::Values(
/// No host at all.
"https:///",
/// Bucket-in-host form, but nothing follows the leading slash of the path.
"https://jokserfn.s3.yandexcloud.net/",
/// Host starts with a dot: the label before "s3" is empty.
"https://.s3.yandexcloud.net/key",
/// Host starts directly with "s3." (no bucket prefix) and the path has a single segment.
"https://s3.yandexcloud.net/key",
/// Same host; the single path segment is followed only by a slash.
"https://s3.yandexcloud.net/key/",
/// Same host; the path consists of slashes only.
"https://s3.yandexcloud.net//",
/// Host without "s3"; the path is just "/".
"https://yandexcloud.net/",
/// Host without "s3"; the path consists of slashes only.
"https://yandexcloud.net//",
/// Host without "s3"; a first path segment is present but nothing follows it.
"https://yandexcloud.net/bucket/",
/// Host without "s3"; the first path segment is empty.
"https://yandexcloud.net//key"));
}
#endif