From 6f255c43095e517754b7f909fa96f6434cf4fb04 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko
Date: Wed, 1 Apr 2020 22:56:40 +0300
Subject: [PATCH] Virtual hosted-style support of S3 URI.

---
Review notes (text between the "---" line and the first "diff --git" line is
ignored when the patch is applied):

 * S3::URI::URI recognises two layouts.
   Virtual hosted-style: the authority fully matches
   "(.+\.)?s3[.-][a-z0-9-.]+"; the bucket is the captured host prefix without
   its trailing '.', and the key is the path without its leading '/'.
   Path-style: the path partially matches "([^/]+)/(.*)", which yields the
   bucket and the key.
   Both layouts throw BAD_ARGUMENTS for a bucket shorter than 3 or longer
   than 63 characters and for a key that is empty or "/".
 * Fix folded into this revision: the virtual hosted-style branch called
   uri.getPath().substr(1) unconditionally. For a URI without a path (e.g.
   https://bucket.s3.example.com) the path is empty and substr(1) throws
   std::out_of_range instead of the intended "Invalid S3 URI key" exception.
   The offset is now 0 when the path is empty, so the key check that follows
   reports the error.
 * The dedicated "s3://" scheme branch is removed; such URIs now go through
   the same two patterns as every other scheme.
 * NOTE(review): in this copy of the patch the #include targets, the template
   argument of testing::TestWithParam and the author e-mail are absent
   (presumably angle-bracketed text was stripped in transit). They are left
   as found; restore them from the original commit before applying.
 * NOTE(review): the post-image blob id on the S3Common.cpp "index" line
   predates the fix above.

 dbms/src/IO/S3Common.cpp           | 66 +++++++++++++-----------------
 dbms/src/IO/tests/gtest_s3_uri.cpp | 53 ++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 38 deletions(-)
 create mode 100644 dbms/src/IO/tests/gtest_s3_uri.cpp

diff --git a/dbms/src/IO/S3Common.cpp b/dbms/src/IO/S3Common.cpp
index 137fe22c872..e1952f5eafd 100644
--- a/dbms/src/IO/S3Common.cpp
+++ b/dbms/src/IO/S3Common.cpp
@@ -2,15 +2,15 @@
 
 #if USE_AWS_S3
 
-#include
-#include
+#    include
+#    include
 
-#include
-#include
-#include
-#include
-#include
-#include
+#    include
+#    include
+#    include
+#    include
+#    include
+#    include
 
 
 namespace
@@ -57,7 +57,6 @@ private:
 
 namespace DB
 {
-
 namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
@@ -106,46 +105,37 @@ namespace S3
 
     URI::URI(const Poco::URI & uri_)
     {
-        static const std::regex bucket_key_pattern("([^/]+)/(.*)"); /// TODO std::regex is discouraged
+        static const RE2 virtual_hosted_style_pattern("(.+\\.)?s3[.-][a-z0-9-.]+"); /// Bucket is the host prefix in front of "s3".
+        static const RE2 path_style_pattern("([^/]+)/(.*)"); /// Bucket is the first path segment, key is the rest.
 
         uri = uri_;
 
-        // s3://*
-        if (uri.getScheme() == "s3" || uri.getScheme() == "S3")
-        {
-            bucket = uri.getAuthority();
-            if (bucket.empty())
-                throw Exception ("Invalid S3 URI: no bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
-
-            const auto & path = uri.getPath();
-            // s3://bucket or s3://bucket/
-            if (path.length() <= 1)
-                throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
-
-            key = path.substr(1);
-            return;
-        }
-
         if (uri.getHost().empty())
-            throw Exception("Invalid S3 URI: no host: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+            throw Exception("Invalid S3 URI host: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
 
         endpoint = uri.getScheme() + "://" + uri.getAuthority();
 
-        // Parse bucket and key from path.
-        std::smatch match;
-        std::regex_search(uri.getPath(), match, bucket_key_pattern);
-        if (!match.empty())
+        if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket))
         {
-            bucket = match.str(1);
-            if (bucket.empty())
-                throw Exception ("Invalid S3 URI: no bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+            if (!bucket.empty())
+                bucket = bucket.substr(0, bucket.length() - 1); /// Drop the trailing '.' captured together with the bucket.
+            if (bucket.length() < 3 || bucket.length() > 63)
+                throw Exception("Invalid S3 URI bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
 
-            key = match.str(2);
-            if (key.empty())
-                throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+            key = uri.getPath().substr(uri.getPath().empty() ? 0 : 1); /// Skip the leading '/'; an empty path must not reach substr(1), which would throw std::out_of_range.
+            if (key.empty() || key == "/")
+                throw Exception("Invalid S3 URI key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+        }
+        else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
+        {
+            if (bucket.length() < 3 || bucket.length() > 63)
+                throw Exception("Invalid S3 URI bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+
+            if (key.empty() || key == "/")
+                throw Exception("Invalid S3 URI key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
         }
         else
-            throw Exception("Invalid S3 URI: no bucket or key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
+            throw Exception("Invalid S3 URI bucket or key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
     }
 }
 
diff --git a/dbms/src/IO/tests/gtest_s3_uri.cpp b/dbms/src/IO/tests/gtest_s3_uri.cpp
new file mode 100644
index 00000000000..93891b35ba2
--- /dev/null
+++ b/dbms/src/IO/tests/gtest_s3_uri.cpp
@@ -0,0 +1,53 @@
+#include
+
+#if USE_AWS_S3
+
+#include
+
+namespace
+{
+using namespace DB;
+
+class S3UriTest : public testing::TestWithParam
+{
+};
+
+TEST(S3UriTest, validPatterns)
+{
+    {
+        S3::URI uri(Poco::URI("https://jokserfn.s3.yandexcloud.net/data"));
+        ASSERT_EQ("https://jokserfn.s3.yandexcloud.net", uri.endpoint);
+        ASSERT_EQ("jokserfn", uri.bucket);
+        ASSERT_EQ("data", uri.key);
+    }
+    {
+        S3::URI uri(Poco::URI("https://storage.yandexcloud.net/jokserfn/data"));
+        ASSERT_EQ("https://storage.yandexcloud.net", uri.endpoint);
+        ASSERT_EQ("jokserfn", uri.bucket);
+        ASSERT_EQ("data", uri.key);
+    }
+}
+
+TEST_P(S3UriTest, invalidPatterns)
+{
+    ASSERT_ANY_THROW(S3::URI(Poco::URI(GetParam())));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    S3,
+    S3UriTest,
+    testing::Values(
+        "https:///",
+        "https://jokserfn.s3.yandexcloud.net/",
+        "https://.s3.yandexcloud.net/key",
+        "https://s3.yandexcloud.net/key",
+        "https://s3.yandexcloud.net/key/",
+        "https://s3.yandexcloud.net//",
+        "https://yandexcloud.net/",
+        "https://yandexcloud.net//",
+        "https://yandexcloud.net/bucket/",
+        "https://yandexcloud.net//key"));
+
+}
+
+#endif