mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Improved validation of S3 buckets and keys.
This commit is contained in:
parent
6e10d28c78
commit
0479edd47e
@ -1,3 +1,5 @@
|
||||
#include <Functions/isValidUTF8.h>
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringOrArrayToT.h>
|
||||
@ -10,15 +12,17 @@
|
||||
# include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
|
||||
/// inspired by https://github.com/cyb70289/utf8/
|
||||
struct ValidUTF8Impl
|
||||
{
|
||||
/*
|
||||
MIT License
|
||||
|
||||
@ -71,7 +75,7 @@ SOFTWARE.
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
*/
|
||||
|
||||
static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len)
|
||||
inline UInt8 ValidUTF8Impl::isValidUTF8Naive(const UInt8 * data, UInt64 len)
|
||||
{
|
||||
while (len)
|
||||
{
|
||||
@ -139,9 +143,9 @@ SOFTWARE.
|
||||
}
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
|
||||
inline UInt8 ValidUTF8Impl::isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
|
||||
#else
|
||||
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
|
||||
inline UInt8 ValidUTF8Impl::isValidUTF8(const UInt8 * data, UInt64 len)
|
||||
{
|
||||
/*
|
||||
* Map high nibble of "First Byte" to legal character length minus 1
|
||||
@ -291,9 +295,7 @@ SOFTWARE.
|
||||
}
|
||||
#endif
|
||||
|
||||
static constexpr bool is_fixed_to_constant = false;
|
||||
|
||||
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
|
||||
void ValidUTF8Impl::vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
size_t size = offsets.size();
|
||||
size_t prev_offset = 0;
|
||||
@ -304,35 +306,34 @@ SOFTWARE.
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {}
|
||||
/// Intentionally empty: all three arguments are ignored and `res` is left untouched.
/// The struct sets is_fixed_to_constant = false, so presumably this overload is never
/// selected by FunctionStringOrArrayToT — confirm against that template.
void ValidUTF8Impl::vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {}
|
||||
|
||||
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
|
||||
/// Validates consecutive fixed-width values of `n` bytes each stored back to back in `data`.
/// For every complete value i, res[i] receives the result of isValidUTF8 on bytes [i * n, i * n + n).
/// NOTE(review): `res` is only indexed here, never resized, so the caller presumably sizes it
/// to at least data.size() / n elements; `n == 0` would divide by zero — confirm callers exclude it.
void ValidUTF8Impl::vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
/// Number of complete n-byte values; any trailing partial value is not examined.
size_t size = data.size() / n;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
res[i] = isValidUTF8(data.data() + i * n, n);
|
||||
}
|
||||
|
||||
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &)
|
||||
/// Array arguments are not supported by isValidUTF8: this overload never returns and
/// always throws Exception with ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT. Both parameters are unused.
[[noreturn]] void ValidUTF8Impl::array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &)
|
||||
{
|
||||
throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &)
|
||||
/// UUID arguments are not supported by isValidUTF8: this overload never returns and
/// always throws Exception with ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT. All parameters are unused.
[[noreturn]] void ValidUTF8Impl::uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &)
|
||||
{
|
||||
throw Exception("Cannot apply function isValidUTF8 to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
};
|
||||
|
||||
struct NameIsValidUTF8
|
||||
{
|
||||
static constexpr auto name = "isValidUTF8";
|
||||
};
|
||||
using FunctionValidUTF8 = FunctionStringOrArrayToT<ValidUTF8Impl, NameIsValidUTF8, UInt8>;
|
||||
struct NameIsValidUTF8
|
||||
{
|
||||
static constexpr auto name = "isValidUTF8";
|
||||
};
|
||||
using FunctionValidUTF8 = FunctionStringOrArrayToT<ValidUTF8Impl, NameIsValidUTF8, UInt8>;
|
||||
|
||||
void registerFunctionIsValidUTF8(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionValidUTF8>();
|
||||
}
|
||||
void registerFunctionIsValidUTF8(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionValidUTF8>();
|
||||
}
|
||||
|
||||
}
|
||||
|
26
src/Functions/isValidUTF8.h
Normal file
26
src/Functions/isValidUTF8.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Collection of static helpers that check whether byte sequences are valid UTF-8.
/// It is plugged into FunctionStringOrArrayToT as the implementation of the SQL function
/// isValidUTF8, and is exposed in this header so other code can include it.
/// NOTE(review): isValidUTF8Naive and isValidUTF8 are declared `inline` here while their
/// definitions live in isValidUTF8.cpp; calling them from another translation unit may fail
/// to link — confirm before using them outside that file.
struct ValidUTF8Impl
|
||||
{
|
||||
/// Scalar validator over the `len` bytes starting at `data`; returns a UInt8 flag
/// (presumably 1 for valid and 0 for invalid — the body is not visible here, confirm).
static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len);
|
||||
/// Main entry point. When __SSE4_1__ is not defined it simply forwards to isValidUTF8Naive;
/// otherwise a separate implementation is compiled in its place.
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len);
|
||||
|
||||
/// Presumably tells FunctionStringOrArrayToT that FixedString input is processed row by row
/// via vectorFixedToVector rather than folded into a constant — confirm against that template.
static constexpr bool is_fixed_to_constant = false;
|
||||
|
||||
/// Variable-length string column overload: `data` holds the bytes, `offsets` delimits rows,
/// results are written to `res` (one entry per row, presumably — loop body not visible here).
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res);
|
||||
/// Defined as an empty function that ignores every argument.
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, UInt8 & res);
|
||||
/// Fixed-width overload: treats `data` as back-to-back values of `n` bytes and stores
/// isValidUTF8 of the i-th value in res[i].
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res);
|
||||
|
||||
/// Unsupported argument type: always throws ILLEGAL_TYPE_OF_ARGUMENT.
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &);
|
||||
/// Unsupported argument type: always throws ILLEGAL_TYPE_OF_ARGUMENT.
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &);
|
||||
};
|
||||
|
||||
}
|
@ -2,9 +2,11 @@
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
# include <IO/S3Common.h>
|
||||
|
||||
# include <Common/quoteString.h>
|
||||
|
||||
# include <IO/S3Common.h>
|
||||
# include <Functions/isValidUTF8.h>
|
||||
# include <IO/WriteBufferFromString.h>
|
||||
# include <Storages/StorageS3Settings.h>
|
||||
|
||||
@ -616,55 +618,82 @@ namespace S3
|
||||
uri = uri_;
|
||||
storage_name = S3;
|
||||
|
||||
if (uri.getHost().empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI: {}", uri.toString());
|
||||
|
||||
String name;
|
||||
String endpoint_authority_from_uri;
|
||||
|
||||
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
||||
try
|
||||
{
|
||||
is_virtual_hosted_style = true;
|
||||
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
||||
if (uri.getHost().empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");
|
||||
|
||||
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
|
||||
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
|
||||
if (bucket.length() < 3 || bucket.length() > 63)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Bucket name length is out of bounds in virtual hosted style S3 URI: {} ({})", quoteString(bucket), uri.toString());
|
||||
String name;
|
||||
String endpoint_authority_from_uri;
|
||||
|
||||
if (!uri.getPath().empty())
|
||||
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
||||
{
|
||||
/// Remove leading '/' from path to extract key.
|
||||
key = uri.getPath().substr(1);
|
||||
is_virtual_hosted_style = true;
|
||||
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
||||
|
||||
if (!uri.getPath().empty())
|
||||
{
|
||||
/// Remove leading '/' from path to extract key.
|
||||
key = uri.getPath().substr(1);
|
||||
}
|
||||
|
||||
boost::to_upper(name);
|
||||
if (name != S3 && name != COS)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name));
|
||||
}
|
||||
if (name == S3)
|
||||
{
|
||||
storage_name = name;
|
||||
}
|
||||
else
|
||||
{
|
||||
storage_name = COSN;
|
||||
}
|
||||
}
|
||||
|
||||
boost::to_upper(name);
|
||||
if (name != S3 && name != COS)
|
||||
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {} ({})", quoteString(name), uri.toString());
|
||||
}
|
||||
if (name == S3)
|
||||
{
|
||||
storage_name = name;
|
||||
is_virtual_hosted_style = false;
|
||||
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
||||
}
|
||||
else
|
||||
{
|
||||
storage_name = COSN;
|
||||
}
|
||||
}
|
||||
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
||||
{
|
||||
is_virtual_hosted_style = false;
|
||||
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
|
||||
|
||||
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
|
||||
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
|
||||
if (bucket.length() < 3 || bucket.length() > 63)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {} ({})", quoteString(bucket), uri.toString());
|
||||
validateBucket(bucket);
|
||||
validateKey(key);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI: {}", uri.toString());
|
||||
catch(const Exception & e)
|
||||
{
|
||||
throw Exception(e.code(), "{} ({})", e.message(), uri.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the length of a bucket name parsed from an S3 URI.
/// Throws Exception with ErrorCodes::BAD_ARGUMENTS when the name is shorter than 3
/// or longer than 222 bytes; no other naming rule (character set, dots, etc.) is checked.
/// NOTE(review): the message mentions "virtual hosted style", but the visible constructor
/// appears to call this for path-style URIs as well — consider a style-neutral wording.
void URI::validateBucket(const String & bucket)
|
||||
{
|
||||
/// See:
|
||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
|
||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#createbucket
|
||||
|
||||
/// Upper bound of 222 is wider than the 63 used by the check this replaces; presumably the
/// most permissive limit among the services linked above — TODO confirm.
if (bucket.length() < 3 || bucket.length() > 222)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Bucket name length is out of bounds in virtual hosted style S3 URI: {}", quoteString(bucket));
|
||||
}
|
||||
|
||||
/// Placeholder for object key validation: currently performs no checks and never throws.
/// The rules below differ between providers, which is presumably why none of them is enforced;
/// they are kept as commented-out reference code only.
void URI::validateKey(const String & /*key*/)
|
||||
{
|
||||
/// See:
|
||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
|
||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject
|
||||
|
||||
/// The following is valid for AWS S3:
|
||||
/// if (key.length() > 1024)
|
||||
/// throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key name is too long (showing first 1024 characters): {}", quoteString(key.substr(0, 1024) + "..."));
|
||||
/// NOTE(review): ValidUTF8Impl::isValidUTF8 is declared with a `const UInt8 *` argument, so
/// `key.data()` would presumably need a cast before the check below could be enabled.
/// if (!ValidUTF8Impl::isValidUTF8(key.data(), key.size()))
|
||||
/// throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key name must be valid UTF-8 string: {}", quoteString(key));
|
||||
|
||||
/// The following is valid for IBM COS:
|
||||
/// if (key.length() < 1)
|
||||
/// throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key name is too short (0 bytes long)");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,6 +74,9 @@ struct URI
|
||||
bool is_virtual_hosted_style;
|
||||
|
||||
explicit URI(const Poco::URI & uri_);
|
||||
|
||||
static void validateBucket(const String & bucket);
|
||||
/// Validation hook for the object key parsed from an S3 URI (counterpart of validateBucket).
static void validateKey(const String & key);
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user