mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Fix validateKey/Bucket for S3
This commit is contained in:
parent
295c8d5686
commit
6fe63a80bc
@ -617,51 +617,55 @@ namespace S3
|
||||
uri = uri_;
|
||||
storage_name = S3;
|
||||
|
||||
try
|
||||
if (uri.getHost().empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");
|
||||
|
||||
String name;
|
||||
String endpoint_authority_from_uri;
|
||||
|
||||
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
||||
{
|
||||
if (uri.getHost().empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");
|
||||
is_virtual_hosted_style = true;
|
||||
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
||||
validateBucket(bucket, uri);
|
||||
|
||||
String name;
|
||||
String endpoint_authority_from_uri;
|
||||
|
||||
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
||||
if (!uri.getPath().empty())
|
||||
{
|
||||
is_virtual_hosted_style = true;
|
||||
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
||||
|
||||
if (!uri.getPath().empty())
|
||||
{
|
||||
/// Remove leading '/' from path to extract key.
|
||||
key = uri.getPath().substr(1);
|
||||
}
|
||||
|
||||
boost::to_upper(name);
|
||||
if (name != S3 && name != COS)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name));
|
||||
}
|
||||
if (name == S3)
|
||||
{
|
||||
storage_name = name;
|
||||
}
|
||||
else
|
||||
{
|
||||
storage_name = COSN;
|
||||
}
|
||||
/// Remove leading '/' from path to extract key.
|
||||
key = uri.getPath().substr(1);
|
||||
}
|
||||
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
||||
|
||||
boost::to_upper(name);
|
||||
if (name != S3 && name != COS)
|
||||
{
|
||||
is_virtual_hosted_style = false;
|
||||
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name));
|
||||
}
|
||||
if (name == S3)
|
||||
{
|
||||
storage_name = name;
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
|
||||
{
|
||||
storage_name = COSN;
|
||||
}
|
||||
}
|
||||
catch (const Exception & e)
|
||||
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
||||
{
|
||||
throw Exception(e.code(), "{} ({})", e.message(), uri.toString());
|
||||
is_virtual_hosted_style = false;
|
||||
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
||||
validateBucket(bucket, uri);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
|
||||
}
|
||||
|
||||
void URI::validateBucket(const String & bucket, const Poco::URI & uri)
|
||||
{
|
||||
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
|
||||
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
|
||||
if (bucket.length() < 3 || bucket.length() > 63)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
|
||||
quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,8 +75,7 @@ struct URI
|
||||
|
||||
explicit URI(const Poco::URI & uri_);
|
||||
|
||||
static void validateBucket(const String & bucket);
|
||||
static void validateKey(const String & bucket);
|
||||
static void validateBucket(const String & bucket, const Poco::URI & uri);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -68,6 +68,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_PARSE_TEXT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int S3_ERROR;
|
||||
extern const int UNEXPECTED_EXPRESSION;
|
||||
@ -478,32 +479,26 @@ private:
|
||||
return it->second;
|
||||
}
|
||||
|
||||
static void validateBucket(const StringRef & str)
|
||||
static void validateBucket(const String & str)
|
||||
{
|
||||
/// See:
|
||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
|
||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#createbucket
|
||||
S3::URI::validateBucket(str, {});
|
||||
|
||||
if (str.size < 3 || 222 < str.size)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT,
|
||||
"Bucket name length is out of bounds in virtual hosted style S3 URI: {}", quoteString(str));
|
||||
|
||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data), str.size))
|
||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data()), str.size()))
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name");
|
||||
|
||||
validatePartitionKey(str, false);
|
||||
}
|
||||
|
||||
static void validateKey(const StringRef & str)
|
||||
static void validateKey(const String & str)
|
||||
{
|
||||
/// See:
|
||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
|
||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject
|
||||
|
||||
if (str.size < 1 || 1024 < str.size)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect key length (min - 2, max - 1023 characters), got: {}", str.size);
|
||||
if (str.empty() || str.size() > 1024)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size());
|
||||
|
||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data), str.size))
|
||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data()), str.size()))
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key");
|
||||
|
||||
validatePartitionKey(str, true);
|
||||
|
@ -4,6 +4,6 @@ INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.cs
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc\xc3\x28abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc}{abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc*abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/{_partition_id}', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError CANNOT_PARSE_TEXT }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError CANNOT_PARSE_TEXT }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/{_partition_id}', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError BAD_ARGUMENTS }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError BAD_ARGUMENTS }
|
||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'aa/bb'); -- { serverError CANNOT_PARSE_TEXT }
|
||||
|
Loading…
Reference in New Issue
Block a user