mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Fix validateKey/Bucket for S3
This commit is contained in:
parent
295c8d5686
commit
6fe63a80bc
@ -617,51 +617,55 @@ namespace S3
|
|||||||
uri = uri_;
|
uri = uri_;
|
||||||
storage_name = S3;
|
storage_name = S3;
|
||||||
|
|
||||||
try
|
if (uri.getHost().empty())
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");
|
||||||
|
|
||||||
|
String name;
|
||||||
|
String endpoint_authority_from_uri;
|
||||||
|
|
||||||
|
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
||||||
{
|
{
|
||||||
if (uri.getHost().empty())
|
is_virtual_hosted_style = true;
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");
|
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
||||||
|
validateBucket(bucket, uri);
|
||||||
|
|
||||||
String name;
|
if (!uri.getPath().empty())
|
||||||
String endpoint_authority_from_uri;
|
|
||||||
|
|
||||||
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
|
|
||||||
{
|
{
|
||||||
is_virtual_hosted_style = true;
|
/// Remove leading '/' from path to extract key.
|
||||||
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
|
key = uri.getPath().substr(1);
|
||||||
|
|
||||||
if (!uri.getPath().empty())
|
|
||||||
{
|
|
||||||
/// Remove leading '/' from path to extract key.
|
|
||||||
key = uri.getPath().substr(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
boost::to_upper(name);
|
|
||||||
if (name != S3 && name != COS)
|
|
||||||
{
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name));
|
|
||||||
}
|
|
||||||
if (name == S3)
|
|
||||||
{
|
|
||||||
storage_name = name;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
storage_name = COSN;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
|
||||||
|
boost::to_upper(name);
|
||||||
|
if (name != S3 && name != COS)
|
||||||
{
|
{
|
||||||
is_virtual_hosted_style = false;
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name));
|
||||||
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
}
|
||||||
|
if (name == S3)
|
||||||
|
{
|
||||||
|
storage_name = name;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
|
{
|
||||||
|
storage_name = COSN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (const Exception & e)
|
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
|
||||||
{
|
{
|
||||||
throw Exception(e.code(), "{} ({})", e.message(), uri.toString());
|
is_virtual_hosted_style = false;
|
||||||
|
endpoint = uri.getScheme() + "://" + uri.getAuthority();
|
||||||
|
validateBucket(bucket, uri);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
|
||||||
|
}
|
||||||
|
|
||||||
|
void URI::validateBucket(const String & bucket, const Poco::URI & uri)
|
||||||
|
{
|
||||||
|
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
|
||||||
|
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
|
||||||
|
if (bucket.length() < 3 || bucket.length() > 63)
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
|
||||||
|
quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,8 +75,7 @@ struct URI
|
|||||||
|
|
||||||
explicit URI(const Poco::URI & uri_);
|
explicit URI(const Poco::URI & uri_);
|
||||||
|
|
||||||
static void validateBucket(const String & bucket);
|
static void validateBucket(const String & bucket, const Poco::URI & uri);
|
||||||
static void validateKey(const String & bucket);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,7 @@ namespace DB
|
|||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
{
|
{
|
||||||
extern const int CANNOT_PARSE_TEXT;
|
extern const int CANNOT_PARSE_TEXT;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
extern const int S3_ERROR;
|
extern const int S3_ERROR;
|
||||||
extern const int UNEXPECTED_EXPRESSION;
|
extern const int UNEXPECTED_EXPRESSION;
|
||||||
@ -478,32 +479,26 @@ private:
|
|||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void validateBucket(const StringRef & str)
|
static void validateBucket(const String & str)
|
||||||
{
|
{
|
||||||
/// See:
|
S3::URI::validateBucket(str, {});
|
||||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
|
|
||||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#createbucket
|
|
||||||
|
|
||||||
if (str.size < 3 || 222 < str.size)
|
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data()), str.size()))
|
||||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT,
|
|
||||||
"Bucket name length is out of bounds in virtual hosted style S3 URI: {}", quoteString(str));
|
|
||||||
|
|
||||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data), str.size))
|
|
||||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name");
|
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name");
|
||||||
|
|
||||||
validatePartitionKey(str, false);
|
validatePartitionKey(str, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void validateKey(const StringRef & str)
|
static void validateKey(const String & str)
|
||||||
{
|
{
|
||||||
/// See:
|
/// See:
|
||||||
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
|
/// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
|
||||||
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject
|
/// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject
|
||||||
|
|
||||||
if (str.size < 1 || 1024 < str.size)
|
if (str.empty() || str.size() > 1024)
|
||||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect key length (min - 2, max - 1023 characters), got: {}", str.size);
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size());
|
||||||
|
|
||||||
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data), str.size))
|
if (!DB::UTF8::isValidUTF8(reinterpret_cast<const UInt8 *>(str.data()), str.size()))
|
||||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key");
|
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key");
|
||||||
|
|
||||||
validatePartitionKey(str, true);
|
validatePartitionKey(str, true);
|
||||||
|
@ -4,6 +4,6 @@ INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.cs
|
|||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc\xc3\x28abc'); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc\xc3\x28abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc}{abc'); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc}{abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc*abc'); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/test_{_partition_id}.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'abc*abc'); -- { serverError CANNOT_PARSE_TEXT }
|
||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/{_partition_id}', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/foo/{_partition_id}', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError BAD_ARGUMENTS }
|
||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, ''); -- { serverError BAD_ARGUMENTS }
|
||||||
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'aa/bb'); -- { serverError CANNOT_PARSE_TEXT }
|
INSERT INTO TABLE FUNCTION s3('http://localhost:9001/{_partition_id}/key.csv', 'admin', 'admin', 'CSV', 'id Int32, val String') PARTITION BY val VALUES (1, 'aa/bb'); -- { serverError CANNOT_PARSE_TEXT }
|
||||||
|
Loading…
Reference in New Issue
Block a user