Address PR comments

This commit is contained in:
Antonio Andelic 2023-03-29 11:08:44 +00:00
parent 6f1e50598a
commit 7b1ad221b2
6 changed files with 45 additions and 13 deletions

View File

@ -12,7 +12,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec
``` sql ``` sql
CREATE TABLE s3_engine_table (name String, value UInt32) CREATE TABLE s3_engine_table (name String, value UInt32)
ENGINE = S3(path, [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression]) ENGINE = S3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
[PARTITION BY expr] [PARTITION BY expr]
[SETTINGS ...] [SETTINGS ...]
``` ```
@ -185,13 +185,8 @@ Sometimes, it can produce problems when accessing some buckets that are public c
This issue can be avoided by using `NOSIGN` keyword, forcing the client to ignore all the credentials, and not sign the requests. This issue can be avoided by using `NOSIGN` keyword, forcing the client to ignore all the credentials, and not sign the requests.
``` sql ``` sql
SELECT * CREATE TABLE big_table (name String, value UInt32)
FROM s3( ENGINE = S3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv', NOSIGN, 'CSVWithNames');
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv',
NOSIGN,
'CSVWithNames'
)
LIMIT 5;
``` ```
## See also ## See also

View File

@ -135,9 +135,15 @@ void AuthSettings::updateFrom(const AuthSettings & from)
headers = from.headers; headers = from.headers;
region = from.region; region = from.region;
server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64;
use_environment_credentials = from.use_environment_credentials;
use_insecure_imds_request = from.use_insecure_imds_request; if (from.use_environment_credentials)
expiration_window_seconds = from.expiration_window_seconds; use_environment_credentials = from.use_environment_credentials;
if (from.use_insecure_imds_request)
use_insecure_imds_request = from.use_insecure_imds_request;
if (from.expiration_window_seconds)
expiration_window_seconds = from.expiration_window_seconds;
if (from.no_sign_request.has_value()) if (from.no_sign_request.has_value())
no_sign_request = *from.no_sign_request; no_sign_request = *from.no_sign_request;

View File

@ -96,6 +96,8 @@ static const std::unordered_set<std::string_view> optional_configuration_keys =
"upload_part_size_multiply_parts_count_threshold", "upload_part_size_multiply_parts_count_threshold",
"max_single_part_upload_size", "max_single_part_upload_size",
"max_connections", "max_connections",
"expiration_window_seconds",
"no_sign_request"
}; };
namespace ErrorCodes namespace ErrorCodes
@ -1289,6 +1291,8 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur
configuration.auth_settings.access_key_id = collection.getOrDefault<String>("access_key_id", ""); configuration.auth_settings.access_key_id = collection.getOrDefault<String>("access_key_id", "");
configuration.auth_settings.secret_access_key = collection.getOrDefault<String>("secret_access_key", ""); configuration.auth_settings.secret_access_key = collection.getOrDefault<String>("secret_access_key", "");
configuration.auth_settings.use_environment_credentials = collection.getOrDefault<UInt64>("use_environment_credentials", 0); configuration.auth_settings.use_environment_credentials = collection.getOrDefault<UInt64>("use_environment_credentials", 0);
configuration.auth_settings.no_sign_request = collection.getOrDefault<bool>("no_sign_request", 0);
configuration.auth_settings.expiration_window_seconds = collection.getOrDefault<UInt64>("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
configuration.format = collection.getOrDefault<String>("format", "auto"); configuration.format = collection.getOrDefault<String>("format", "auto");
configuration.compression_method = collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto")); configuration.compression_method = collection.getOrDefault<String>("compression_method", collection.getOrDefault<String>("compression", "auto"));

View File

@ -112,7 +112,7 @@ void TableFunctionS3::parseArgumentsImpl(
/// For 5 arguments we support 2 possible variants: /// For 5 arguments we support 2 possible variants:
/// - s3(source, access_key_id, access_key_id, format, structure) /// - s3(source, access_key_id, access_key_id, format, structure)
/// - s3(source, NOSIGN, format, structure, compression_method) /// - s3(source, NOSIGN, format, structure, compression_method)
/// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not.
else if (args.size() == 5) else if (args.size() == 5)
{ {
auto second_arg = checkAndGetLiteralArgument<String>(args[1], "NOSIGN/access_key_id"); auto second_arg = checkAndGetLiteralArgument<String>(args[1], "NOSIGN/access_key_id");

View File

@ -35,5 +35,9 @@
<access_key_id>minio</access_key_id> <access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key> <secret_access_key>minio123</secret_access_key>
</s3_parquet2> </s3_parquet2>
<s3_json_no_sign>
<url>http://minio1:9001/root/test_cache4.jsonl</url>
<no_sign_request>true</no_sign_request>
</s3_json_no_sign>
</named_collections> </named_collections>
</clickhouse> </clickhouse>

View File

@ -88,7 +88,10 @@ def started_cluster():
"AWS_ACCESS_KEY_ID": "aws", "AWS_ACCESS_KEY_ID": "aws",
"AWS_SECRET_ACCESS_KEY": "aws123", "AWS_SECRET_ACCESS_KEY": "aws123",
}, },
main_configs=["configs/use_environment_credentials.xml"], main_configs=[
"configs/use_environment_credentials.xml",
"configs/named_collections.xml",
],
) )
logging.info("Starting cluster...") logging.info("Starting cluster...")
@ -129,3 +132,23 @@ def test_with_invalid_environment_credentials(started_cluster):
f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache4.jsonl', {auth})" f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache4.jsonl', {auth})"
).strip() ).strip()
) )
def test_no_sign_named_collections(started_cluster):
instance = started_cluster.instances["s3_with_invalid_environment_credentials"]
bucket = started_cluster.minio_bucket
instance.query(
f"insert into function s3(s3_json_no_sign) select * from numbers(100) settings s3_truncate_on_insert=1"
)
with pytest.raises(helpers.client.QueryRuntimeException) as ei:
instance.query(
f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_cache4.jsonl')"
)
assert ei.value.returncode == 243
assert "HTTP response code: 403" in ei.value.stderr
assert "100" == instance.query(f"select count() from s3(s3_json_no_sign)").strip()