From ab101eaff8ff36aa31c50bf3c20e2e4e23fe571a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 6 Sep 2023 18:21:24 +0000 Subject: [PATCH 1/3] Fix possible error 'URI contains invalid characters' in s3 table function --- src/Storages/StorageS3.cpp | 8 ++++++-- src/TableFunctions/TableFunctionS3.cpp | 9 +++++---- ...esigned_url_and_url_with_special_characters.reference | 0 ..._s3_presigned_url_and_url_with_special_characters.sql | 4 ++++ 4 files changed, 15 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.reference create mode 100644 tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 24b2279bfdc..5078d21141b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1246,11 +1246,13 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); auto filename = collection.getOrDefault("filename", ""); + String url; if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); + url = std::filesystem::path(collection.get("url")) / filename; else - configuration.url = S3::URI(collection.get("url")); + url = collection.get("url"); + configuration.url = S3::URI(url); configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); @@ -1258,6 +1260,8 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); configuration.format = collection.getOrDefault("format", configuration.format); + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); configuration.structure = collection.getOrDefault("structure", "auto"); diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 0ae3a19dc7f..94ce29f1116 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -152,7 +152,8 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + String url = checkAndGetLiteralArgument(args[0], "url"); + configuration.url = S3::URI(url); if (args_to_idx.contains("format")) { @@ -176,12 +177,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); configuration.auth_settings.no_sign_request = no_sign_request; + + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); } configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url.uri.getPath()).getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.reference b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql new file mode 100644 index 00000000000..e6954fd9cbf --- /dev/null +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -0,0 +1,4 @@ +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } + +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } + From e11e5fbe2e8e50b298a05f6c8c8c8d2e9b0ffbef Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Sep 2023 21:10:30 +0200 Subject: [PATCH 2/3] Fix fasttest --- .../02873_s3_presigned_url_and_url_with_special_characters.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql index e6954fd9cbf..da76a5cb88f 100644 --- a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } From c6ef811b82e4c1ccc30c224ab285255399a7bfc7 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 7 Sep 2023 11:05:06 +0000 Subject: [PATCH 3/3] Fix tests --- src/Storages/StorageS3.cpp | 8 ++------ src/TableFunctions/TableFunctionS3.cpp | 5 +++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 5078d21141b..24b2279bfdc 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1246,13 +1246,11 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); auto filename = collection.getOrDefault("filename", ""); - String url; if (!filename.empty()) - url = std::filesystem::path(collection.get("url")) / filename; + configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); else - url = collection.get("url"); + configuration.url = S3::URI(collection.get("url")); - configuration.url = S3::URI(url); configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); @@ -1260,8 +1258,6 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); configuration.format = collection.getOrDefault("format", configuration.format); - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); configuration.structure = collection.getOrDefault("structure", "auto"); diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 94ce29f1116..2e9527cff6c 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -58,6 +58,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) { StorageS3::processNamedCollectionResult(configuration, *named_collection); + if (configuration.format == "auto") + { + String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); + configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); + } } else {