From 87f451d7641e1f2b5392eeddf4c0655bae58236b Mon Sep 17 00:00:00 2001
From: Max K
Date: Thu, 13 Jun 2024 22:36:57 +0200
Subject: [PATCH] Revert "Change default s3_throw_on_zero_files_match to true, document that presigned S3 URLs are not supported"

---
 docs/en/sql-reference/table-functions/s3.md   | 11 +---------
 src/Core/Settings.h                           |  6 +++---
 src/Core/SettingsChangesHistory.h             |  6 ++----
 .../ObjectStorage/Azure/Configuration.cpp     |  1 -
 .../ObjectStorage/HDFS/Configuration.cpp      |  1 -
 .../ObjectStorage/S3/Configuration.cpp        |  1 -
 .../ObjectStorage/StorageObjectStorage.h      |  1 -
 .../StorageObjectStorageSource.cpp            | 12 +++--------
 .../StorageObjectStorageSource.h              |  2 --
 src/Storages/S3Queue/StorageS3Queue.cpp       |  2 +-
 tests/integration/test_storage_hdfs/test.py   | 21 ++++++-------------
 ...02481_s3_throw_if_mismatch_files.reference |  4 ++--
 .../02481_s3_throw_if_mismatch_files.sql      |  4 ++--
 ...ed_url_and_url_with_special_characters.sql |  4 ++--
 .../aspell-ignore/en/aspell-dict.txt          |  5 ++---
 15 files changed, 24 insertions(+), 57 deletions(-)

diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 7538d66996f..1a7e2b8d66a 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -248,6 +248,7 @@ FROM s3(
 LIMIT 5;
 ```
+
 ## Working with archives
 
 Suppose that we have several archive files with following URIs on S3:
 
@@ -265,16 +266,6 @@ FROM s3(
 );
 ```
 
-## Presigned URL
-
-Presigned URLs are currently not supported. Use `url()` table function instead:
-```sql
-SELECT *
-FROM url(
-    'https://example.amazonaws.com/f.csv?X-Amz-Security-Token=[...]'
-)
-```
-
 ## Virtual Columns {#virtual-columns}
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index efa84f19f78..b3e83092a77 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -115,9 +115,9 @@ class IColumn;
     M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
     M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
     M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \
-    M(Bool, s3_throw_on_zero_files_match, true, "Throw an error, when ListObjects request cannot match any files", 0) \
-    M(Bool, hdfs_throw_on_zero_files_match, true, "Throw an error, when ListObjects request cannot match any files", 0) \
-    M(Bool, azure_throw_on_zero_files_match, true, "Throw an error, when ListObjects request cannot match any files", 0) \
+    M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
+    M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
+    M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
     M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \
     M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \
     M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index 31da77fddaf..69bc8c5d207 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -88,9 +88,8 @@ static const std::map StorageObjectStorageSourc
         iterator = std::make_unique(
             object_storage, configuration, predicate, virtual_columns, local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size,
-            settings.throw_on_zero_files_match, settings.throw_on_zero_files_match_setting_name,
-            file_progress_callback);
+            settings.throw_on_zero_files_match, file_progress_callback);
     }
     else
     {
@@ -426,7 +425,6 @@ StorageObjectStorageSource::GlobIterator::GlobIterator(
     ObjectInfos * read_keys_,
     size_t list_object_keys_size,
     bool throw_on_zero_files_match_,
-    const char * throw_on_zero_files_match_setting_name_,
     std::function file_progress_callback_)
     : IIterator("GlobIterator")
     , WithContext(context_)
@@ -434,7 +432,6 @@ StorageObjectStorageSource::GlobIterator::GlobIterator(
     , configuration(configuration_)
     , virtual_columns(virtual_columns_)
     , throw_on_zero_files_match(throw_on_zero_files_match_)
-    , throw_on_zero_files_match_setting_name(throw_on_zero_files_match_setting_name_)
     , read_keys(read_keys_)
     , file_progress_callback(file_progress_callback_)
 {
@@ -487,11 +484,8 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne
     if (first_iteration && !object_info && throw_on_zero_files_match)
     {
         throw Exception(ErrorCodes::FILE_DOESNT_EXIST,
-                        "Can not match any files with path {}{}",
-                        configuration->getPath(),
-                        throw_on_zero_files_match_setting_name
-                            ? fmt::format(" (this error can be suppressed by setting {} = false)", throw_on_zero_files_match_setting_name)
-                            : "");
+                        "Can not match any files with path {}",
+                        configuration->getPath());
     }
     first_iteration = false;
     return object_info;
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
index 5e76d8e979f..fd7c7aa7102 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -168,7 +168,6 @@ public:
         ObjectInfos * read_keys_,
         size_t list_object_keys_size,
         bool throw_on_zero_files_match_,
-        const char * throw_on_zero_files_match_setting_name_,
         std::function file_progress_callback_ = {});
 
     ~GlobIterator() override = default;
 
@@ -185,7 +184,6 @@ private:
     const ConfigurationPtr configuration;
     const NamesAndTypesList virtual_columns;
     const bool throw_on_zero_files_match;
-    const char * throw_on_zero_files_match_setting_name;
 
     size_t index = 0;
 
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 7e26335c691..afb75a21b21 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -486,7 +486,7 @@ std::shared_ptr StorageS3Queue::createFileIterator
 {
     auto settings = configuration->getQuerySettings(local_context);
     auto glob_iterator = std::make_unique(
-        object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, settings.throw_on_zero_files_match_setting_name);
+        object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match);
 
     return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log);
 }
diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py
index 818a1e7447d..47d8f44c0b7 100644
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -111,7 +111,7 @@ def test_storage_with_multidirectory_glob(started_cluster):
 
     try:
         node1.query(
-            "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV) SETTINGS hdfs_throw_on_zero_files_match=0"
+            "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)"
         )
         assert False, "Exception have to be thrown"
     except Exception as ex:
@@ -220,22 +220,14 @@ def test_globs_in_read_table(started_cluster):
         )
         print("inside_table_func ", inside_table_func)
         assert (
-            node1.query(
-                "select * from hdfs("
-                + inside_table_func
-                + ") settings hdfs_throw_on_zero_files_match=0"
-            )
+            node1.query("select * from hdfs(" + inside_table_func + ")")
             == paths_amount * some_data
         )
         assert node1.query(
-            "select count(distinct _path) from hdfs("
-            + inside_table_func
-            + ") settings hdfs_throw_on_zero_files_match=0"
+            "select count(distinct _path) from hdfs(" + inside_table_func + ")"
         ).rstrip() == str(paths_amount)
         assert node1.query(
-            "select count(distinct _file) from hdfs("
-            + inside_table_func
-            + ") settings hdfs_throw_on_zero_files_match=0"
+            "select count(distinct _file) from hdfs(" + inside_table_func + ")"
         ).rstrip() == str(files_amount)
 
 
@@ -643,7 +635,6 @@ def test_cluster_join(started_cluster):
         SELECT l.id,r.id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as l
         JOIN hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as r
         ON l.id = r.id
-        SETTINGS hdfs_throw_on_zero_files_match=0
     """
     )
     assert "AMBIGUOUS_COLUMN_NAME" not in result
@@ -652,13 +643,13 @@ def test_cluster_macro(started_cluster):
     with_macro = node1.query(
         """
-        SELECT id FROM hdfsCluster('{default_cluster_macro}', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') SETTINGS hdfs_throw_on_zero_files_match=0
+        SELECT id FROM hdfsCluster('{default_cluster_macro}', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32')
     """
     )
 
     no_macro = node1.query(
         """
-        SELECT id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') SETTINGS hdfs_throw_on_zero_files_match=0
+        SELECT id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32')
     """
     )
 
diff --git a/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.reference b/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.reference
index 752b12ff3bd..a7096a686f5 100644
--- a/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.reference
+++ b/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.reference
@@ -3,5 +3,5 @@ drop table if exists test_02481_mismatch_files;
 create table test_02481_mismatch_files (a UInt64, b String) engine = S3(s3_conn, filename='test_02481_mismatch_files_{_partition_id}', format=Parquet) partition by a;
 set s3_truncate_on_insert=1;
 insert into test_02481_mismatch_files values (1, 'a'), (22, 'b'), (333, 'c');
-select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet) settings s3_throw_on_zero_files_match=0; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
-select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet); -- { serverError FILE_DOESNT_EXIST }
+select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
+select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet) settings s3_throw_on_zero_files_match=1; -- { serverError FILE_DOESNT_EXIST }
diff --git a/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.sql b/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.sql
index cd500b58946..7ec1d3ebd5f 100644
--- a/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.sql
+++ b/tests/queries/0_stateless/02481_s3_throw_if_mismatch_files.sql
@@ -7,6 +7,6 @@ create table test_02481_mismatch_files (a UInt64, b String) engine = S3(s3_conn,
 set s3_truncate_on_insert=1;
 insert into test_02481_mismatch_files values (1, 'a'), (22, 'b'), (333, 'c');
-select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet) settings s3_throw_on_zero_files_match=0; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
+select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
 
-select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet); -- { serverError FILE_DOESNT_EXIST }
+select a, b from s3(s3_conn, filename='test_02481_mismatch_filesxxx*', format=Parquet) settings s3_throw_on_zero_files_match=1; -- { serverError FILE_DOESNT_EXIST }
 
diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql
index 078a5701aca..1e99eb8b83d 100644
--- a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql
+++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql
@@ -1,5 +1,5 @@
 -- Tags: no-fasttest
 
-select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet', NOSIGN) settings s3_throw_on_zero_files_match=0; -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
+select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
 
-select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD', NOSIGN) settings s3_throw_on_zero_files_match=0; -- { serverError CANNOT_DETECT_FORMAT }
+select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_DETECT_FORMAT }
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index c4b70de1f65..84682689934 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1,4 +1,4 @@
-personal_ws-1.1 en 2912
+personal_ws-1.1 en 2758
 AArch
 ACLs
 ALTERs
@@ -722,7 +722,6 @@ Postgres
 PostgresSQL
 Precompiled
 Preprocess
-Presigned
 PrettyCompact
 PrettyCompactMonoBlock
 PrettyCompactNoEscapes
@@ -1937,9 +1936,9 @@ loghouse
 london
 lookups
 loongarch
+lowcardinality
 lowCardinalityIndices
 lowCardinalityKeys
-lowcardinality
 lowerUTF
 lowercased
 lttb