diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index fd27d4b6ed9..9868b2a05a8 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -258,7 +258,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. -- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Enabled by default. ## S3-related Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index b14eb84392f..ea7820c1aec 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -317,7 +317,7 @@ SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. - [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. -- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Enabled by default. **See Also** diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 66b8ffe31c8..9f913a4ff9a 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -433,7 +433,7 @@ Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query creates a new file. )", 0) \ - DECLARE(Bool, s3_skip_empty_files, false, R"( + DECLARE(Bool, s3_skip_empty_files, true, R"( Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables. Possible values: diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 18a9dd6ecbf..f0d3e001362 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -87,6 +87,7 @@ static std::initializer_listgetFileName(); - chassert(!filename.empty()); /// file iterator could get new keys after new iteration if (read_keys.size() > prev_read_keys_size) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 74be640d83d..fc8181ebab4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -306,7 +306,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade { object_info = file_iterator->next(processor); - if (!object_info || object_info->getFileName().empty()) + if (!object_info || object_info->getPath().empty()) return {}; if (!object_info->metadata) diff --git a/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.reference b/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.reference new file mode 100644 index 00000000000..373831be4eb --- /dev/null +++ b/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.reference @@ -0,0 +1,16 @@ +0 +1 +2 +3 +0 +1 +2 +3 +0 +1 +2 +3 +0 +1 +2 +3 diff --git a/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.sql b/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.sql new file mode 100644 index 00000000000..d3dba883f23 --- /dev/null +++ b/tests/queries/0_stateless/03271_s3_table_function_asterisk_glob.sql @@ -0,0 +1,28 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on AWS + +SET s3_truncate_on_insert = 1; +SET s3_skip_empty_files = 0; + +INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/', format=Parquet) SELECT 0 as num; +INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file1', format=Parquet) SELECT 1 as num; +INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file2', format=Parquet) SELECT 2 as num; +INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file3', format=Parquet) SELECT 3 as num; + +SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1; +SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4; + +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1; +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4; + +-- Empty "directory" files created implicitly by S3 console: +-- https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html +SELECT * +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN) +LIMIT 1 +FORMAT Null; + +SELECT * +FROM s3Cluster('test_cluster_two_shards_localhost', 'https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN) +LIMIT 1 +Format Null;