mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #71947 from ClickHouse/fix_weird_problem
Fix weird case when `s3`/`s3Cluster` return incomplete result or exception
This commit is contained in:
commit
c6a10151d9
@ -258,7 +258,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating a file before inserting into it. Disabled by default.
|
||||
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Enabled by default.
|
||||
|
||||
## S3-related Settings {#settings}
|
||||
|
||||
|
@ -317,7 +317,7 @@ SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating a file before inserting into it. Disabled by default.
|
||||
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Enabled by default.
|
||||
|
||||
**See Also**
|
||||
|
||||
|
@ -433,7 +433,7 @@ Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query creates a new file.
|
||||
)", 0) \
|
||||
DECLARE(Bool, s3_skip_empty_files, false, R"(
|
||||
DECLARE(Bool, s3_skip_empty_files, true, R"(
|
||||
Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
|
||||
|
||||
Possible values:
|
||||
|
@ -87,6 +87,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit", 1, 1, "Rename of setting skip_download_if_exceeds_query_cache_limit"},
|
||||
{"filesystem_cache_prefer_bigger_buffer_size", true, true, "New setting"},
|
||||
{"read_in_order_use_virtual_row", false, false, "Use virtual row while reading in order of primary key or its monotonic function fashion. It is useful when searching over multiple parts as only relevant ones are touched."},
|
||||
{"s3_skip_empty_files", false, true, "We hope it will provide better UX"},
|
||||
{"filesystem_cache_boundary_alignment", 0, 0, "New setting"},
|
||||
{"push_external_roles_in_interserver_queries", false, false, "New setting."},
|
||||
}
|
||||
|
@ -218,7 +218,6 @@ ReadBufferIterator::Data ReadBufferIterator::next()
|
||||
}
|
||||
|
||||
const auto filename = current_object_info->getFileName();
|
||||
chassert(!filename.empty());
|
||||
|
||||
/// file iterator could get new keys after new iteration
|
||||
if (read_keys.size() > prev_read_keys_size)
|
||||
|
@ -306,7 +306,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
|
||||
{
|
||||
object_info = file_iterator->next(processor);
|
||||
|
||||
if (!object_info || object_info->getFileName().empty())
|
||||
if (!object_info || object_info->getPath().empty())
|
||||
return {};
|
||||
|
||||
if (!object_info->metadata)
|
||||
|
@ -0,0 +1,16 @@
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
@ -0,0 +1,28 @@
|
||||
-- Tags: no-parallel, no-fasttest
|
||||
-- Tag no-fasttest: Depends on AWS
|
||||
|
||||
SET s3_truncate_on_insert = 1;
|
||||
SET s3_skip_empty_files = 0;
|
||||
|
||||
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/', format=Parquet) SELECT 0 as num;
|
||||
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file1', format=Parquet) SELECT 1 as num;
|
||||
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file2', format=Parquet) SELECT 2 as num;
|
||||
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file3', format=Parquet) SELECT 3 as num;
|
||||
|
||||
SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1;
|
||||
SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4;
|
||||
|
||||
SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1;
|
||||
SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4;
|
||||
|
||||
-- Empty "directory" files created implicitly by S3 console:
|
||||
-- https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html
|
||||
SELECT *
|
||||
FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN)
|
||||
LIMIT 1
|
||||
FORMAT Null;
|
||||
|
||||
SELECT *
|
||||
FROM s3Cluster('test_cluster_two_shards_localhost', 'https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN)
|
||||
LIMIT 1
|
||||
Format Null;
|
Loading…
Reference in New Issue
Block a user