mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #71947 from ClickHouse/fix_weird_problem
Fix a weird case where `s3`/`s3Cluster` return an incomplete result or throw an exception
This commit is contained in:
commit
c6a10151d9
@ -258,7 +258,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
|
|||||||
|
|
||||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
||||||
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
||||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
|
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Enabled by default.
|
||||||
|
|
||||||
## S3-related Settings {#settings}
|
## S3-related Settings {#settings}
|
||||||
|
|
||||||
|
@ -317,7 +317,7 @@ SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date
|
|||||||
|
|
||||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
||||||
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
|
||||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
|
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Enabled by default.
|
||||||
|
|
||||||
**See Also**
|
**See Also**
|
||||||
|
|
||||||
|
@ -433,7 +433,7 @@ Possible values:
|
|||||||
- 0 — `INSERT` query appends new data to the end of the file.
|
- 0 — `INSERT` query appends new data to the end of the file.
|
||||||
- 1 — `INSERT` query creates a new file.
|
- 1 — `INSERT` query creates a new file.
|
||||||
)", 0) \
|
)", 0) \
|
||||||
DECLARE(Bool, s3_skip_empty_files, false, R"(
|
DECLARE(Bool, s3_skip_empty_files, true, R"(
|
||||||
Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
|
Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
|
||||||
|
|
||||||
Possible values:
|
Possible values:
|
||||||
|
@ -87,6 +87,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
|||||||
{"filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit", 1, 1, "Rename of setting skip_download_if_exceeds_query_cache_limit"},
|
{"filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit", 1, 1, "Rename of setting skip_download_if_exceeds_query_cache_limit"},
|
||||||
{"filesystem_cache_prefer_bigger_buffer_size", true, true, "New setting"},
|
{"filesystem_cache_prefer_bigger_buffer_size", true, true, "New setting"},
|
||||||
{"read_in_order_use_virtual_row", false, false, "Use virtual row while reading in order of primary key or its monotonic function fashion. It is useful when searching over multiple parts as only relevant ones are touched."},
|
{"read_in_order_use_virtual_row", false, false, "Use virtual row while reading in order of primary key or its monotonic function fashion. It is useful when searching over multiple parts as only relevant ones are touched."},
|
||||||
|
{"s3_skip_empty_files", false, true, "We hope it will provide better UX"},
|
||||||
{"filesystem_cache_boundary_alignment", 0, 0, "New setting"},
|
{"filesystem_cache_boundary_alignment", 0, 0, "New setting"},
|
||||||
{"push_external_roles_in_interserver_queries", false, false, "New setting."},
|
{"push_external_roles_in_interserver_queries", false, false, "New setting."},
|
||||||
}
|
}
|
||||||
|
@ -218,7 +218,6 @@ ReadBufferIterator::Data ReadBufferIterator::next()
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto filename = current_object_info->getFileName();
|
const auto filename = current_object_info->getFileName();
|
||||||
chassert(!filename.empty());
|
|
||||||
|
|
||||||
/// file iterator could get new keys after new iteration
|
/// file iterator could get new keys after new iteration
|
||||||
if (read_keys.size() > prev_read_keys_size)
|
if (read_keys.size() > prev_read_keys_size)
|
||||||
|
@ -306,7 +306,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
|
|||||||
{
|
{
|
||||||
object_info = file_iterator->next(processor);
|
object_info = file_iterator->next(processor);
|
||||||
|
|
||||||
if (!object_info || object_info->getFileName().empty())
|
if (!object_info || object_info->getPath().empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
if (!object_info->metadata)
|
if (!object_info->metadata)
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
@ -0,0 +1,28 @@
|
|||||||
|
-- Tags: no-parallel, no-fasttest
|
||||||
|
-- Tag no-fasttest: Depends on AWS
|
||||||
|
|
||||||
|
SET s3_truncate_on_insert = 1;
|
||||||
|
SET s3_skip_empty_files = 0;
|
||||||
|
|
||||||
|
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/', format=Parquet) SELECT 0 as num;
|
||||||
|
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file1', format=Parquet) SELECT 1 as num;
|
||||||
|
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file2', format=Parquet) SELECT 2 as num;
|
||||||
|
INSERT INTO FUNCTION s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/file3', format=Parquet) SELECT 3 as num;
|
||||||
|
|
||||||
|
SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1;
|
||||||
|
SELECT * FROM s3(s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4;
|
||||||
|
|
||||||
|
SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 1;
|
||||||
|
SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', s3_conn, filename='dir1/03271_s3_table_function_asterisk_glob/*') ORDER BY ALL SETTINGS max_threads = 4;
|
||||||
|
|
||||||
|
-- Empty "directory" files created implicitly by S3 console:
|
||||||
|
-- https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html
|
||||||
|
SELECT *
|
||||||
|
FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN)
|
||||||
|
LIMIT 1
|
||||||
|
FORMAT Null;
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM s3Cluster('test_cluster_two_shards_localhost', 'https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/*', NOSIGN)
|
||||||
|
LIMIT 1
|
||||||
|
Format Null;
|
Loading…
Reference in New Issue
Block a user