From f5f1752f4454bd959f2f719722699287d28b773b Mon Sep 17 00:00:00 2001 From: Anna Date: Mon, 1 Mar 2021 16:35:59 +0300 Subject: [PATCH 01/53] add files in ru docs --- docs/ru/engines/table-engines/index.md | 1 + .../table-engines/integrations/index.md | 1 + .../engines/table-engines/integrations/s3.md | 156 ++++++++++++++++ .../ru/sql-reference/table-functions/index.md | 1 + docs/ru/sql-reference/table-functions/s3.md | 169 ++++++++++++++++++ 5 files changed, 328 insertions(+) create mode 100644 docs/ru/engines/table-engines/integrations/s3.md create mode 100644 docs/ru/sql-reference/table-functions/s3.md diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index 740588c50a4..81ac4b251b4 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -31,6 +31,7 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" - [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree) +- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3) ### Log {#log} diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md index db7e527442e..f1e43d3c7e5 100644 --- a/docs/ru/engines/table-engines/integrations/index.md +++ b/docs/ru/engines/table-engines/integrations/index.md @@ -14,6 +14,7 @@ toc_priority: 30 - [MySQL](../../../engines/table-engines/integrations/mysql.md) - [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) +- [S3](../../../engines/table-engines/integrations/s3.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) - [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) - [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md new file mode 100644 index 00000000000..5858a0803e6 --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -0,0 +1,156 @@ +--- +toc_priority: 4 +toc_title: S3 +--- + +# S3 {#table_engines-s3} + +This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar +to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features. + +## Usage {#usage} + +``` sql +ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +``` + +**Input parameters** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. +- `format` — The [format](../../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. 
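When the bucket is not public, the optional `aws_access_key_id` and `aws_secret_access_key` arguments are passed between the path and the format, as the syntax above shows. A minimal sketch (the bucket URL and both key values are placeholders, not working credentials):

``` sql
CREATE TABLE s3_private_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-private-bucket/data.csv', 'ACCESS_KEY_ID', 'SECRET_ACCESS_KEY', 'CSV', 'name String, value UInt32')
```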
+ +**Example:** + +**1.** Set up the `s3_engine_table` table: + +``` sql +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +``` + +**2.** Fill file: + +``` sql +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3) +``` + +**3.** Query the data: + +``` sql +SELECT * FROM s3_engine_table LIMIT 2 +``` + +``` text +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ +``` + +## Implementation Details {#implementation-details} + +- Reads and writes can be parallel +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - Replication. + +**Globs in path** + +Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). + +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. + +Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. + +**Example** + +1. Suppose we have several files in TSV format with the following URIs on HDFS: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ + +2. There are several ways to make a table consisting of all six files: + + + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV') +``` + +3. Another way: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV') +``` + +4. Table consists of all the files in both directories (all files should satisfy format and schema described in query): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV') +``` + +!!! warning "Warning" + If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. + +**Example** + +Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV') +``` + +## Virtual Columns {#virtual-columns} + +- `_path` — Path to the file. +- `_file` — Name of the file. 
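Virtual columns are queried like ordinary columns, which helps to see which of the matched files a row came from. Assuming the `table_with_asterisk` table defined above, a possible query:

``` sql
SELECT _path, _file, name, value FROM table_with_asterisk LIMIT 5
```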
+ +**See Also** + +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) + +## S3-related settings {#settings} + +The following settings can be set before query execution or placed into configuration file. + +- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3. +- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). +- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. + +Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. + +### Endpoint-based settings {#endpointsettings} + +The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): + +- `endpoint` — Mandatory. Specifies prefix of an endpoint. +- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. +- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. +- `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. +- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. + +Example: + +``` + + + https://storage.yandexcloud.net/my-test-bucket-768/ + + + + + + + +``` + +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) diff --git a/docs/ru/sql-reference/table-functions/index.md b/docs/ru/sql-reference/table-functions/index.md index 83225d54e60..178701bbc34 100644 --- a/docs/ru/sql-reference/table-functions/index.md +++ b/docs/ru/sql-reference/table-functions/index.md @@ -33,5 +33,6 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043D\u0438\u0435" | [jdbc](jdbc.md) | Создаёт таблицу с дижком [JDBC](../../engines/table-engines/integrations/jdbc.md). | | [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). | | [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). | +| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. | [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md new file mode 100644 index 00000000000..76a0e042ea4 --- /dev/null +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -0,0 +1,169 @@ +--- +toc_priority: 45 +toc_title: s3 +--- + +# s3 {#s3} + +Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md). + +``` sql +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +``` + +**Input parameters** + +- `path` — Bucket url with path to file. 
Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. + +**Example** + +Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2 +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +The similar but from file with `gzip` compression: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2 +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +**Globs in path** + +Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). + +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. + +Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). + +**Example** + +1. Suppose that we have several files with following URIs on S3: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv’ + +2. Query the amount of rows in files end with number from 1 to 3: + + + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +3. Query the amount of rows in all files of these two directories: + + + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + + +!!! warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
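For instance, files named `file-000.csv` through `file-999.csv` can also be matched with one braces group per digit rather than a single numeric range; a sketch of that form:

``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{0..9}{0..9}{0..9}.csv', 'CSV', 'name String, value UInt32')
```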
+ +**Example** + +Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +**Data insert** + +The S3 table function may be used for data insert as well. + +**Example** + +Insert a data into file `test-data.csv.gz`: + +``` sql +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2) +``` + +Insert a data into file `test-data.csv.gz` from existing table: + +``` sql +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table +``` + +## Virtual Columns {#virtual-columns} + +- `_path` — Path to the file. +- `_file` — Name of the file. + +## S3-related settings {#settings} + +The following settings can be set before query execution or placed into configuration file. + +- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3. +- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). +- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. + +Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. + +**See Also** + +- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) + +[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/s3/) From 85dc5b8a749796da67c62c86a4afaa144332b94b Mon Sep 17 00:00:00 2001 From: Anna Date: Mon, 1 Mar 2021 23:07:09 +0300 Subject: [PATCH 02/53] Add ru translation --- contrib/librdkafka | 2 +- .../engines/table-engines/integrations/s3.md | 191 ++++++++---------- docs/ru/sql-reference/table-functions/s3.md | 114 ++++------- 3 files changed, 131 insertions(+), 176 deletions(-) diff --git a/contrib/librdkafka b/contrib/librdkafka index cf11d0aa36d..f2f6616419d 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit cf11d0aa36d4738f2c9bf4377807661660f1be76 +Subproject commit f2f6616419d567c9198aef0d1133a2e9b4f02276 diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 5858a0803e6..1a5398137b8 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -3,42 +3,34 @@ toc_priority: 4 toc_title: S3 --- -# S3 {#table_engines-s3} +# S3 {#table-engines-s3} -This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar -to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features. +Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). 
Он похож на движок [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности. -## Usage {#usage} +## Создание таблицы ``` sql -ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Input parameters** +**Параметры движка** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. -- `format` — The [format](../../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. +- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. +- `format` — [формат](../../../interfaces/formats.md#formats) файла. +- `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. -**Example:** +## Виртуальные столбцы {#virtual-columns} + +[Виртуальные столбцы](../../../engines/table-engines/index.md#table_engines-virtual_columns): +- `_path` — путь к файлу. +- `_file` — имя файла. -**1.** Set up the `s3_engine_table` table: ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -``` - -**2.** Fill file: - -``` sql -INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3) -``` - -**3.** Query the data: - -``` sql -SELECT * FROM s3_engine_table LIMIT 2 +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text @@ -48,97 +40,49 @@ SELECT * FROM s3_engine_table LIMIT 2 └──────┴───────┘ ``` -## Implementation Details {#implementation-details} +## Детали реализации {#implementation-details} -- Reads and writes can be parallel -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - Replication. +- чтение и запись могут быть параллельными; +- не поддерживаются: + - запросы `ALTER` и `SELECT...SAMPLE`, + - индексы, + - репликация. -**Globs in path** +## Глобальные пути -Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). +Несколько компонентов пути могут быть глобальными. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. 
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. +- `*` — заменяет любое количество любых символов, кроме `/` включая пустую строку. +- `?` — заменяет любые одиночные символы. +- `{some_string, another_string, yet_another_one}` — заменяет любые строки `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — заменяет любое число от N до M, включая обе границы. N и M могут иметь ведущие нули, например `000..078`. -Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. +Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md). -**Example** +## S3-связанные настройки {#s3-settings} -1. Suppose we have several files in TSV format with the following URIs on HDFS: +Следующие настройки могут быть установлены перед выполнением запроса или заданы в конфигурационном файле. -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ +- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64Mb`. +- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512Mb`. +- `s3_max_redirects` — максимальное количество разрешенных переадресаций S3. Значение по умолчанию — `10`. -2. There are several ways to make a table consisting of all six files: +Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`. - +## Настройки на основе конечных точек {#endpoint-settings} -``` sql -CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV') -``` +Следующие настройки могут быть заданы в конфигурационном файле для данной конечной точки (которая будет соответствовать точному префиксу URL-адреса). -3. Another way: +Обязательная настройка: +- `endpoint` — указывает префикс конечной точки. -``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV') -``` +Необязательные настройки: +- `access_key_id` и `secret_access_key` — указывает учетные данные для использования с данной конечной точкой. +- `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`. 
+- `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз. +- `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C. -4. Table consists of all the files in both directories (all files should satisfy format and schema described in query): - -``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV') -``` - -!!! warning "Warning" - If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. - -**Example** - -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: - -``` sql -CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV') -``` - -## Virtual Columns {#virtual-columns} - -- `_path` — Path to the file. -- `_file` — Name of the file. - -**See Also** - -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) - -## S3-related settings {#settings} - -The following settings can be set before query execution or placed into configuration file. - -- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3. -- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). -- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. - -Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. - -### Endpoint-based settings {#endpointsettings} - -The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): - -- `endpoint` — Mandatory. Specifies prefix of an endpoint. -- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. -- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. -- `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. -- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. 
- -Example: +**Пример** ``` @@ -153,4 +97,45 @@ Example: ``` -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) +## Примеры использования {#usage-examples} + +Предположим, у нас есть несколько файлов в формате TSV со следующими URL-адресами в HDFS: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ + +Существует несколько способов создать таблицу, включающую в себя все шесть файлов: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +``` + +или + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +``` + +Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +``` + +!!! warning "Warning" + Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. + +Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +``` +## Смотрите также + +- [Табличная функция S3](../../../sql-reference/table-functions/s3.md) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/s3/) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 76a0e042ea4..5a9b83d7fd0 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -3,35 +3,41 @@ toc_priority: 45 toc_title: s3 --- -# s3 {#s3} +# Табличная Функция S3 {#s3-table-function} -Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md). +Предоставляет табличный интерфейс для выбора/вставки файлов в [Amazon S3](https://aws.amazon.com/s3/). Эта табличная функция похожа на [hdfs](../../sql-reference/table-functions/hdfs.md), но обеспечивает специфические для S3 возможности. + +## Синтаксис {#syntax} ``` sql s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Input parameters** +**Входные параметры** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. 
Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. +- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. +- `format` — [формат](../../interfaces/formats.md#formats) файла. +- `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. -**Returned value** +**Возвращаемые значения** -A table with the specified structure for reading or writing data in the specified file. +Таблица с указанной структурой для чтения или записи данных в указанный файл. -**Example** +**Примеры** -Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: +Создание таблицы из файла S3 `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` и выбор первых трех столбцов из нее: + +Query: ``` sql SELECT * FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2 +LIMIT 2; ``` +Result: + ``` text ┌─column1─┬─column2─┬─column3─┐ │ 1 │ 2 │ 3 │ @@ -39,35 +45,27 @@ LIMIT 2 └─────────┴─────────┴─────────┘ ``` -The similar but from file with `gzip` compression: +То же самое, но файл со сжатием `gzip`: + +Запрос: ``` sql -SELECT * -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2 +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; ``` +Результат: + ``` text ┌─column1─┬─column2─┬─column3─┐ │ 1 │ 2 │ 3 │ │ 3 │ 2 │ 1 │ └─────────┴─────────┴─────────┘ ``` +## Примеры использования {#usage-examples} -**Globs in path** - -Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). - -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. - -Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). - -**Example** - -1. Suppose that we have several files with following URIs on S3: +Предположим, у нас есть несколько файлов со следующими URI на S3: - ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ - ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ @@ -78,13 +76,11 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref - ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ - ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv’ -2. Query the amount of rows in files end with number from 1 to 3: - - +1. 
Запрос количества строк в файлах, заканчивающихся цифрами от 1 до 3: ``` sql SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text @@ -93,13 +89,11 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -3. Query the amount of rows in all files of these two directories: - - +2. Запрос количества строк во всех файлах этих двух каталогов: ``` sql SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); ``` ``` text @@ -108,17 +102,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` - !!! warning "Warning" - If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. + Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. -**Example** - -Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +3. Запрос данных из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32') +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text @@ -127,43 +118,22 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000 └─────────┘ ``` -**Data insert** - -The S3 table function may be used for data insert as well. - -**Example** - -Insert a data into file `test-data.csv.gz`: +4. Вставка данных в файл `test-data.csv.gz`: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2) +VALUES ('test-data', 1), ('test-data-2', 2); ``` -Insert a data into file `test-data.csv.gz` from existing table: +5. Вставка данных в файл `test-data.csv.gz` из существующей таблицы: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table +SELECT name, value FROM existing_table; ``` -## Virtual Columns {#virtual-columns} +## Смотрите также -- `_path` — Path to the file. -- `_file` — Name of the file. +- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md) -## S3-related settings {#settings} - -The following settings can be set before query execution or placed into configuration file. - -- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3. -- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). -- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. 
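These are ordinary query-level settings, so they can also be changed for the current session right before running a query; for example (the value shown is only an illustration):

``` sql
SET s3_max_redirects = 0;
```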
- -Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. - -**See Also** - -- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/s3/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/s3/) From 178134ba23cf3f136608d2c8c3130cc8fa5bdf26 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 11:00:49 +0300 Subject: [PATCH 03/53] edit en text --- .../engines/table-engines/integrations/s3.md | 125 ++++++++---------- docs/en/sql-reference/table-functions/s3.md | 67 +++------- .../engines/table-engines/integrations/s3.md | 7 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- 4 files changed, 72 insertions(+), 129 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 5858a0803e6..7fdfce9c9ad 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -3,12 +3,12 @@ toc_priority: 4 toc_title: S3 --- -# S3 {#table_engines-s3} +# S3 Table Engine {#table-engines-s3} This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features. -## Usage {#usage} +## Create Table ``` sql ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) @@ -21,24 +21,16 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. +[Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns): +- `_path` — Path to the file. +- `_file` — Name of the file. + **Example:** -**1.** Set up the `s3_engine_table` table: - ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -``` - -**2.** Fill file: - -``` sql -INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3) -``` - -**3.** Query the data: - -``` sql -SELECT * FROM s3_engine_table LIMIT 2 +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text @@ -67,58 +59,7 @@ Multiple path components can have globs. For being processed file should exist a Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -**Example** - -1. 
Suppose we have several files in TSV format with the following URIs on HDFS: - -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ - -2. There are several ways to make a table consisting of all six files: - - - -``` sql -CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV') -``` - -3. Another way: - -``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV') -``` - -4. Table consists of all the files in both directories (all files should satisfy format and schema described in query): - -``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV') -``` - -!!! warning "Warning" - If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. - -**Example** - -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: - -``` sql -CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV') -``` - -## Virtual Columns {#virtual-columns} - -- `_path` — Path to the file. -- `_file` — Name of the file. - -**See Also** - -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) - -## S3-related settings {#settings} +## S3-related Settings {#s3-settings} The following settings can be set before query execution or placed into configuration file. @@ -128,7 +69,7 @@ The following settings can be set before query execution or placed into configur Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. -### Endpoint-based settings {#endpointsettings} +### Endpoint-based Settings {#endpoint-settings} The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): @@ -138,7 +79,7 @@ The following settings can be specified in configuration file for given endpoint - `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. - `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. 
-Example: +**Example:** ``` @@ -152,5 +93,45 @@ Example: ``` +## Usage {#usage-examples} -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/s3/) +Suppose we have several files in TSV format with the following URIs on HDFS: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ + +There are several ways to make a table consisting of all six files: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV') +``` + +or: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV') +``` + +Table consists of all the files in both directories (all files should satisfy format and schema described in query): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV') +``` + +!!! warning "Warning" + If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. + +Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV') +``` +## See also + +- [S3 table function](../../../sql-reference/table-functions/s3.md) + +[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/s3/) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 76a0e042ea4..82b953a512a 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -3,15 +3,17 @@ toc_priority: 45 toc_title: s3 --- -# s3 {#s3} +# S3 Table Function {#s3-table-function} -Provides table-like interface to select/insert files in S3. This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md). +Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features. + +## Syntax {#syntax} ``` sql s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Input parameters** +**Input arguments** - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. @@ -22,7 +24,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres A table with the specified structure for reading or writing data in the specified file. 
-**Example** +**Examples** Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: @@ -54,20 +56,9 @@ LIMIT 2 └─────────┴─────────┴─────────┘ ``` -**Globs in path** +## Usage {#usage-examples} -Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). - -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. - -Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). - -**Example** - -1. Suppose that we have several files with following URIs on S3: +Suppose that we have several files with following URIs on S3: - ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ - ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ @@ -78,9 +69,7 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref - ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ - ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv’ -2. Query the amount of rows in files end with number from 1 to 3: - - +1. Query the amount of rows in files end with number from 1 to 3: ``` sql SELECT count(*) @@ -93,9 +82,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -3. Query the amount of rows in all files of these two directories: - - +2. Query the amount of rows in all files of these two directories: ``` sql SELECT count(*) @@ -108,13 +95,10 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` - !!! warning "Warning" If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -**Example** - -Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +3. Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql SELECT count(*) @@ -127,43 +111,22 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000 └─────────┘ ``` -**Data insert** - -The S3 table function may be used for data insert as well. - -**Example** - -Insert a data into file `test-data.csv.gz`: +4. Insert a data into file `test-data.csv.gz`: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') VALUES ('test-data', 1), ('test-data-2', 2) ``` -Insert a data into file `test-data.csv.gz` from existing table: +5. Insert a data into file `test-data.csv.gz` from existing table: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table ``` -## Virtual Columns {#virtual-columns} - -- `_path` — Path to the file. -- `_file` — Name of the file. - -## S3-related settings {#settings} - -The following settings can be set before query execution or placed into configuration file. - -- `s3_max_single_part_upload_size` — Default value is `64Mb`. 
The maximum size of object to upload using singlepart upload to S3. -- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). -- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. - -Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. - **See Also** -- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) +- [S3 engine](../../engines/table-engines/integrations/s3.md) -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/s3/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 1a5398137b8..e91226ab3fd 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -3,7 +3,7 @@ toc_priority: 4 toc_title: S3 --- -# S3 {#table-engines-s3} +# Движок таблиц S3 {#table-engines-s3} Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Он похож на движок [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности. @@ -20,12 +20,11 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. -## Виртуальные столбцы {#virtual-columns} - [Виртуальные столбцы](../../../engines/table-engines/index.md#table_engines-virtual_columns): - `_path` — путь к файлу. - `_file` — имя файла. +**Пример** ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); @@ -48,7 +47,7 @@ SELECT * FROM s3_engine_table LIMIT 2; - индексы, - репликация. -## Глобальные пути +**Глобальные пути** Несколько компонентов пути могут быть глобальными. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 5a9b83d7fd0..3da4c64f490 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -13,7 +13,7 @@ toc_title: s3 s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Входные параметры** +**Входные аргументы** - `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. - `format` — [формат](../../interfaces/formats.md#formats) файла. 
From 1bceed49848b5cd65078c97c92155759ccbfe1ba Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 11:31:16 +0300 Subject: [PATCH 04/53] casting to template --- .../engines/table-engines/integrations/s3.md | 30 +++++++------- docs/en/sql-reference/table-functions/s3.md | 14 +++---- .../engines/table-engines/integrations/s3.md | 39 +++++++++++-------- docs/ru/sql-reference/table-functions/s3.md | 6 +-- 4 files changed, 49 insertions(+), 40 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 7fdfce9c9ad..0cc9a26a950 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -3,29 +3,24 @@ toc_priority: 4 toc_title: S3 --- -# S3 Table Engine {#table-engines-s3} +# S3 Table Engine {#table-engine-s3} -This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar -to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features. +This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem. This engine is similar to the [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs) engine, but provides S3-specific features. -## Create Table +## Create Table {#creating-a-table} ``` sql -ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Input parameters** +**Engine parameters** - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. -[Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns): -- `_path` — Path to the file. -- `_file` — Name of the file. - -**Example:** +**Example** ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); @@ -39,8 +34,15 @@ SELECT * FROM s3_engine_table LIMIT 2; │ two │ 2 │ └──────┴───────┘ ``` +## Virtual columns {#virtual-columns} -## Implementation Details {#implementation-details} +- `_path` — Path to the file. +- `_file` — Name of the file. + +For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). + +## Specifics and recommendations {#specifics-and-recommendations} +### Implementation Details {#implementation-details} - Reads and writes can be parallel - Not supported: @@ -48,7 +50,7 @@ SELECT * FROM s3_engine_table LIMIT 2; - Indexes. - Replication. -**Globs in path** +### Globs in path Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). @@ -59,7 +61,7 @@ Multiple path components can have globs. 
For being processed file should exist a Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -## S3-related Settings {#s3-settings} +### S3-related Settings {#s3-settings} The following settings can be set before query execution or placed into configuration file. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 82b953a512a..27646480e60 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -7,13 +7,13 @@ toc_title: s3 Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features. -## Syntax {#syntax} +**Syntax** ``` sql s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Input arguments** +**Arguments** - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. @@ -31,7 +31,7 @@ Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` ``` sql SELECT * FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2 +LIMIT 2; ``` ``` text @@ -46,7 +46,7 @@ The similar but from file with `gzip` compression: ``` sql SELECT * FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2 +LIMIT 2; ``` ``` text @@ -102,7 +102,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi ``` sql SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32') +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text @@ -115,14 +115,14 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000 ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2) +VALUES ('test-data', 1), ('test-data-2', 2); ``` 5. Insert a data into file `test-data.csv.gz` from existing table: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table +SELECT name, value FROM existing_table; ``` **See Also** diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index e91226ab3fd..56274d64bde 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -3,11 +3,11 @@ toc_priority: 4 toc_title: S3 --- -# Движок таблиц S3 {#table-engines-s3} +# Движок таблиц S3 {#table-engine-s3} Этот движок обеспечивает интеграцию с экосистемой [Amazon S3](https://aws.amazon.com/s3/). Он похож на движок [HDFS](../../../engines/table-engines/special/file.md#table_engines-hdfs), но обеспечивает специфические для S3 возможности. 
-## Создание таблицы +## Создание таблицы {#creating-a-table} ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) @@ -20,10 +20,6 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. -[Виртуальные столбцы](../../../engines/table-engines/index.md#table_engines-virtual_columns): -- `_path` — путь к файлу. -- `_file` — имя файла. - **Пример** ``` sql @@ -38,16 +34,23 @@ SELECT * FROM s3_engine_table LIMIT 2; │ two │ 2 │ └──────┴───────┘ ``` +## Виртуальные столбцы {#virtual-columns} -## Детали реализации {#implementation-details} +- `_path` — путь к файлу. +- `_file` — имя файла. -- чтение и запись могут быть параллельными; -- не поддерживаются: +Подробнее про виртуальные столбцы можно прочитать [здесь](../../../engines/table-engines/index.md#table_engines-virtual_columns). + +## Особенности и рекомендации {#specifics-and-recommendations} +### Детали реализации {#implementation-details} + +- Чтение и запись могут быть параллельными. +- Не поддерживаются: - запросы `ALTER` и `SELECT...SAMPLE`, - индексы, - репликация. -**Глобальные пути** +### Глобальные пути Несколько компонентов пути могут быть глобальными. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). @@ -58,7 +61,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md). -## S3-связанные настройки {#s3-settings} +### S3-связанные настройки {#s3-settings} Следующие настройки могут быть установлены перед выполнением запроса или заданы в конфигурационном файле. @@ -68,7 +71,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`. -## Настройки на основе конечных точек {#endpoint-settings} +### Настройки на основе конечных точек {#endpoint-settings} Следующие настройки могут быть заданы в конфигурационном файле для данной конечной точки (которая будет соответствовать точному префиксу URL-адреса). 
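The section above introduces per-endpoint settings matched by the exact URL prefix, but the configuration syntax is not shown at this point. A rough, illustrative sketch of what such a block could look like in the server configuration file (the element names and the `my_endpoint` label are assumptions for illustration, not taken from this patch):

``` xml
<s3>
    <!-- Hypothetical endpoint name; the settings apply to URLs starting with the given prefix. -->
    <my_endpoint>
        <endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
        <!-- Optional credentials (placeholders, not real values). -->
        <access_key_id>ACCESS_KEY_ID</access_key_id>
        <secret_access_key>SECRET_ACCESS_KEY</secret_access_key>
    </my_endpoint>
</s3>
```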
@@ -110,19 +113,22 @@ SELECT * FROM s3_engine_table LIMIT 2; Существует несколько способов создать таблицу, включающую в себя все шесть файлов: ``` sql -CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +CREATE TABLE table_with_range (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` или ``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +CREATE TABLE table_with_question_mark (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +CREATE TABLE table_with_asterisk (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); ``` !!! warning "Warning" @@ -131,7 +137,8 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql -CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +CREATE TABLE big_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` ## Смотрите также diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 3da4c64f490..7351fba8758 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -7,15 +7,15 @@ toc_title: s3 Предоставляет табличный интерфейс для выбора/вставки файлов в [Amazon S3](https://aws.amazon.com/s3/). Эта табличная функция похожа на [hdfs](../../sql-reference/table-functions/hdfs.md), но обеспечивает специфические для S3 возможности. -## Синтаксис {#syntax} +**Синтаксис** ``` sql s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` -**Входные аргументы** +**Aргументы** -- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. +- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. 
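The argument list above documents the optional `aws_access_key_id` and `aws_secret_access_key` positions, but none of the surrounding examples use them. A minimal sketch, assuming a hypothetical private bucket and placeholder credentials, of how the optional credentials slot in between the path and the format:

``` sql
-- The key id and secret go right after the path; format and structure follow as usual.
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-private-bucket/data.csv', 'ACCESS_KEY_ID', 'SECRET_ACCESS_KEY', 'CSV', 'name String, value UInt32');
```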
From 7ccbd63e25234a95488f8db2c672c32c90e46d23 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 11:59:06 +0300 Subject: [PATCH 05/53] minor fixed --- .../engines/table-engines/integrations/s3.md | 25 ++++--- docs/en/sql-reference/table-functions/s3.md | 68 +++++++++---------- .../engines/table-engines/integrations/s3.md | 30 ++++---- docs/ru/sql-reference/table-functions/s3.md | 68 +++++++++---------- 4 files changed, 97 insertions(+), 94 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 0cc9a26a950..89191e42cfc 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -23,16 +23,16 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ **Example** ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); -INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); -SELECT * FROM s3_engine_table LIMIT 2; + CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); + INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); + SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text -┌─name─┬─value─┐ -│ one │ 1 │ -│ two │ 2 │ -└──────┴───────┘ + ┌─name─┬─value─┐ + │ one │ 1 │ + │ two │ 2 │ + └──────┴───────┘ ``` ## Virtual columns {#virtual-columns} @@ -109,19 +109,21 @@ Suppose we have several files in TSV format with the following URIs on HDFS: There are several ways to make a table consisting of all six files: ``` sql -CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV') + CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` or: ``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV') + CREATE TABLE table_with_question_mark (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` Table consists of all the files in both directories (all files should satisfy format and schema described in query): ``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV') + CREATE TABLE table_with_asterisk (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); ``` !!! 
warning "Warning" @@ -130,7 +132,8 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql -CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV') + CREATE TABLE big_table (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` ## See also diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 27646480e60..3f02fadabe7 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -10,7 +10,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws. **Syntax** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) + s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Arguments** @@ -29,31 +29,31 @@ A table with the specified structure for reading or writing data in the specifie Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: ``` sql -SELECT * -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2; + SELECT * + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') + LIMIT 2; ``` ``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ + ┌─column1─┬─column2─┬─column3─┐ + │ 1 │ 2 │ 3 │ + │ 3 │ 2 │ 1 │ + └─────────┴─────────┴─────────┘ ``` The similar but from file with `gzip` compression: ``` sql -SELECT * -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2; + SELECT * + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') + LIMIT 2; ``` ``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ + ┌─column1─┬─column2─┬─column3─┐ + │ 1 │ 2 │ 3 │ + │ 3 │ 2 │ 1 │ + └─────────┴─────────┴─────────┘ ``` ## Usage {#usage-examples} @@ -72,27 +72,27 @@ Suppose that we have several files with following URIs on S3: 1. Query the amount of rows in files end with number from 1 to 3: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 18 │ -└─────────┘ + ┌─count()─┐ + │ 18 │ + └─────────┘ ``` 2. Query the amount of rows in all files of these two directories: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 24 │ -└─────────┘ + ┌─count()─┐ + │ 24 │ + └─────────┘ ``` !!! 
warning "Warning" @@ -101,28 +101,28 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi 3. Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 12 │ -└─────────┘ + ┌─count()─┐ + │ 12 │ + └─────────┘ ``` 4. Insert a data into file `test-data.csv.gz`: ``` sql -INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2); + INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') + VALUES ('test-data', 1), ('test-data-2', 2); ``` 5. Insert a data into file `test-data.csv.gz` from existing table: ``` sql -INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table; + INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') + SELECT name, value FROM existing_table; ``` **See Also** diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 56274d64bde..5843fa71af6 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -23,16 +23,16 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ **Пример** ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); -INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); -SELECT * FROM s3_engine_table LIMIT 2; + CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); + INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); + SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text -┌─name─┬─value─┐ -│ one │ 1 │ -│ two │ 2 │ -└──────┴───────┘ + ┌─name─┬─value─┐ + │ one │ 1 │ + │ two │ 2 │ + └──────┴───────┘ ``` ## Виртуальные столбцы {#virtual-columns} @@ -113,22 +113,22 @@ SELECT * FROM s3_engine_table LIMIT 2; Существует несколько способов создать таблицу, включающую в себя все шесть файлов: ``` sql -CREATE TABLE table_with_range (name String, value UInt32) -ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); + CREATE TABLE table_with_range (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` или ``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) -ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); + CREATE TABLE table_with_question_mark (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` Таблица содержит все файлы в обоих директориях (все 
файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) -ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); + CREATE TABLE table_with_asterisk (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); ``` !!! warning "Warning" @@ -137,8 +137,8 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql -CREATE TABLE big_table (name String, value UInt32) -ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); + CREATE TABLE big_table (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` ## Смотрите также diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 7351fba8758..383fdb099fe 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -10,7 +10,7 @@ toc_title: s3 **Синтаксис** ``` sql -s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) + s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Aргументы** @@ -31,18 +31,18 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres Query: ``` sql -SELECT * -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2; + SELECT * + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') + LIMIT 2; ``` Result: ``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ + ┌─column1─┬─column2─┬─column3─┐ + │ 1 │ 2 │ 3 │ + │ 3 │ 2 │ 1 │ + └─────────┴─────────┴─────────┘ ``` То же самое, но файл со сжатием `gzip`: @@ -50,18 +50,18 @@ Result: Запрос: ``` sql -SELECT * -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2; + SELECT * + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') + LIMIT 2; ``` Результат: ``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ + ┌─column1─┬─column2─┬─column3─┐ + │ 1 │ 2 │ 3 │ + │ 3 │ 2 │ 1 │ + └─────────┴─────────┴─────────┘ ``` ## Примеры использования {#usage-examples} @@ -79,27 +79,27 @@ LIMIT 2; 1. Запрос количества строк в файлах, заканчивающихся цифрами от 1 до 3: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 18 │ -└─────────┘ + ┌─count()─┐ + │ 18 │ + └─────────┘ ``` 2. 
Запрос количества строк во всех файлах этих двух каталогов: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 24 │ -└─────────┘ + ┌─count()─┐ + │ 24 │ + └─────────┘ ``` !!! warning "Warning" @@ -108,28 +108,28 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi 3. Запрос данных из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql -SELECT count(*) -FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); + SELECT count(*) + FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text -┌─count()─┐ -│ 12 │ -└─────────┘ + ┌─count()─┐ + │ 12 │ + └─────────┘ ``` 4. Вставка данных в файл `test-data.csv.gz`: ``` sql -INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2); + INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') + VALUES ('test-data', 1), ('test-data-2', 2); ``` 5. Вставка данных в файл `test-data.csv.gz` из существующей таблицы: ``` sql -INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table; + INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') + SELECT name, value FROM existing_table; ``` ## Смотрите также From c9cdde9983697f6efce01fa42f6c34adc6590609 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 12:10:35 +0300 Subject: [PATCH 06/53] Minor fixed --- .../engines/table-engines/integrations/s3.md | 32 +++++++++++-------- docs/en/sql-reference/table-functions/s3.md | 16 +++++----- .../engines/table-engines/integrations/s3.md | 28 ++++++++-------- docs/ru/sql-reference/table-functions/s3.md | 18 +++++------ 4 files changed, 50 insertions(+), 44 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 89191e42cfc..ee1630c25d1 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -10,12 +10,13 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec ## Create Table {#creating-a-table} ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Engine parameters** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*, ?, {abc,def} and {N..M}` where `N, M` — numbers, `'abc', 'def'` — strings. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. 
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. @@ -23,7 +24,8 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ **Example** ``` sql - CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); + CREATE TABLE s3_engine_table (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); SELECT * FROM s3_engine_table LIMIT 2; ``` @@ -99,27 +101,29 @@ The following settings can be specified in configuration file for given endpoint Suppose we have several files in TSV format with the following URIs on HDFS: -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' -There are several ways to make a table consisting of all six files: + +1. There are several ways to make a table consisting of all six files: ``` sql - CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); + CREATE TABLE table_with_range (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` -or: +2. Another way: ``` sql CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` -Table consists of all the files in both directories (all files should satisfy format and schema described in query): +3. Table consists of all the files in both directories (all files should satisfy format and schema described in query): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) @@ -129,7 +133,7 @@ Table consists of all the files in both directories (all files should satisfy fo !!! warning "Warning" If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +4. 
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 3f02fadabe7..f0d7c8d6eeb 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -60,14 +60,14 @@ The similar but from file with `gzip` compression: Suppose that we have several files with following URIs on S3: -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv’ +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' 1. Query the amount of rows in files end with number from 1 to 3: diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 5843fa71af6..d6336b6f9f1 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -10,12 +10,13 @@ toc_title: S3 ## Создание таблицы {#creating-a-table} ``` sql -CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Параметры движка** -- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. +- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. - `format` — [формат](../../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. 
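The `compression` parameter above is normally auto-detected from the file extension, but the documented values (for example `zstd`/`zst`) can also be passed explicitly as the last engine argument. A minimal sketch, assuming a hypothetical `.zst` object in the same test bucket:

``` sql
CREATE TABLE s3_zstd_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.zst', 'CSV', 'name String, value UInt32', 'zstd');
```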
@@ -23,7 +24,8 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ **Пример** ``` sql - CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); + CREATE TABLE s3_engine_table (name String, value UInt32) + ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); SELECT * FROM s3_engine_table LIMIT 2; ``` @@ -103,28 +105,28 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ Предположим, у нас есть несколько файлов в формате TSV со следующими URL-адресами в HDFS: -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' -Существует несколько способов создать таблицу, включающую в себя все шесть файлов: +1. Существует несколько способов создать таблицу, включающую в себя все шесть файлов: ``` sql CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` -или +2. Другой способ: ``` sql CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` -Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): +3. Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) @@ -134,7 +136,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_ !!! warning "Warning" Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. -Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +4. 
Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 383fdb099fe..50790666fc6 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -15,7 +15,7 @@ toc_title: s3 **Aргументы** -- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": *, ?, {abc,def} и {N..M} где N, M — числа, `’abc’, ‘def’ — строки. +- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `’abc’, ‘def’` — строки. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. @@ -67,14 +67,14 @@ Result: Предположим, у нас есть несколько файлов со следующими URI на S3: -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ -- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv’ +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' 1. 
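The warning about number ranges with leading zeros recommends expanding each digit with its own braces (or using `?`), but no example shows that form. A sketch of the digit-by-digit pattern for `file-000.csv` … `file-999.csv`, assuming the same hypothetical bucket as the other examples:

``` sql
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{0..9}{0..9}{0..9}.csv', 'CSV', 'name String, value UInt32');
```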
Запрос количества строк в файлах, заканчивающихся цифрами от 1 до 3: From bb2061dd8f71ab32390197d27ca1a768f3c63025 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 12:22:15 +0300 Subject: [PATCH 07/53] revert some changes --- .../engines/table-engines/integrations/s3.md | 34 +++++----- docs/en/sql-reference/table-functions/s3.md | 68 +++++++++---------- .../engines/table-engines/integrations/s3.md | 34 +++++----- docs/ru/sql-reference/table-functions/s3.md | 68 +++++++++---------- 4 files changed, 102 insertions(+), 102 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index ee1630c25d1..ac1a0533a68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -24,17 +24,17 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, **Example** ``` sql - CREATE TABLE s3_engine_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); - INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); - SELECT * FROM s3_engine_table LIMIT 2; +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text - ┌─name─┬─value─┐ - │ one │ 1 │ - │ two │ 2 │ - └──────┴───────┘ +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ ``` ## Virtual columns {#virtual-columns} @@ -85,7 +85,7 @@ The following settings can be specified in configuration file for given endpoint **Example:** -``` +``` xml https://storage.yandexcloud.net/my-test-bucket-768/ @@ -112,22 +112,22 @@ Suppose we have several files in TSV format with the following URIs on HDFS: 1. There are several ways to make a table consisting of all six files: ``` sql - CREATE TABLE table_with_range (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +CREATE TABLE table_with_range (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` 2. Another way: ``` sql - CREATE TABLE table_with_question_mark (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +CREATE TABLE table_with_question_mark (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` 3. Table consists of all the files in both directories (all files should satisfy format and schema described in query): ``` sql - CREATE TABLE table_with_asterisk (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +CREATE TABLE table_with_asterisk (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); ``` !!! warning "Warning" @@ -136,8 +136,8 @@ Suppose we have several files in TSV format with the following URIs on HDFS: 4. 
Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql - CREATE TABLE big_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +CREATE TABLE big_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` ## See also diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index f0d7c8d6eeb..c24ded9861e 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -10,7 +10,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws. **Syntax** ``` sql - s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Arguments** @@ -29,31 +29,31 @@ A table with the specified structure for reading or writing data in the specifie Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: ``` sql - SELECT * - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') - LIMIT 2; +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; ``` ``` text - ┌─column1─┬─column2─┬─column3─┐ - │ 1 │ 2 │ 3 │ - │ 3 │ 2 │ 1 │ - └─────────┴─────────┴─────────┘ +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ ``` The similar but from file with `gzip` compression: ``` sql - SELECT * - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') - LIMIT 2; +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; ``` ``` text - ┌─column1─┬─column2─┬─column3─┐ - │ 1 │ 2 │ 3 │ - │ 3 │ 2 │ 1 │ - └─────────┴─────────┴─────────┘ +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ ``` ## Usage {#usage-examples} @@ -72,27 +72,27 @@ Suppose that we have several files with following URIs on S3: 1. Query the amount of rows in files end with number from 1 to 3: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') ``` ``` text - ┌─count()─┐ - │ 18 │ - └─────────┘ +┌─count()─┐ +│ 18 │ +└─────────┘ ``` 2. Query the amount of rows in all files of these two directories: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') ``` ``` text - ┌─count()─┐ - │ 24 │ - └─────────┘ +┌─count()─┐ +│ 24 │ +└─────────┘ ``` !!! warning "Warning" @@ -101,28 +101,28 @@ Suppose that we have several files with following URIs on S3: 3. 
Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text - ┌─count()─┐ - │ 12 │ - └─────────┘ +┌─count()─┐ +│ 12 │ +└─────────┘ ``` 4. Insert a data into file `test-data.csv.gz`: ``` sql - INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') - VALUES ('test-data', 1), ('test-data-2', 2); +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); ``` 5. Insert a data into file `test-data.csv.gz` from existing table: ``` sql - INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') - SELECT name, value FROM existing_table; +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; ``` **See Also** diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index d6336b6f9f1..1c2bb58a815 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -24,17 +24,17 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, **Пример** ``` sql - CREATE TABLE s3_engine_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); - INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); - SELECT * FROM s3_engine_table LIMIT 2; +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +SELECT * FROM s3_engine_table LIMIT 2; ``` ``` text - ┌─name─┬─value─┐ - │ one │ 1 │ - │ two │ 2 │ - └──────┴───────┘ +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ ``` ## Виртуальные столбцы {#virtual-columns} @@ -88,7 +88,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, **Пример** -``` +```xml https://storage.yandexcloud.net/my-test-bucket-768/ @@ -115,22 +115,22 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, 1. Существует несколько способов создать таблицу, включающую в себя все шесть файлов: ``` sql - CREATE TABLE table_with_range (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +CREATE TABLE table_with_range (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); ``` 2. 
Другой способ: ``` sql - CREATE TABLE table_with_question_mark (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +CREATE TABLE table_with_question_mark (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` 3. Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql - CREATE TABLE table_with_asterisk (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +CREATE TABLE table_with_asterisk (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); ``` !!! warning "Warning" @@ -139,8 +139,8 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, 4. Создание таблицы из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql - CREATE TABLE big_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +CREATE TABLE big_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` ## Смотрите также diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 50790666fc6..6d1616c5c06 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -10,7 +10,7 @@ toc_title: s3 **Синтаксис** ``` sql - s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) ``` **Aргументы** @@ -31,18 +31,18 @@ toc_title: s3 Query: ``` sql - SELECT * - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') - LIMIT 2; +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; ``` Result: ``` text - ┌─column1─┬─column2─┬─column3─┐ - │ 1 │ 2 │ 3 │ - │ 3 │ 2 │ 1 │ - └─────────┴─────────┴─────────┘ +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ ``` То же самое, но файл со сжатием `gzip`: @@ -50,18 +50,18 @@ Result: Запрос: ``` sql - SELECT * - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') - LIMIT 2; +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; ``` Результат: ``` text - ┌─column1─┬─column2─┬─column3─┐ - │ 1 │ 2 │ 3 │ - │ 3 │ 2 │ 1 │ - └─────────┴─────────┴─────────┘ +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ ``` ## Примеры использования {#usage-examples} @@ -79,27 +79,27 @@ Result: 1. 
Запрос количества строк в файлах, заканчивающихся цифрами от 1 до 3: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text - ┌─count()─┐ - │ 18 │ - └─────────┘ +┌─count()─┐ +│ 18 │ +└─────────┘ ``` 2. Запрос количества строк во всех файлах этих двух каталогов: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32'); ``` ``` text - ┌─count()─┐ - │ 24 │ - └─────────┘ +┌─count()─┐ +│ 24 │ +└─────────┘ ``` !!! warning "Warning" @@ -108,28 +108,28 @@ Result: 3. Запрос данных из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql - SELECT count(*) - FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); ``` ``` text - ┌─count()─┐ - │ 12 │ - └─────────┘ +┌─count()─┐ +│ 12 │ +└─────────┘ ``` 4. Вставка данных в файл `test-data.csv.gz`: ``` sql - INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') - VALUES ('test-data', 1), ('test-data-2', 2); +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); ``` 5. 
Вставка данных в файл `test-data.csv.gz` из существующей таблицы: ``` sql - INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') - SELECT name, value FROM existing_table; +INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; ``` ## Смотрите также From 6e6c0069e247bf92c0b958231892f69df422404d Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 12:36:05 +0300 Subject: [PATCH 08/53] fixed links --- docs/en/engines/table-engines/index.md | 1 + docs/ru/engines/table-engines/index.md | 2 +- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 546557beb57..09ec8b45c93 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -30,6 +30,7 @@ Engines in the family: - [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) +- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3) ### Log {#log} diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index 81ac4b251b4..a81e68a9cd7 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -31,7 +31,7 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" - [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree) -- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3) +- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3) ### Log {#log} diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 1c2bb58a815..696380059d8 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -88,7 +88,7 @@ SELECT * FROM s3_engine_table LIMIT 2; **Пример** -```xml +``` xml https://storage.yandexcloud.net/my-test-bucket-768/ From 62db68e3daf22da98a66c3e0fc9188546a5f021f Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 12:43:51 +0300 Subject: [PATCH 09/53] fix link --- docs/en/engines/table-engines/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 09ec8b45c93..57bd5026975 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -53,7 +53,7 @@ Engines in the family: - [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc) - [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc) - [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs) -- [S3](../../engines/table-engines/integrations/s3.md#table_engines-s3) +- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3) ### Special 
Engines {#special-engines} From 061f1bbe6bef147927337f54f6bda862108de4db Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 12:54:17 +0300 Subject: [PATCH 10/53] minor fixed --- docs/en/sql-reference/table-functions/s3.md | 2 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index c24ded9861e..d0e3a6ef453 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -15,7 +15,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: *, ?, {abc,def} and {N..M} where N, M — numbers, `’abc’, ‘def’ — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*, ?, {abc,def} and {N..M}` where `N, M` — numbers, `'abc', 'def'` — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 6d1616c5c06..eb6c9bbb73e 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -15,7 +15,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres **Aргументы** -- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `’abc’, ‘def’` — строки. +- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. 
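
The wildcard forms quoted in backticks above can be combined in a single `path`. As a rough sketch of the caveat these docs give for zero-padded ranges (use a brace group per digit, or `?`), a count over files named `file-000.csv` … `file-999.csv` might look like the following; the bucket URL and column schema are the documentation's placeholders, not real data:

``` sql
-- Each {0..9} expands to a single digit, so the three adjacent groups together
-- match the zero-padded names file-000.csv ... file-999.csv.
SELECT count(*)
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{0..9}{0..9}{0..9}.csv',
        'CSV', 'name String, value UInt32');
```
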
From 78ebeab9b933901e951b6a9b995475fd30281454 Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 13:13:07 +0300 Subject: [PATCH 11/53] edit adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 454d856f779..e0f293a7019 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -123,5 +123,6 @@ toc_title: Adopters | МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | | kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | +| Tesla | Electric vehicle and clean energy company | — | — | — | [vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) From b1b58123304b351fc9291775ba43bf67f1b37d2c Mon Sep 17 00:00:00 2001 From: Anna Date: Fri, 5 Mar 2021 13:16:36 +0300 Subject: [PATCH 12/53] minor fix --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index e0f293a7019..23f7b596851 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -123,6 +123,6 @@ toc_title: Adopters | МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | | kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | -| Tesla | Electric vehicle and clean energy company | — | — | — | [vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | +| Tesla | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) From 933da37546fb6ac43ec3f1ed0a222360df418d15 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 6 Mar 2021 22:58:54 +0300 Subject: [PATCH 13/53] Update s3.md important fixes --- docs/ru/engines/table-engines/integrations/s3.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 696380059d8..a5e00a83750 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -52,9 +52,9 @@ SELECT * FROM s3_engine_table LIMIT 2; - индексы, - репликация. -### Глобальные пути +### Символы подстановки -Несколько компонентов пути могут быть глобальными. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). +Несколько компонентов пути могут содержать символы подстановки. Для обработки файл должен существовать и соответствовать всему шаблону пути. 
Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). - `*` — заменяет любое количество любых символов, кроме `/` включая пустую строку. - `?` — заменяет любые одиночные символы. From f6889ab49a76c357bb211437969e49779b913463 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:47:25 +0300 Subject: [PATCH 14/53] Update docs/en/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index ac1a0533a68..b7968c4a571 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -16,7 +16,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, **Engine parameters** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*, ?, {abc,def} and {N..M}` where `N, M` — numbers, `'abc', 'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. From 5d254e32c6a848ac001ff010e665b31e6d056d6b Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:47:35 +0300 Subject: [PATCH 15/53] Update docs/en/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index b7968c4a571..e7131e00e02 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -19,7 +19,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. +- `compression` — Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension. 
**Example** From 621188d4d7de3a8fb6d34e5c582f9958341d0f08 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:47:49 +0300 Subject: [PATCH 16/53] Update docs/en/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index e7131e00e02..47e8a365c00 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -54,7 +54,7 @@ For more information about virtual columns see [here](../../../engines/table-eng ### Globs in path -Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). +Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. From b186bf611b5c4585807827df3d87e0a0f2cf49a2 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:48:04 +0300 Subject: [PATCH 17/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index d0e3a6ef453..95955be1e0d 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -15,7 +15,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*, ?, {abc,def} and {N..M}` where `N, M` — numbers, `'abc', 'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension. 
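
As a small sketch of the optional `compression` argument listed above, reading a zstd-compressed file could look like this; the bucket URL, file name and schema are assumed for illustration only:

``` sql
-- The fourth argument names the codec explicitly ('zstd' is one of the documented
-- values) instead of relying on autodetection by file extension.
SELECT *
FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.zst',
        'CSV', 'name String, value UInt32', 'zstd')
LIMIT 5;
```
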
From 805301dddb48c22ca51ead6813de584f33161527 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:48:15 +0300 Subject: [PATCH 18/53] Update docs/en/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 47e8a365c00..7a3191553a0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -52,7 +52,7 @@ For more information about virtual columns see [here](../../../engines/table-eng - Indexes. - Replication. -### Globs in path +### Globs in path {#globs-in-path} Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). From b87e687444af216557ac9d1d5f411c790bfc3876 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:48:31 +0300 Subject: [PATCH 19/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 95955be1e0d..51782ac9688 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -26,7 +26,7 @@ A table with the specified structure for reading or writing data in the specifie **Examples** -Table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv` and selection of the first two rows from it: +Selecting the first two rows from the table from S3 file `https://storage.yandexcloud.net/my-test-bucket-768/data.csv`: ``` sql SELECT * From 9de3e8d09ff2e3fb1f3e8c8d090d8fd3bf888f06 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:48:52 +0300 Subject: [PATCH 20/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 51782ac9688..3b0747a4424 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -82,7 +82,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -2. 
Query the amount of rows in all files of these two directories: +Count the total amount of rows in all files in these two directories: ``` sql SELECT count(*) From 413d60e22575f228a33f7e2ce6493c3d1db67bb5 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:49:05 +0300 Subject: [PATCH 21/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 3b0747a4424..3ac729b62ae 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -111,7 +111,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000 └─────────┘ ``` -4. Insert a data into file `test-data.csv.gz`: +Insert data into file `test-data.csv.gz`: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') From 1da690a089835f190f46bc6d0f1a12ef502da690 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:49:24 +0300 Subject: [PATCH 22/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index a5e00a83750..a5f1594d858 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -19,7 +19,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, - `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. - `format` — [формат](../../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. +- `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла. **Пример** From 4a589aff5b54e324f4d76bae32396bd2b2ab4e35 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:49:37 +0300 Subject: [PATCH 23/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index a5f1594d858..7156697b12f 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -52,7 +52,7 @@ SELECT * FROM s3_engine_table LIMIT 2; - индексы, - репликация. 
-### Символы подстановки +### Символы подстановки {#globs-in-path} Несколько компонентов пути могут содержать символы подстановки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). From fc13e9264477e5c29f5216cad6d23b7bbc54a86e Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:50:42 +0300 Subject: [PATCH 24/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 7156697b12f..8216379786c 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -65,7 +65,7 @@ SELECT * FROM s3_engine_table LIMIT 2; ### S3-связанные настройки {#s3-settings} -Следующие настройки могут быть установлены перед выполнением запроса или заданы в конфигурационном файле. +Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки: - `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64Mb`. - `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512Mb`. From cf4d94ed65f244677322af9517797ba2c6be6e5a Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:50:50 +0300 Subject: [PATCH 25/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 8216379786c..bfae21de241 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -63,7 +63,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md). 
-### S3-связанные настройки {#s3-settings} +### Настройки движка S3 {#s3-settings} Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки: From aa757a15a3defc58664dbf527fa5df76889daedf Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:51:13 +0300 Subject: [PATCH 26/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index bfae21de241..d7fbcbea48d 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -67,8 +67,8 @@ SELECT * FROM s3_engine_table LIMIT 2; Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки: -- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64Mb`. -- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512Mb`. +- `s3_max_single_part_upload_size` — максимальный размер объекта для загрузки с использованием однокомпонентной загрузки в S3. Значение по умолчанию — `64 Mб`. +- `s3_min_upload_part_size` — минимальный размер объекта для загрузки при многокомпонентной загрузке в [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Значение по умолчанию — `512 Mб`. - `s3_max_redirects` — максимальное количество разрешенных переадресаций S3. Значение по умолчанию — `10`. Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`. From 165c57bfa4de0c1c9051d11d7fa66653bafb2105 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:51:29 +0300 Subject: [PATCH 27/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index d7fbcbea48d..1ab3a9657fd 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -75,7 +75,7 @@ SELECT * FROM s3_engine_table LIMIT 2; ### Настройки на основе конечных точек {#endpoint-settings} -Следующие настройки могут быть заданы в конфигурационном файле для данной конечной точки (которая будет соответствовать точному префиксу URL-адреса). +Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки: Обязательная настройка: - `endpoint` — указывает префикс конечной точки. 
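
Since these are ordinary query-level settings (the docs say they can be set before query execution or placed into the configuration file), one way to apply them is per session with `SET`; the byte values below are only an assumed illustration of the documented defaults, not a recommendation:

``` sql
-- Sketch: set the upload thresholds explicitly and disable S3 redirects
-- (the docs suggest s3_max_redirects = 0 to mitigate SSRF when URLs are untrusted).
SET s3_min_upload_part_size = 536870912;        -- 512 MiB
SET s3_max_single_part_upload_size = 67108864;  -- 64 MiB
SET s3_max_redirects = 0;
```
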
From 4232f3b9a4e1e482a6738cc67dea2cea820bd2ac Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:51:42 +0300 Subject: [PATCH 28/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 1ab3a9657fd..8fd45291b17 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -81,7 +81,7 @@ SELECT * FROM s3_engine_table LIMIT 2; - `endpoint` — указывает префикс конечной точки. Необязательные настройки: -- `access_key_id` и `secret_access_key` — указывает учетные данные для использования с данной конечной точкой. +- `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной конечной точкой. - `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной конечной точки. Значение по умолчанию - `false`. - `header` — добавляет указанный HTTP-заголовок к запросу на заданную конечную точку. Может быть определен несколько раз. - `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C. From a42a728c448e63ed8eb7f7a834209c542c217352 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:57:13 +0300 Subject: [PATCH 29/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 8fd45291b17..c34f3b147ef 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -126,7 +126,7 @@ CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` -3. Таблица содержит все файлы в обоих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): +3. 
Таблица содержит все файлы в обеих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) From 956d86a5929b551ce31c6551fcfe460ab508f5cf Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:57:23 +0300 Subject: [PATCH 30/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index eb6c9bbb73e..1ab68cbbb3e 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -36,7 +36,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'c LIMIT 2; ``` -Result: +Результат: ``` text ┌─column1─┬─column2─┬─column3─┐ From 6df6acb2cf36486bcd1b606f8ab6a6e03ecf01d1 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:57:30 +0300 Subject: [PATCH 31/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 1ab68cbbb3e..6766c5106ca 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -89,7 +89,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -2. Запрос количества строк во всех файлах этих двух каталогов: +Подсчитаем общее количество строк во всех файлах этих двух каталогов: ``` sql SELECT count(*) From 2315b1b0c5fa9a9e90f43579383e2d0fd274c5c8 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:57:39 +0300 Subject: [PATCH 32/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 6766c5106ca..450ade43bc2 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -118,7 +118,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000 └─────────┘ ``` -4. 
Вставка данных в файл `test-data.csv.gz`: +Запишем данные в файл `test-data.csv.gz`: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') From 4d2a7d05638c3cd0d27dc6d8b6d7ef9929789168 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:57:47 +0300 Subject: [PATCH 33/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 450ade43bc2..8077af5e805 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -132,7 +132,7 @@ INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv SELECT name, value FROM existing_table; ``` -## Смотрите также +**Смотрите также** - [Движок таблиц S3](../../engines/table-engines/integrations/s3.md) From b3725c73220d866b7e2b5607c4a7e9eb6ac09ac0 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:59:23 +0300 Subject: [PATCH 34/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 8077af5e805..1038b5a83f5 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -125,7 +125,7 @@ INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv VALUES ('test-data', 1), ('test-data-2', 2); ``` -5. Вставка данных в файл `test-data.csv.gz` из существующей таблицы: +Запишем данные из существующей таблицы в файл `test-data.csv.gz`: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') From 4759e9ffbf76d4531f393f70c3c6f4749ea43545 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:59:30 +0300 Subject: [PATCH 35/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 1038b5a83f5..3896e8cb236 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -105,7 +105,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. -3. 
Запрос данных из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql SELECT count(*) From 45774b30b20bc61e672aa79f40913bf74cde84d4 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 08:59:37 +0300 Subject: [PATCH 36/53] Update docs/ru/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 3896e8cb236..4069a4e3e88 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -76,7 +76,7 @@ LIMIT 2; - 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' - 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' -1. Запрос количества строк в файлах, заканчивающихся цифрами от 1 до 3: +Подсчитаем количество строк в файлах, заканчивающихся цифрами от 1 до 3: ``` sql SELECT count(*) From 4f1dcb99f1a544a232aa4c84ffbd5f1fc60fa798 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:13:11 +0300 Subject: [PATCH 37/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index c34f3b147ef..3aabba8152a 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -142,7 +142,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); ``` -## Смотрите также +**Смотрите также** - [Табличная функция S3](../../../sql-reference/table-functions/s3.md) From aadbcb318e2cc54d8ed459f7509a0b8deea883f8 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:13:18 +0300 Subject: [PATCH 38/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 3aabba8152a..0292776c6a7 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -56,7 +56,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Несколько компонентов пути могут содержать символы подстановки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`). -- `*` — заменяет любое количество любых символов, кроме `/` включая пустую строку. +- `*` — заменяет любое количество любых символов, кроме `/`, включая пустую строку. - `?` — заменяет любые одиночные символы. 
- `{some_string, another_string, yet_another_one}` — заменяет любые строки `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — заменяет любое число от N до M, включая обе границы. N и M могут иметь ведущие нули, например `000..078`. From 14b98cb903d5156a35a3f11c94c5683fc4acb1c6 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:13:31 +0300 Subject: [PATCH 39/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 3ac729b62ae..10d75a7bde7 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -118,7 +118,7 @@ INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv VALUES ('test-data', 1), ('test-data-2', 2); ``` -5. Insert a data into file `test-data.csv.gz` from existing table: +Insert data into file `test-data.csv.gz` from existing table: ``` sql INSERT INTO s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') From f3dd9f6bbfaf43d6812dde748b9e675143011775 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:13:40 +0300 Subject: [PATCH 40/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 10d75a7bde7..93eb880c55c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -98,7 +98,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. -3. Query the data from files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: ``` sql SELECT count(*) From af2e8b6e37a486e89ff838ea482f9f5bf4bad82c Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:13:46 +0300 Subject: [PATCH 41/53] Update docs/en/sql-reference/table-functions/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 93eb880c55c..e7bc2c3f509 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -69,7 +69,7 @@ Suppose that we have several files with following URIs on S3: - 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' - 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' -1. 
Query the amount of rows in files end with number from 1 to 3: +Count the amount of rows in files ending with numbers from 1 to 3: ``` sql SELECT count(*) From 8923544b87c369047f057d6e5ce9ad935b77f51b Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:15:20 +0300 Subject: [PATCH 42/53] Update index.md --- docs/en/engines/table-engines/index.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 57bd5026975..e60cdf3c899 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -30,7 +30,6 @@ Engines in the family: - [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) -- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3) ### Log {#log} From f4afebc19ab09e742ab37555ebe64f283266e036 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:19:58 +0300 Subject: [PATCH 43/53] Update index.md --- docs/ru/engines/table-engines/index.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/ru/engines/table-engines/index.md b/docs/ru/engines/table-engines/index.md index a81e68a9cd7..740588c50a4 100644 --- a/docs/ru/engines/table-engines/index.md +++ b/docs/ru/engines/table-engines/index.md @@ -31,7 +31,6 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043d\u0438\u0435" - [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree) -- [S3](../../engines/table-engines/integrations/s3.md#table-engine-s3) ### Log {#log} From bebaceacfe657fe924728d5ab3e1278b76c885f3 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:20:31 +0300 Subject: [PATCH 44/53] Update docs/ru/engines/table-engines/integrations/s3.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 0292776c6a7..97f0605d739 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`. 
-### Настройки на основе конечных точек {#endpoint-settings} +### Настройки конечных точек {#endpoint-settings} Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки: From b4158ff23b02aa9a04b7b90a141f64ac4c9425a1 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:22:56 +0300 Subject: [PATCH 45/53] Update index.md --- docs/ru/sql-reference/table-functions/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/table-functions/index.md b/docs/ru/sql-reference/table-functions/index.md index 178701bbc34..7d88a7a3590 100644 --- a/docs/ru/sql-reference/table-functions/index.md +++ b/docs/ru/sql-reference/table-functions/index.md @@ -33,6 +33,6 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043D\u0438\u0435" | [jdbc](jdbc.md) | Создаёт таблицу с дижком [JDBC](../../engines/table-engines/integrations/jdbc.md). | | [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). | | [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). | -| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. | +| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md)-engine table. | -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/) From 6cbdd523a0ebac4d1a5e3a735ec740abb91af348 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:23:19 +0300 Subject: [PATCH 46/53] Update index.md --- docs/ru/sql-reference/table-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/index.md b/docs/ru/sql-reference/table-functions/index.md index 7d88a7a3590..0ee1798bded 100644 --- a/docs/ru/sql-reference/table-functions/index.md +++ b/docs/ru/sql-reference/table-functions/index.md @@ -33,6 +33,6 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043D\u0438\u0435" | [jdbc](jdbc.md) | Создаёт таблицу с дижком [JDBC](../../engines/table-engines/integrations/jdbc.md). | | [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). | | [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). | -| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md)-engine table. | +| [s3](s3.md) | Создаёт таблицу с движком [S3](../../engines/table-engines/integrations/s3.md). 
| [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/) From c8e87a6bb7d0f276c500c8ca8ee7075342e1d559 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:26:55 +0300 Subject: [PATCH 47/53] Update s3.md --- .../engines/table-engines/integrations/s3.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 7a3191553a0..ea1f41d35fd 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -19,7 +19,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, - `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension. +- `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension. **Example** @@ -67,9 +67,9 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table The following settings can be set before query execution or placed into configuration file. -- `s3_max_single_part_upload_size` — Default value is `64Mb`. The maximum size of object to upload using singlepart upload to S3. -- `s3_min_upload_part_size` — Default value is `512Mb`. The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). -- `s3_max_redirects` — Default value is `10`. Max number of S3 redirects hops allowed. +- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`. +- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. +- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. @@ -77,11 +77,11 @@ Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): -- `endpoint` — Mandatory. Specifies prefix of an endpoint. -- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. -- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. -- `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. 
-- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `endpoint` — Specifies prefix of an endpoint. Mandatory. +- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional. +- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. **Example:** From 5f7f74aee090f135a1f6f9dbf0f13adc8d570b31 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:29:50 +0300 Subject: [PATCH 48/53] Update s3.md --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 97f0605d739..fd4f433e038 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -16,7 +16,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, **Параметры движка** -- `path` — URL-адрес корзины с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. +- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. - `format` — [формат](../../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла. From 61058c868ac74e4c8a64f7a25ab0da7c09a32407 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Wed, 10 Mar 2021 10:24:13 +0300 Subject: [PATCH 49/53] Update s3.md --- docs/ru/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index fd4f433e038..60d8eb39502 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -126,7 +126,7 @@ CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); ``` -3. Таблица содержит все файлы в обеих директориях (все файлы должны соответствовать формату и схеме, описанным в запросе): +3. 
Таблица содержит все файлы в обоих каталогах (все файлы должны соответствовать формату и схеме, описанным в запросе): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) From b908a27cae6d1d002ab4a99c4ca16bd6d3f11b75 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Thu, 11 Mar 2021 13:09:45 +0300 Subject: [PATCH 50/53] Update s3.md --- docs/en/engines/table-engines/integrations/s3.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index ea1f41d35fd..035d5118392 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -43,8 +43,7 @@ SELECT * FROM s3_engine_table LIMIT 2; For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). -## Specifics and recommendations {#specifics-and-recommendations} -### Implementation Details {#implementation-details} +## Implementation Details {#implementation-details} - Reads and writes can be parallel - Not supported: @@ -52,7 +51,7 @@ For more information about virtual columns see [here](../../../engines/table-eng - Indexes. - Replication. -### Globs in path {#globs-in-path} +## Globs in path {#globs-in-path} Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). @@ -63,7 +62,7 @@ Multiple path components can have globs. For being processed file should exist a Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. -### S3-related Settings {#s3-settings} +## S3-related Settings {#s3-settings} The following settings can be set before query execution or placed into configuration file. @@ -73,7 +72,7 @@ The following settings can be set before query execution or placed into configur Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. -### Endpoint-based Settings {#endpoint-settings} +## Endpoint-based Settings {#endpoint-settings} The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL): From abe54b8dcb3ea23ff1980e1e618a53fb89c5fc7d Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Thu, 11 Mar 2021 13:11:00 +0300 Subject: [PATCH 51/53] Update s3.md --- docs/ru/engines/table-engines/integrations/s3.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 60d8eb39502..f89060a785e 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -43,8 +43,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Подробнее про виртуальные столбцы можно прочитать [здесь](../../../engines/table-engines/index.md#table_engines-virtual_columns). -## Особенности и рекомендации {#specifics-and-recommendations} -### Детали реализации {#implementation-details} +## Детали реализации {#implementation-details} - Чтение и запись могут быть параллельными. 
 - Не поддерживаются:
@@ -52,7 +51,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
     - индексы,
     - репликация.
 
-### Символы подстановки {#globs-in-path}
+## Символы подстановки {#globs-in-path}
 
 Несколько компонентов пути могут содержать символы подстановки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
 
@@ -63,7 +62,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
 
 Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md).
 
-### Настройки движка S3 {#s3-settings}
+## Настройки движка S3 {#s3-settings}
 
 Перед выполнением запроса или в конфигурационном файле могут быть установлены следующие настройки:
 
@@ -73,7 +72,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
 
 Соображение безопасности: если злонамеренный пользователь попробует указать произвольные URL-адреса S3, параметр `s3_max_redirects` должен быть установлен в ноль, чтобы избежать атак [SSRF] (https://en.wikipedia.org/wiki/Server-side_request_forgery). Как альтернатива, в конфигурации сервера должен быть указан `remote_host_filter`.
 
-### Настройки конечных точек {#endpoint-settings}
+## Настройки конечных точек {#endpoint-settings}
 
 Для конечной точки (которая соответствует точному префиксу URL-адреса) в конфигурационном файле могут быть заданы следующие настройки:
 

From 82a034049060f3e91b7ec3b1bd49f1f2ffbfae2e Mon Sep 17 00:00:00 2001
From: Anna
Date: Sun, 21 Mar 2021 17:23:16 +0300
Subject: [PATCH 52/53] Fixes after review

---
 docs/en/engines/table-engines/integrations/s3.md | 7 +++----
 docs/en/sql-reference/table-functions/s3.md      | 2 +-
 docs/ru/engines/table-engines/integrations/s3.md | 6 +++---
 docs/ru/sql-reference/table-functions/s3.md      | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index eb6cec01dcc..03340f2d8c9 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -16,7 +16,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
 
 **Engine parameters**
 
-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
 - `format` — The [format](../../../interfaces/formats.md#formats) of the file.
 - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
 - `compression` — Compression type. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Parameter is optional. By default, it will autodetect compression by file extension.
@@ -62,9 +62,9 @@ For more information about virtual columns see [here](../../../engines/table-eng
     - Indexes.
     - Replication.
 
-## Globs in path {#globs-in-path}
+## Wildcards In Path {#wildcards-in-path}
 
-Multiple path components can have globs. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment).
+The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file must exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` time).
 
 - `*` — Substitutes any number of any characters except `/` including empty string.
 - `?` — Substitutes any single character.
@@ -87,7 +87,6 @@ Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max
 
 The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):
 
-
 - `endpoint` — Specifies prefix of an endpoint. Mandatory.
 - `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
 - `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index e7bc2c3f509..34f0607b94c 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -15,7 +15,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
 
 **Arguments**
 
-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 - `format` — The [format](../../interfaces/formats.md#formats) of the file.
 - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
 - `compression` — Parameter is optional. Supported values: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. By default, it will autodetect compression by file extension.
diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md
index 9afab92449a..fa10e8ebc34 100644
--- a/docs/ru/engines/table-engines/integrations/s3.md
+++ b/docs/ru/engines/table-engines/integrations/s3.md
@@ -16,7 +16,7 @@ ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, structure,
 
 **Параметры движка**
 
-- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки.
+- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}` где `N`, `M` — числа, `'abc'`, `'def'` — строки. Подробнее смотрите [ниже](#wildcards-in-path).
 - `format` — [формат](../../../interfaces/formats.md#formats) файла.
 - `structure` — структура таблицы в формате `'column1_name column1_type, column2_name column2_type, ...'`.
 - `compression` — тип сжатия. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр. Если не указано, то тип сжатия определяется автоматически по расширению файла.
@@ -52,9 +52,9 @@ SELECT * FROM s3_engine_table LIMIT 2;
     - индексы,
     - репликация.
 
-## Символы подстановки {#globs-in-path}
+## Символы подстановки {#wildcards-in-path}
 
-Несколько компонентов пути могут содержать символы подстановки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
+Аргумент `path` может указывать на несколько файлов, используя подстановочные знаки. Для обработки файл должен существовать и соответствовать всему шаблону пути. Список файлов определяется во время выполнения запроса `SELECT` (не в момент выполнения запроса `CREATE`).
 
 - `*` — заменяет любое количество любых символов, кроме `/`, включая пустую строку.
 - `?` — заменяет любые одиночные символы.
diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md
index 78afb532591..1d3fc8cfdb7 100644
--- a/docs/ru/sql-reference/table-functions/s3.md
+++ b/docs/ru/sql-reference/table-functions/s3.md
@@ -15,7 +15,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres
 
 **Aргументы**
 
-- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки.
+- `path` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные знаки в режиме "только чтение": `*, ?, {abc,def} и {N..M}` где `N, M` — числа, `'abc', 'def'` — строки. Подробнее смотрите [здесь](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 - `format` — [формат](../../interfaces/formats.md#formats) файла.
 - `structure` — cтруктура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.
 - `compression` — автоматически обнаруживает сжатие по расширению файла. Возможные значения: none, gzip/gz, brotli/br, xz/LZMA, zstd/zst. Необязательный параметр.

From 71c7257fce907f08b39ea1c5c0a4d74fe626a83f Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Tue, 23 Mar 2021 01:41:14 +0300
Subject: [PATCH 53/53] undo submodule change

---
 contrib/librdkafka | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/librdkafka b/contrib/librdkafka
index f2f6616419d..cf11d0aa36d 160000
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@@ -1 +1 @@
-Subproject commit f2f6616419d567c9198aef0d1133a2e9b4f02276
+Subproject commit cf11d0aa36d4738f2c9bf4377807661660f1be76
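As a quick illustration of the behaviour the patched sections describe, here is a minimal sketch. The bucket URL, table name and file range are hypothetical; only the `s3_max_redirects` setting, the `{N..M}` wildcard and the two-argument `S3(path, format)` form used in the engine page's own examples are taken from the documentation itself.

``` sql
-- Hypothetical bucket and file names, shown only to illustrate the documented syntax.
-- Set before query execution, as recommended in the S3 settings section, to harden
-- against SSRF when untrusted users may supply S3 URLs.
SET s3_max_redirects = 0;

-- Wildcards work in read-only mode; the file listing is resolved at SELECT time, not at CREATE time.
CREATE TABLE s3_wildcard_example (name String, value UInt32)
ENGINE = S3('https://storage.example.com/my-bucket/some_prefix/file-{1..10}.csv', 'CSV');

SELECT count() FROM s3_wildcard_example;
```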