Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-12-19 04:42:37 +00:00
Merge pull request #73366 from gingerwizard/issue_2907
azure blob storage doc improvements
Commit 0ec76057e0
@@ -24,19 +24,24 @@ CREATE TABLE test (name String, value UInt32)

`AzureQueue` parameters are the same as those the `AzureBlobStorage` table engine supports. See the parameters section [here](../../../engines/table-engines/integrations/azureBlobStorage.md).

Similar to the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage) table engine, users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage).

**Example**

```sql
CREATE TABLE azure_queue_engine_table
(
    `key` UInt64,
    `data` String
)
ENGINE = AzureQueue('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', '*', 'CSV')
SETTINGS mode = 'unordered'
```
## Settings {#settings}

The set of supported settings is the same as for the `S3Queue` table engine, but without the `s3queue_` prefix. See the [full list of settings](../../../engines/table-engines/integrations/s3queue.md#settings).
To get a list of settings configured for the table, use the `system.azure_queue_settings` table. Available from `24.10`.
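For example, the settings currently applied to the table created above can be inspected with a query like the following (a sketch; it assumes `system.azure_queue_settings` mirrors `system.s3_queue_settings` with `table`, `name`, and `value` columns):

```sql
SELECT name, value
FROM system.azure_queue_settings
WHERE table = 'azure_queue_engine_table';
```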
## Description {#description}
@@ -51,18 +56,18 @@ When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the

Example:

``` sql
CREATE TABLE azure_queue_engine_table (key UInt64, data String)
ENGINE=AzureQueue('<endpoint>', 'CSV', 'gzip')
SETTINGS
    mode = 'unordered';

CREATE TABLE stats (key UInt64, data String)
ENGINE = MergeTree() ORDER BY key;

CREATE MATERIALIZED VIEW consumer TO stats
AS SELECT key, data FROM azure_queue_engine_table;

SELECT * FROM stats ORDER BY key;
```
## Virtual columns {#virtual-columns}
@@ -71,3 +76,77 @@ Example:

- `_file` — Name of the file.

For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).

## Introspection {#introspection}

Enable logging for the table via the table setting `enable_logging_to_s3queue_log=1`.
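For instance, logging can be enabled together with the other table settings at creation time (a sketch reusing the Azurite connection string and container from the example above):

```sql
CREATE TABLE azure_queue_engine_table
(
    `key` UInt64,
    `data` String
)
ENGINE = AzureQueue('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', '*', 'CSV')
SETTINGS mode = 'unordered', enable_logging_to_s3queue_log = 1;
```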
Introspection capabilities are the same as the [S3Queue table engine](/docs/en/engines/table-engines/integrations/s3queue#introspection), with several distinct differences:

1. Use the `system.s3queue` table for the in-memory state of the queue. Later versions of ClickHouse may introduce a dedicated `azurequeue` table.
2. Enable the `system.azure_queue_log` table via the main ClickHouse configuration, e.g.:
```xml
<azure_queue_log>
    <database>system</database>
    <table>azure_queue_log</table>
</azure_queue_log>
```
This persistent table has the same information as `system.s3queue`, but for processed and failed files.

The table has the following structure:

```sql
CREATE TABLE system.azure_queue_log
(
    `hostname` LowCardinality(String) COMMENT 'Hostname',
    `event_date` Date COMMENT 'Event date of writing this log row',
    `event_time` DateTime COMMENT 'Event time of writing this log row',
    `database` String COMMENT 'The name of a database where current S3Queue table lives.',
    `table` String COMMENT 'The name of S3Queue table.',
    `uuid` String COMMENT 'The UUID of S3Queue table',
    `file_name` String COMMENT 'File name of the processing file',
    `rows_processed` UInt64 COMMENT 'Number of processed rows',
    `status` Enum8('Processed' = 0, 'Failed' = 1) COMMENT 'Status of the processing file',
    `processing_start_time` Nullable(DateTime) COMMENT 'Time of the start of processing the file',
    `processing_end_time` Nullable(DateTime) COMMENT 'Time of the end of processing the file',
    `exception` String COMMENT 'Exception message if happened'
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, event_time)
SETTINGS index_granularity = 8192
COMMENT 'Contains logging entries with the information files processes by S3Queue engine.'
```
Example:

```sql
SELECT *
FROM system.azure_queue_log
LIMIT 1
FORMAT Vertical

Row 1:
──────
hostname:              clickhouse
event_date:            2024-12-16
event_time:            2024-12-16 13:42:47
database:              default
table:                 azure_queue_engine_table
uuid:                  1bc52858-00c0-420d-8d03-ac3f189f27c8
file_name:             test_1.csv
rows_processed:        3
status:                Processed
processing_start_time: 2024-12-16 13:42:47
processing_end_time:   2024-12-16 13:42:47
exception:

1 row in set. Elapsed: 0.002 sec.
```
@@ -31,10 +31,12 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32)

**Example**

Users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage). If using a local instance of Azurite, users may need to substitute `http://azurite1:10000` with `http://localhost:10000` in the commands below, where we assume Azurite is available at host `azurite1`.

``` sql
CREATE TABLE test_table (key UInt64, data String)
ENGINE = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', 'test_table', 'CSV');

INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c');
```
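Reading the table back confirms the rows were written to the container (a sketch, using the `test_table` created above):

```sql
SELECT * FROM test_table;
```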
@@ -73,7 +75,7 @@ To enable caching use a setting `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`

```sql
SELECT *
FROM azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', 'test_table', 'CSV')
SETTINGS filesystem_cache_name = 'cache_for_azure', enable_filesystem_cache = 1;
```
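Cache usage can then be checked through the `system.filesystem_cache` table (a sketch; a non-zero count after running the query above indicates entries were cached):

```sql
SELECT count()
FROM system.filesystem_cache;
```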
@@ -490,7 +490,7 @@ AzureBlobStorage('<connection string>/<url>', '<container>', '<path>', '<account

```sql
BACKUP TABLE data TO AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
    'testcontainer', 'data_backup');
RESTORE TABLE data AS data_restored FROM AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;',
    'testcontainer', 'data_backup');
```
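A quick sanity check after the restore (a sketch, assuming the `data` and `data_restored` tables from the example above):

```sql
SELECT count() FROM data_restored;
```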
@@ -32,11 +32,13 @@ A table with the specified structure for reading or writing data in the specified

**Examples**

Similar to the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage) table engine, users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage). Below we assume Azurite is available at the hostname `azurite1`.

Write data into Azure Blob Storage using the following:

```sql
INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1',
    'testcontainer', 'test_{_partition_id}.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
    'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32') PARTITION BY column3 VALUES (1, 2, 3), (3, 2, 1), (78, 43, 3);
```
@@ -44,7 +46,7 @@ And then it can be read using

```sql
SELECT * FROM azureBlobStorage('http://azurite1:10000/devstoreaccount1',
    'testcontainer', 'test_1.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
    'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
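Because the partitioned write above produces one file per partition, a glob pattern in the path reads them all in a single query (a sketch; the glob syntax follows the same rules as the other object-storage table functions):

```sql
SELECT * FROM azureBlobStorage('http://azurite1:10000/devstoreaccount1',
    'testcontainer', 'test_*.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
    'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32');
```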
@@ -58,7 +60,7 @@ or using connection_string

```sql
SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;EndPointSuffix=core.windows.net',
    'testcontainer', 'test_3.csv', 'CSV', 'auto', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
@@ -32,11 +32,13 @@ A table with the specified structure for reading or writing data in the specified

**Examples**

Similar to the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage) table engine, users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage). Below we assume Azurite is available at the hostname `azurite1`.

Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster:

``` sql
SELECT count(*) FROM azureBlobStorageCluster(
    'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'testcontainer', 'test_cluster_count.csv', 'devstoreaccount1',
    'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
    'auto', 'key UInt64')
```
@@ -49,6 +49,7 @@ AutoML
Autocompletion
AvroConfluent
AzureQueue
Azurite
BFloat
BIGINT
BIGSERIAL