From b789516556bdd48280797bd844c76800e598cb5e Mon Sep 17 00:00:00 2001 From: Dale Mcdiarmid Date: Mon, 16 Dec 2024 13:59:37 +0000 Subject: [PATCH 1/2] azure blob storage doc improvements --- .../table-engines/integrations/azure-queue.md | 107 +++++++++++++++--- .../integrations/azureBlobStorage.md | 8 +- docs/en/operations/backup.md | 4 +- .../table-functions/azureBlobStorage.md | 8 +- .../azureBlobStorageCluster.md | 4 +- 5 files changed, 108 insertions(+), 23 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/azure-queue.md b/docs/en/engines/table-engines/integrations/azure-queue.md index 2e5889c7485..5c3d0c99bc8 100644 --- a/docs/en/engines/table-engines/integrations/azure-queue.md +++ b/docs/en/engines/table-engines/integrations/azure-queue.md @@ -24,19 +24,24 @@ CREATE TABLE test (name String, value UInt32) `AzureQueue` parameters are the same as `AzureBlobStorage` table engine supports. See parameters section [here](../../../engines/table-engines/integrations/azureBlobStorage.md). +Similar to the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage) table engine, users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage). 
+ **Example** ```sql -CREATE TABLE azure_queue_engine_table (name String, value UInt32) -ENGINE=AzureQueue('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/data/') -SETTINGS - mode = 'unordered' +CREATE TABLE azure_queue_engine_table +( + `key` UInt64, + `data` String +) +ENGINE = AzureQueue('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', '*', 'CSV') +SETTINGS mode = 'unordered' ``` ## Settings {#settings} The set of supported settings is the same as for `S3Queue` table engine, but without `s3queue_` prefix. See [full list of settings settings](../../../engines/table-engines/integrations/s3queue.md#settings). -To get a list of settings, configured for the table, use `system.s3_queue_settings` table. Available from `24.10`. +To get a list of settings, configured for the table, use `system.azure_queue_settings` table. Available from `24.10`. 
## Description {#description} @@ -51,18 +56,18 @@ When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the Example: ``` sql - CREATE TABLE azure_queue_engine_table (name String, value UInt32) - ENGINE=AzureQueue('', 'CSV', 'gzip') - SETTINGS - mode = 'unordered'; +CREATE TABLE azure_queue_engine_table (key UInt64, data String) + ENGINE=AzureQueue('', 'CSV', 'gzip') + SETTINGS + mode = 'unordered'; - CREATE TABLE stats (name String, value UInt32) - ENGINE = MergeTree() ORDER BY name; +CREATE TABLE stats (key UInt64, data String) + ENGINE = MergeTree() ORDER BY key; - CREATE MATERIALIZED VIEW consumer TO stats - AS SELECT name, value FROM azure_queue_engine_table; +CREATE MATERIALIZED VIEW consumer TO stats + AS SELECT key, data FROM azure_queue_engine_table; - SELECT * FROM stats ORDER BY name; +SELECT * FROM stats ORDER BY key; ``` ## Virtual columns {#virtual-columns} @@ -71,3 +76,77 @@ Example: - `_file` — Name of the file. For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). + +## Introspection + +Enable logging for the table via the table setting `enable_logging_to_s3queue_log=1`. + +Introspection capabilities are the same as the [S3Queue table engine](/docs/en/engines/table-engines/integrations/s3queue#introspection) with several distinct differences: + +1. Use the `system.s3queue` for the in-memory state of the queue. Later versions of ClickHouse may introduce a dedicated `azurequeue` table. +2. Enable the `system.azure_queue_log` via the main ClickHouse configuration e.g. + + ```xml + <azure_queue_log> + <database>system</database> + <table>azure_queue_log</table>
+ </azure_queue_log>
+ ``` + +This persistent table has the same information as `system.s3queue`, but for processed and failed files. + +The table has the following structure: + +```sql + +CREATE TABLE system.azure_queue_log +( + `hostname` LowCardinality(String) COMMENT 'Hostname', + `event_date` Date COMMENT 'Event date of writing this log row', + `event_time` DateTime COMMENT 'Event time of writing this log row', + `database` String COMMENT 'The name of a database where current S3Queue table lives.', + `table` String COMMENT 'The name of S3Queue table.', + `uuid` String COMMENT 'The UUID of S3Queue table', + `file_name` String COMMENT 'File name of the processing file', + `rows_processed` UInt64 COMMENT 'Number of processed rows', + `status` Enum8('Processed' = 0, 'Failed' = 1) COMMENT 'Status of the processing file', + `processing_start_time` Nullable(DateTime) COMMENT 'Time of the start of processing the file', + `processing_end_time` Nullable(DateTime) COMMENT 'Time of the end of processing the file', + `exception` String COMMENT 'Exception message if happened' +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(event_date) +ORDER BY (event_date, event_time) +SETTINGS index_granularity = 8192 +COMMENT 'Contains logging entries with the information files processes by S3Queue engine.' + +``` + +Example: + +```sql +SELECT * +FROM system.azure_queue_log +LIMIT 1 +FORMAT Vertical + +Row 1: +────── +hostname: clickhouse +event_date: 2024-12-16 +event_time: 2024-12-16 13:42:47 +database: default +table: azure_queue_engine_table +uuid: 1bc52858-00c0-420d-8d03-ac3f189f27c8 +file_name: test_1.csv +rows_processed: 3 +status: Processed +processing_start_time: 2024-12-16 13:42:47 +processing_end_time: 2024-12-16 13:42:47 +exception: + +1 row in set. Elapsed: 0.002 sec. 
+ +``` + + diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index bb1349ad9d0..3f66ecd42e6 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -31,10 +31,12 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32) **Example** +Users can use the Azurite emulator for local Azure Storage development. Further details [here](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub%2Cblob-storage). If using a local instance of Azurite, users may need to substitute `http://localhost:10000` for `http://azurite1:10000` in the commands below, where we assume Azurite is available at host `azurite1`. + + ``` sql CREATE TABLE test_table (key UInt64, data String) - ENGINE = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', - 'test_container', 'test_table', 'CSV'); + ENGINE = AzureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', 'test_table', 'CSV'); INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c'); @@ -73,7 +75,7 @@ To enable caching use a setting `filesystem_cache_name = ''` and `enable_f ```sql SELECT * -FROM azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'test_container', 'test_table', 'CSV') +FROM 
azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'testcontainer', 'test_table', 'CSV') SETTINGS filesystem_cache_name = 'cache_for_azure', enable_filesystem_cache = 1; ``` diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 248fdbc156f..dbec26239ac 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -490,7 +490,7 @@ AzureBlobStorage('/', '', '', ' Date: Mon, 16 Dec 2024 14:09:03 +0000 Subject: [PATCH 2/2] add Azurite --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index dd26977ece1..832aafbb3da 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -49,6 +49,7 @@ AutoML Autocompletion AvroConfluent AzureQueue +Azurite BFloat BIGINT BIGSERIAL