Commit 0bab88b7cb: Merge branch 'master' of github.com:ClickHouse/ClickHouse into export-logs-in-ci
(mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00)

.gitmodules (vendored, 4 changes)
@@ -331,6 +331,10 @@
[submodule "contrib/liburing"]
	path = contrib/liburing
	url = https://github.com/axboe/liburing
[submodule "contrib/libarchive"]
	path = contrib/libarchive
	url = https://github.com/libarchive/libarchive.git
	ignore = dirty
[submodule "contrib/libfiu"]
	path = contrib/libfiu
	url = https://github.com/ClickHouse/libfiu.git
@@ -23,11 +23,8 @@ curl https://clickhouse.com/ | sh

## Upcoming Events

* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19
* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27
* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8
* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12

Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
contrib/CMakeLists.txt (vendored, 1 change)

@@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf)
add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
add_contrib (libarchive-cmake libarchive)

add_contrib (corrosion-cmake corrosion)
contrib/libarchive (new vendored submodule)

@@ -0,0 +1 @@
Subproject commit ee45796171324519f0c0bfd012018dd099296336
contrib/libarchive-cmake/CMakeLists.txt (new file, 172 lines)

@@ -0,0 +1,172 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive")

set(SRCS
    "${LIBRARY_DIR}/libarchive/archive_acl.c"
    "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
    "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
    "${LIBRARY_DIR}/libarchive/archive_check_magic.c"
    "${LIBRARY_DIR}/libarchive/archive_cmdline.c"
    "${LIBRARY_DIR}/libarchive/archive_cryptor.c"
    "${LIBRARY_DIR}/libarchive/archive_digest.c"
    "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c"
    "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c"
    "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c"
    "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c"
    "${LIBRARY_DIR}/libarchive/archive_entry.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_stat.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c"
    "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c"
    "${LIBRARY_DIR}/libarchive/archive_getdate.c"
    "${LIBRARY_DIR}/libarchive/archive_hmac.c"
    "${LIBRARY_DIR}/libarchive/archive_match.c"
    "${LIBRARY_DIR}/libarchive/archive_options.c"
    "${LIBRARY_DIR}/libarchive/archive_pack_dev.c"
    "${LIBRARY_DIR}/libarchive/archive_pathmatch.c"
    "${LIBRARY_DIR}/libarchive/archive_ppmd7.c"
    "${LIBRARY_DIR}/libarchive/archive_ppmd8.c"
    "${LIBRARY_DIR}/libarchive/archive_random.c"
    "${LIBRARY_DIR}/libarchive/archive_rb.c"
    "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c"
    "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c"
    "${LIBRARY_DIR}/libarchive/archive_read.c"
    "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c"
    "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c"
    "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c"
    "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c"
    "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
    "${LIBRARY_DIR}/libarchive/archive_read_extract2.c"
    "${LIBRARY_DIR}/libarchive/archive_read_extract.c"
    "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c"
    "${LIBRARY_DIR}/libarchive/archive_read_open_file.c"
    "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c"
    "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c"
    "${LIBRARY_DIR}/libarchive/archive_read_set_format.c"
    "${LIBRARY_DIR}/libarchive/archive_read_set_options.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c"
    "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c"
    "${LIBRARY_DIR}/libarchive/archive_string.c"
    "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c"
    "${LIBRARY_DIR}/libarchive/archive_util.c"
    "${LIBRARY_DIR}/libarchive/archive_version_details.c"
    "${LIBRARY_DIR}/libarchive/archive_virtual.c"
    "${LIBRARY_DIR}/libarchive/archive_windows.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c"
    "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c"
    "${LIBRARY_DIR}/libarchive/archive_write.c"
    "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c"
    "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c"
    "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c"
    "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c"
    "${LIBRARY_DIR}/libarchive/archive_write_open_file.c"
    "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c"
    "${LIBRARY_DIR}/libarchive/archive_write_open_memory.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_options.c"
    "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c"
    "${LIBRARY_DIR}/libarchive/filter_fork_posix.c"
    "${LIBRARY_DIR}/libarchive/filter_fork_windows.c"
    "${LIBRARY_DIR}/libarchive/xxhash.c"
)

add_library(_libarchive ${SRCS})
target_include_directories(_libarchive PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    "${LIBRARY_DIR}/libarchive"
)

target_compile_definitions(_libarchive PUBLIC
    HAVE_CONFIG_H
)

target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")

if (TARGET ch_contrib::xz)
    target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
    target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
endif()

if (TARGET ch_contrib::zlib)
    target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1)
    target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
endif()

if (OS_LINUX)
    target_compile_definitions(
        _libarchive PUBLIC
        MAJOR_IN_SYSMACROS=1
        HAVE_LINUX_FS_H=1
        HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1
        HAVE_LINUX_TYPES_H=1
        HAVE_SYS_STATFS_H=1
        HAVE_FUTIMESAT=1
        HAVE_ICONV=1
    )
endif()

add_library(ch_contrib::libarchive ALIAS _libarchive)
contrib/libarchive-cmake/config.h (new file, 1391 lines)

File diff suppressed because it is too large.
@@ -19,13 +19,13 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
@@ -130,7 +130,7 @@ COPY misc/ /misc/

# Same options as in test/base/Dockerfile
# (in case you need to override them in tests)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
@@ -41,6 +41,8 @@ RUN apt-get update -y \
    zstd \
    file \
    pv \
    zip \
    p7zip-full \
    && apt-get clean

RUN pip3 install numpy scipy pandas Jinja2
@@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i
:::

``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
```

### Engine parameters:
@@ -4578,6 +4578,17 @@ Type: Int64

Default: 0

## rewrite_count_distinct_if_with_count_distinct_implementation

Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#settings-count_distinct_implementation) setting.

Possible values:

- true — Allow.
- false — Disallow.

Default value: `false`.

## precise_float_parsing {#precise_float_parsing}

Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
@@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol.

- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 2181
- `-p N`, `--port=N` — Server port. Default value: 9181
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
@@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
## Example {#clickhouse-keeper-client-example}

```bash
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:2181 with session_id 137
./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:9181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper
@@ -0,0 +1,47 @@
---
slug: /en/sql-reference/table-functions/azureBlobStorageCluster
sidebar_position: 55
sidebar_label: azureBlobStorageCluster
title: "azureBlobStorageCluster Table Function"
---

Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md).

**Syntax**

``` sql
azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
```

**Arguments**

- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `connection_string|storage_account_url` — `connection_string` includes the account name and key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)). Alternatively, you can provide the storage account URL here and pass the account name and account key as separate parameters (see `account_name` and `account_key`).
- `container_name` - Container name.
- `blobpath` - File path. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc'`, `'def'` are strings.
- `account_name` - If `storage_account_url` is used, the account name can be specified here.
- `account_key` - If `storage_account_url` is used, the account key can be specified here.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it autodetects compression from the file extension (same as setting it to `auto`).
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

**Returned value**

A table with the specified structure for reading or writing data in the specified file.

**Examples**

Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster:

``` sql
SELECT count(*) from azureBlobStorageCluster(
        'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1',
        'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
        'auto', 'key UInt64')
```

**See Also**

- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md)
- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md)
@@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or
**Syntax**

``` sql
file(path [,format] [,structure] [,compression])
file([path_to_archive ::] path [,format] [,structure] [,compression])
```

**Parameters**

- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `path_to_archive` - The relative path to a zip/tar/7z archive. The archive path supports the same globs as `path`.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.

**Returned value**

A table with the specified structure for reading or writing data in the specified file.
@@ -128,6 +130,11 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
└─────────┴─────────┴─────────┘
```

Getting data from table.csv, located in archive1.zip and/or archive2.zip:
``` sql
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
```

## Globs in Path

Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only its suffix or prefix).
@@ -9,7 +9,7 @@ sidebar_label: Buffer

Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы.

``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
```

Параметры движка:
@@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer

缓冲数据写入 RAM 中，周期性地将数据刷新到另一个表。在读取操作时，同时从缓冲区和另一个表读取数据。

    Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
    Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])

引擎的参数：database，table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上，该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time，max_time，min_rows，max_rows，min_bytes，max_bytes - 从缓冲区刷新数据的条件。
@@ -131,7 +131,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
            .binding("host"));

    options.addOption(
        Poco::Util::Option("port", "p", "server port. default `2181`")
        Poco::Util::Option("port", "p", "server port. default `9181`")
            .argument("<port>")
            .binding("port"));

@@ -307,7 +307,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
    }

    auto host = config().getString("host", "localhost");
    auto port = config().getString("port", "2181");
    auto port = config().getString("port", "9181");
    zk_args.hosts = {host + ":" + port};
    zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
    zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
@@ -18,7 +18,8 @@
namespace
{

template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
template <typename T>
requires std::is_fundamental_v<std::decay_t<T>>
void updateHash(SipHash & hash, const T & value)
{
    hash.update(value);
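This hunk, and several others below (Exception.h, NetException.h, IntervalTree.h, MultiEnum.h, SerializationNullable.cpp, the hashing and date-time transforms), replace `std::enable_if_t`-based SFINAE with C++20 `requires` clauses. A minimal self-contained sketch of the before/after pattern; the function names are illustrative, not taken from the commit:

```cpp
#include <type_traits>
#include <string>

// Old style: the constraint hides inside a defaulted template parameter.
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
void update_old(const T & value);

// New style: the same constraint as a requires clause; the signature stays
// readable and a violation is reported as an unsatisfied constraint.
template <typename T>
requires std::is_fundamental_v<std::decay_t<T>>
void update_new(const T & value)
{
    (void)value; // placeholder body
}

int main()
{
    update_new(42);                    // OK: int is a fundamental type
    // update_new(std::string{"x"});   // would not compile: constraint not satisfied
}
```

The behaviour is unchanged; the gain is readability and clearer compiler diagnostics.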
@@ -576,6 +576,10 @@ if (TARGET ch_contrib::bzip2)
    target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2)
endif()

if (TARGET ch_contrib::libarchive)
    target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::libarchive)
endif()

if (TARGET ch_contrib::minizip)
    target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip)
endif ()
@@ -1436,6 +1436,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
        ConstraintsDescription{},
        String{},
        {},
        String{},
    };
    StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
    storage->startup();
@@ -81,9 +81,9 @@ public:
    }

    /// Message must be a compile-time constant
    template<typename T, typename = std::enable_if_t<std::is_convertible_v<T, String>>>
    Exception(int code, T && message)
        : Exception(message, code)
    template <typename T>
    requires std::is_convertible_v<T, String>
    Exception(int code, T && message) : Exception(message, code)
    {
        capture_thread_frame_pointers = thread_frame_pointers;
        message_format_string = tryGetStaticFormatString(message);
@@ -3,8 +3,8 @@
#include <base/defines.h>
#include <base/sort.h>

#include <vector>
#include <utility>
#include <vector>

namespace DB
@@ -119,7 +119,8 @@ public:
        return true;
    }

    template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
    template <typename TValue = Value, bool = true, typename... Args>
    requires(!std::is_same_v<TValue, IntervalTreeVoidValue>)
    ALWAYS_INLINE bool emplace(Interval interval, Args &&... args)
    {
        assert(!tree_is_built);
@@ -9,7 +9,8 @@ namespace DB
class NetException : public Exception
{
public:
    template<typename T, typename = std::enable_if_t<std::is_convertible_v<T, String>>>
    template <typename T>
    requires std::is_convertible_v<T, String>
    NetException(int code, T && message) : Exception(std::forward<T>(message), code)
    {
        message_format_string = tryGetStaticFormatString(message);
@@ -59,6 +59,7 @@
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT
#cmakedefine01 USE_LIBARCHIVE

/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
/// That's why we use absolute paths.
@@ -12,9 +12,9 @@ struct MultiEnum

    MultiEnum() = default;

    template <typename ... EnumValues, typename = std::enable_if_t<std::conjunction_v<std::is_same<EnumTypeT, EnumValues>...>>>
    constexpr explicit MultiEnum(EnumValues ... v)
        : MultiEnum((toBitFlag(v) | ... | 0u))
    template <typename... EnumValues>
    requires std::conjunction_v<std::is_same<EnumTypeT, EnumValues>...>
    constexpr explicit MultiEnum(EnumValues... v) : MultiEnum((toBitFlag(v) | ... | 0u))
    {}

    template <typename ValueType>
@@ -536,6 +536,7 @@ class IColumn;
    M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
    M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
    M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
    M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \
    M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
    M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
    M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
@@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
    {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
    {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
    {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
              {"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
@@ -138,7 +138,6 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,

IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
                       {{"clickhouse", Dialect::clickhouse},
                        {"kusto", Dialect::kusto},
                        {"kusto", Dialect::kusto},
                        {"prql", Dialect::prql}})
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
@@ -189,10 +189,10 @@ void SerializationNullable::serializeBinary(const IColumn & column, size_t row_n

/// Deserialize value into ColumnNullable.
/// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, void>, ReturnType>* = nullptr>
static ReturnType safeDeserialize(
    IColumn & column, const ISerialization &,
    CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
requires std::same_as<ReturnType, void>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
{
    ColumnNullable & col = assert_cast<ColumnNullable &>(column);

@@ -217,10 +217,10 @@ static ReturnType safeDeserialize(
}

/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, bool>, ReturnType>* = nullptr>
static ReturnType safeDeserialize(
    IColumn & column, const ISerialization &,
    CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
requires std::same_as<ReturnType, bool>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
{
    bool insert_default = check_for_null();
    if (insert_default)
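Here the two `safeDeserialize` overloads keep the dummy `ReturnType * = nullptr` template parameter from the old SFINAE form and are now told apart by mutually exclusive `requires` clauses. A small sketch of that selection mechanism with hypothetical names (the constraints alone are enough to disambiguate):

```cpp
#include <concepts>
#include <iostream>

// Two function templates with the same name and signature are distinguished
// purely by mutually exclusive constraints: exactly one is viable per ReturnType.
template <typename ReturnType = void>
requires std::same_as<ReturnType, void>
ReturnType deserialize_sketch()
{
    std::cout << "void variant: insert into a nullable column\n";
}

template <typename ReturnType>
requires std::same_as<ReturnType, bool>
ReturnType deserialize_sketch()
{
    std::cout << "bool variant: report whether a NULL was replaced by a default\n";
    return true;
}

int main()
{
    deserialize_sketch();        // picks the void variant
    deserialize_sketch<bool>();  // picks the bool variant
}
```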
@@ -534,18 +534,15 @@ struct JavaHashImpl
            static_cast<uint32_t>(x) ^ static_cast<uint32_t>(static_cast<uint64_t>(x) >> 32));
    }

    template <class T, typename std::enable_if<std::is_same_v<T, int8_t>
        || std::is_same_v<T, int16_t>
        || std::is_same_v<T, int32_t>, T>::type * = nullptr>
    template <class T, T * = nullptr>
    requires std::same_as<T, int8_t> || std::same_as<T, int16_t> || std::same_as<T, int32_t>
    static ReturnType apply(T x)
    {
        return x;
    }

    template <typename T, typename std::enable_if<!std::is_same_v<T, int8_t>
        && !std::is_same_v<T, int16_t>
        && !std::is_same_v<T, int32_t>
        && !std::is_same_v<T, int64_t>, T>::type * = nullptr>
    template <class T, T * = nullptr>
    requires(!std::same_as<T, int8_t> && !std::same_as<T, int16_t> && !std::same_as<T, int32_t>)
    static ReturnType apply(T x)
    {
        if (std::is_unsigned_v<T>)
@@ -88,8 +88,9 @@ public:
        }
    }

    template <typename T, typename ... Args, typename = std::enable_if_t<!std::is_same_v<T, DateTime64>>>
    inline auto execute(const T & t, Args && ... args) const
    template <typename T, typename... Args>
    requires (!std::same_as<T, DateTime64>)
    inline auto execute(const T & t, Args &&... args) const
    {
        return wrapped_transform.execute(t, std::forward<Args>(args)...);
    }
@@ -128,7 +129,8 @@ public:
        }
    }

    template <typename T, typename ... Args, typename = std::enable_if_t<!std::is_same_v<T, DateTime64>>>
    template <typename T, typename ... Args>
    requires (!std::same_as<T, DateTime64>)
    inline auto executeExtendedResult(const T & t, Args && ... args) const
    {
        return wrapped_transform.executeExtendedResult(t, std::forward<Args>(args)...);
src/IO/Archives/ArchiveUtils.h (new file, 14 lines)

@@ -0,0 +1,14 @@
#pragma once

#include "config.h"

#if USE_LIBARCHIVE

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-macro-identifier"

#include <archive.h>
#include <archive_entry.h>
#endif
#endif
@@ -40,18 +40,26 @@ public:
        virtual bool nextFile() = 0;
    };

    virtual const std::string & getPath() const = 0;

    /// Starts enumerating files in the archive.
    virtual std::unique_ptr<FileEnumerator> firstFile() = 0;

    using NameFilter = std::function<bool(const std::string &)>;

    /// Starts reading a file from the archive. The function returns a read buffer,
    /// you can read that buffer to extract uncompressed data from the archive.
    /// Several read buffers can be used at the same time in parallel.
    virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) = 0;
    virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) = 0;

    /// It's possible to convert a file enumerator to a read buffer and vice versa.
    virtual std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) = 0;
    virtual std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) = 0;

    virtual std::vector<std::string> getAllFiles() = 0;
    virtual std::vector<std::string> getAllFiles(NameFilter filter) = 0;

    /// Sets password used to decrypt files in the archive.
    virtual void setPassword(const String & /* password */) {}
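The new `NameFilter` overloads let callers select archive members by predicate instead of by exact name. A hedged usage sketch against this interface; the `.csv` criterion and the helper name are assumptions, not part of the commit:

```cpp
#include <IO/Archives/IArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <memory>
#include <string>
#include <vector>

/// Sketch: pick the first entry whose name ends with ".csv" from an already
/// constructed reader. The lambda is the NameFilter introduced in this hunk.
std::unique_ptr<DB::ReadBufferFromFileBase> open_first_csv(const std::shared_ptr<DB::IArchiveReader> & reader)
{
    auto is_csv = [](const std::string & name) { return name.ends_with(".csv"); };

    /// List matching names without opening them ...
    std::vector<std::string> csv_names = reader->getAllFiles(is_csv);
    if (csv_names.empty())
        return nullptr;

    /// ... then open the first entry that satisfies the same predicate.
    return reader->readFile(is_csv);
}
```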
src/IO/Archives/LibArchiveReader.cpp (new file, 348 lines)

@@ -0,0 +1,348 @@
#include <IO/Archives/LibArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>

#include <IO/Archives/ArchiveUtils.h>

#include <mutex>

namespace DB
{

#if USE_LIBARCHIVE

namespace ErrorCodes
{
    extern const int CANNOT_UNPACK_ARCHIVE;
    extern const int LOGICAL_ERROR;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int UNSUPPORTED_METHOD;
}

class LibArchiveReader::Handle
{
public:
    explicit Handle(std::string path_to_archive_, bool lock_on_reading_)
        : path_to_archive(path_to_archive_), lock_on_reading(lock_on_reading_)
    {
        current_archive = open(path_to_archive);
    }

    Handle(const Handle &) = delete;
    Handle(Handle && other) noexcept
        : current_archive(other.current_archive)
        , current_entry(other.current_entry)
        , lock_on_reading(other.lock_on_reading)
    {
        other.current_archive = nullptr;
        other.current_entry = nullptr;
    }

    ~Handle()
    {
        close(current_archive);
    }

    bool locateFile(const std::string & filename)
    {
        return locateFile([&](const std::string & file) { return file == filename; });
    }

    bool locateFile(NameFilter filter)
    {
        resetFileInfo();
        int err = ARCHIVE_OK;
        while (true)
        {
            err = readNextHeader(current_archive, &current_entry);

            if (err == ARCHIVE_RETRY)
                continue;

            if (err != ARCHIVE_OK)
                break;

            if (filter(archive_entry_pathname(current_entry)))
                return true;
        }

        checkError(err);
        return false;
    }

    bool nextFile()
    {
        resetFileInfo();
        int err = ARCHIVE_OK;
        do
        {
            err = readNextHeader(current_archive, &current_entry);
        } while (err == ARCHIVE_RETRY);

        checkError(err);
        return err == ARCHIVE_OK;
    }

    std::vector<std::string> getAllFiles(NameFilter filter)
    {
        auto * archive = open(path_to_archive);
        SCOPE_EXIT(
            close(archive);
        );

        struct archive_entry * entry = nullptr;

        std::vector<std::string> files;
        int error = readNextHeader(archive, &entry);
        while (error == ARCHIVE_OK || error == ARCHIVE_RETRY)
        {
            chassert(entry != nullptr);
            std::string name = archive_entry_pathname(entry);
            if (!filter || filter(name))
                files.push_back(std::move(name));

            error = readNextHeader(archive, &entry);
        }

        checkError(error);
        return files;
    }

    const String & getFileName() const
    {
        chassert(current_entry);
        if (!file_name)
            file_name.emplace(archive_entry_pathname(current_entry));

        return *file_name;
    }

    const FileInfo & getFileInfo() const
    {
        chassert(current_entry);
        if (!file_info)
        {
            file_info.emplace();
            file_info->uncompressed_size = archive_entry_size(current_entry);
            file_info->compressed_size = archive_entry_size(current_entry);
            file_info->is_encrypted = false;
        }

        return *file_info;
    }

    struct archive * current_archive;
    struct archive_entry * current_entry = nullptr;
private:
    void checkError(int error) const
    {
        if (error == ARCHIVE_FATAL)
            throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive));
    }

    void resetFileInfo()
    {
        file_name.reset();
        file_info.reset();
    }

    static struct archive * open(const String & path_to_archive)
    {
        auto * archive = archive_read_new();
        try
        {
            archive_read_support_filter_all(archive);
            archive_read_support_format_all(archive);
            if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
                throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive: {}", quoteString(path_to_archive));
        }
        catch (...)
        {
            close(archive);
            throw;
        }

        return archive;
    }

    static void close(struct archive * archive)
    {
        if (archive)
        {
            archive_read_close(archive);
            archive_read_free(archive);
        }
    }

    int readNextHeader(struct archive * archive, struct archive_entry ** entry) const
    {
        std::unique_lock lock(Handle::read_lock, std::defer_lock);
        if (lock_on_reading)
            lock.lock();

        return archive_read_next_header(archive, entry);
    }

    const String path_to_archive;

    /// for some archive types when we are reading headers static variables are used
    /// which are not thread-safe
    const bool lock_on_reading;
    static inline std::mutex read_lock;

    mutable std::optional<String> file_name;
    mutable std::optional<FileInfo> file_info;
};

class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator
{
public:
    explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) {}

    const String & getFileName() const override { return handle.getFileName(); }
    const FileInfo & getFileInfo() const override { return handle.getFileInfo(); }
    bool nextFile() override { return handle.nextFile(); }

    /// Releases owned handle to pass it to a read buffer.
    Handle releaseHandle() && { return std::move(handle); }
private:
    Handle handle;
};

class LibArchiveReader::ReadBufferFromLibArchive : public ReadBufferFromFileBase
{
public:
    explicit ReadBufferFromLibArchive(Handle handle_, std::string path_to_archive_)
        : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
        , handle(std::move(handle_))
        , path_to_archive(std::move(path_to_archive_))
    {}

    off_t seek(off_t /* off */, int /* whence */) override
    {
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Seek is not supported when reading from archive");
    }

    off_t getPosition() override
    {
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive");
    }

    String getFileName() const override { return handle.getFileName(); }

    Handle releaseHandle() &&
    {
        return std::move(handle);
    }

private:
    bool nextImpl() override
    {
        auto bytes_read = archive_read_data(handle.current_archive, internal_buffer.begin(), static_cast<int>(internal_buffer.size()));

        if (bytes_read < 0)
            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", handle.getFileName(), path_to_archive, archive_error_string(handle.current_archive));

        if (!bytes_read)
            return false;

        total_bytes_read += bytes;

        working_buffer = internal_buffer;
        working_buffer.resize(bytes_read);
        return true;
    }

    Handle handle;
    const String path_to_archive;
    size_t total_bytes_read = 0;
};

LibArchiveReader::LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_)
    : archive_name(std::move(archive_name_)), lock_on_reading(lock_on_reading_), path_to_archive(std::move(path_to_archive_))
{}

LibArchiveReader::~LibArchiveReader() = default;

const std::string & LibArchiveReader::getPath() const
{
    return path_to_archive;
}

bool LibArchiveReader::fileExists(const String & filename)
{
    Handle handle(path_to_archive, lock_on_reading);
    return handle.locateFile(filename);
}

LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String & filename)
{
    Handle handle(path_to_archive, lock_on_reading);
    if (!handle.locateFile(filename))
        throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive);
    return handle.getFileInfo();
}

std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::firstFile()
{
    Handle handle(path_to_archive, lock_on_reading);
    if (!handle.nextFile())
        return nullptr;

    return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}

std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename)
{
    return readFile([&](const std::string & file) { return file == filename; });
}

std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter)
{
    Handle handle(path_to_archive, lock_on_reading);
    if (!handle.locateFile(filter))
        throw Exception(
            ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
    return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
}

std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(std::unique_ptr<FileEnumerator> enumerator)
{
    if (!dynamic_cast<FileEnumeratorImpl *>(enumerator.get()))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong enumerator passed to readFile()");
    auto enumerator_impl = std::unique_ptr<FileEnumeratorImpl>(static_cast<FileEnumeratorImpl *>(enumerator.release()));
    auto handle = std::move(*enumerator_impl).releaseHandle();
    return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
}

std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::nextFile(std::unique_ptr<ReadBuffer> read_buffer)
{
    if (!dynamic_cast<ReadBufferFromLibArchive *>(read_buffer.get()))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()");
    auto read_buffer_from_libarchive = std::unique_ptr<ReadBufferFromLibArchive>(static_cast<ReadBufferFromLibArchive *>(read_buffer.release()));
    auto handle = std::move(*read_buffer_from_libarchive).releaseHandle();
    if (!handle.nextFile())
        return nullptr;
    return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}

std::vector<std::string> LibArchiveReader::getAllFiles()
{
    return getAllFiles({});
}

std::vector<std::string> LibArchiveReader::getAllFiles(NameFilter filter)
{
    Handle handle(path_to_archive, lock_on_reading);
    return handle.getAllFiles(filter);
}

void LibArchiveReader::setPassword(const String & /*password_*/)
{
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", archive_name);
}

#endif

}
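A hedged end-to-end sketch of the new reader: it assumes a local `data.tar` containing `table.csv` (both names are illustrative) and the single-argument `createArchiveReader(path)` overload declared in createArchiveReader.h:

```cpp
#include <IO/Archives/createArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/copyData.h>
#include <unistd.h>

/// Sketch: extract one file from a tar archive to stdout using the reader above.
int main()
{
    auto reader = DB::createArchiveReader("data.tar");   // dispatches to TarArchiveReader

    if (!reader->fileExists("table.csv"))
        return 1;

    auto in = reader->readFile("table.csv");             // ReadBufferFromFileBase over the entry
    DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
    DB::copyData(*in, out);                              // stream the uncompressed contents
    out.finalize();
    return 0;
}
```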
src/IO/Archives/LibArchiveReader.h (new file, 79 lines)

@@ -0,0 +1,79 @@
#pragma once

#include "config.h"

#include <IO/Archives/IArchiveReader.h>

#include <iostream>

namespace DB
{

#if USE_LIBARCHIVE

class ReadBuffer;
class ReadBufferFromFileBase;
class SeekableReadBuffer;

/// Implementation of IArchiveReader for reading archives using libarchive.
class LibArchiveReader : public IArchiveReader
{
public:
    ~LibArchiveReader() override;

    const std::string & getPath() const override;

    /// Returns true if there is a specified file in the archive.
    bool fileExists(const String & filename) override;

    /// Returns the information about a file stored in the archive.
    FileInfo getFileInfo(const String & filename) override;

    /// Starts enumerating files in the archive.
    std::unique_ptr<FileEnumerator> firstFile() override;

    /// Starts reading a file from the archive. The function returns a read buffer,
    /// you can read that buffer to extract uncompressed data from the archive.
    /// Several read buffers can be used at the same time in parallel.
    std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
    std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;

    /// It's possible to convert a file enumerator to a read buffer and vice versa.
    std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
    std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) override;

    std::vector<std::string> getAllFiles() override;
    std::vector<std::string> getAllFiles(NameFilter filter) override;

    /// Sets password used to decrypt the contents of the files in the archive.
    void setPassword(const String & password_) override;

protected:
    /// Constructs an archive's reader that will read from a file in the local filesystem.
    LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_);

private:
    class ReadBufferFromLibArchive;
    class Handle;
    class FileEnumeratorImpl;

    const std::string archive_name;
    const bool lock_on_reading;
    const String path_to_archive;
};

class TarArchiveReader : public LibArchiveReader
{
public:
    explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/ true, std::move(path_to_archive)) { }
};

class SevenZipArchiveReader : public LibArchiveReader
{
public:
    explicit SevenZipArchiveReader(std::string path_to_archive) : LibArchiveReader("7z", /*lock_on_reading_=*/ false, std::move(path_to_archive)) { }
};

#endif

}
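The two concrete readers above only pick a display name and decide whether header reads must be serialized (per the comment in the .cpp, some formats go through non-thread-safe statics in libarchive while reading headers; 7z does not need the lock). A further libarchive-supported format could be wired up the same way; `RarArchiveReader` below is purely hypothetical and not part of the commit:

```cpp
#include <IO/Archives/LibArchiveReader.h>

#if USE_LIBARCHIVE

namespace DB
{

/// Hypothetical sketch only: whether RAR header parsing actually needs the
/// shared read lock would have to be verified against libarchive first.
class RarArchiveReader : public LibArchiveReader
{
public:
    explicit RarArchiveReader(std::string path_to_archive)
        : LibArchiveReader("rar", /*lock_on_reading_=*/ true, std::move(path_to_archive))
    {
    }
};

}

#endif
```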
@@ -85,6 +85,26 @@ public:
        file_name = file_name_;
    }

    void locateFile(NameFilter filter)
    {
        int err = unzGoToFirstFile(raw_handle);
        if (err == UNZ_END_OF_LIST_OF_FILE)
            showError("No file was found satisfying the filter");

        do
        {
            checkResult(err);
            resetFileInfo();
            retrieveFileInfo();
            if (filter(getFileName()))
                return;

            err = unzGoToNextFile(raw_handle);
        } while (err != UNZ_END_OF_LIST_OF_FILE);

        showError("No file was found satisfying the filter");
    }

    bool tryLocateFile(const String & file_name_)
    {
        resetFileInfo();
@@ -131,6 +151,27 @@ public:
        return *file_info;
    }

    std::vector<std::string> getAllFiles(NameFilter filter)
    {
        std::vector<std::string> files;
        resetFileInfo();
        int err = unzGoToFirstFile(raw_handle);
        if (err == UNZ_END_OF_LIST_OF_FILE)
            return files;

        do
        {
            checkResult(err);
            resetFileInfo();
            retrieveFileInfo();
            if (!filter || filter(getFileName()))
                files.push_back(*file_name);
            err = unzGoToNextFile(raw_handle);
        } while (err != UNZ_END_OF_LIST_OF_FILE);

        return files;
    }

    void closeFile()
    {
        int err = unzCloseCurrentFile(raw_handle);
@@ -459,6 +500,11 @@ ZipArchiveReader::~ZipArchiveReader()
    }
}

const std::string & ZipArchiveReader::getPath() const
{
    return path_to_archive;
}

bool ZipArchiveReader::fileExists(const String & filename)
{
    return acquireHandle().tryLocateFile(filename);
@@ -486,6 +532,13 @@ std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String
    return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}

std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter)
{
    auto handle = acquireHandle();
    handle.locateFile(filter);
    return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}

std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(std::unique_ptr<FileEnumerator> enumerator)
{
    if (!dynamic_cast<FileEnumeratorImpl *>(enumerator.get()))
@@ -506,6 +559,17 @@ std::unique_ptr<ZipArchiveReader::FileEnumerator> ZipArchiveReader::nextFile(std
    return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}

std::vector<std::string> ZipArchiveReader::getAllFiles()
{
    return getAllFiles({});
}

std::vector<std::string> ZipArchiveReader::getAllFiles(NameFilter filter)
{
    auto handle = acquireHandle();
    return handle.getAllFiles(filter);
}

void ZipArchiveReader::setPassword(const String & password_)
{
    std::lock_guard lock{mutex};
@@ -27,6 +27,8 @@ public:

    ~ZipArchiveReader() override;

    const std::string & getPath() const override;

    /// Returns true if there is a specified file in the archive.
    bool fileExists(const String & filename) override;

@@ -40,11 +42,15 @@ public:
    /// you can read that buffer to extract uncompressed data from the archive.
    /// Several read buffers can be used at the same time in parallel.
    std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
    std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;

    /// It's possible to convert a file enumerator to a read buffer and vice versa.
    std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
    std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) override;

    std::vector<std::string> getAllFiles() override;
    std::vector<std::string> getAllFiles(NameFilter filter) override;

    /// Sets password used to decrypt the contents of the files in the archive.
    void setPassword(const String & password_) override;
@@ -1,5 +1,6 @@
#include <IO/Archives/createArchiveReader.h>
#include <IO/Archives/ZipArchiveReader.h>
#include <IO/Archives/LibArchiveReader.h>
#include <Common/Exception.h>

@@ -29,10 +30,28 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
        return std::make_shared<ZipArchiveReader>(path_to_archive, archive_read_function, archive_size);
#else
        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled");
#endif
    }
    else if (path_to_archive.ends_with(".tar") || path_to_archive.ends_with("tar.gz"))
    {
#if USE_LIBARCHIVE
        return std::make_shared<TarArchiveReader>(path_to_archive);
#else
        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled");
#endif
    }
    else if (path_to_archive.ends_with(".7z"))
    {
#if USE_LIBARCHIVE
        return std::make_shared<SevenZipArchiveReader>(path_to_archive);
#else
        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled");
#endif
    }
    else
    {
        throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive);
    }
}

}
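These extension checks are what back the `archive :: file` form added to the `file` table function documentation earlier in this commit. A sketch of how a caller might split such a spec and hand the archive part to this dispatcher; the ' :: ' delimiter handling below is an illustrative assumption, the real parsing lives elsewhere in the server:

```cpp
#include <IO/Archives/createArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <memory>
#include <stdexcept>
#include <string>

/// Illustrative only: split "archive.tar :: table.csv" and open the inner file.
std::unique_ptr<DB::ReadBufferFromFileBase> open_from_archive_sketch(const std::string & spec)
{
    const std::string delimiter = " :: ";
    auto pos = spec.find(delimiter);
    if (pos == std::string::npos)
        throw std::runtime_error("expected '<archive> :: <file>'");

    std::string archive_path = spec.substr(0, pos);
    std::string file_in_archive = spec.substr(pos + delimiter.size());

    /// createArchiveReader picks the Zip/Tar/SevenZip reader by extension,
    /// as shown in the hunk above.
    auto reader = DB::createArchiveReader(archive_path);
    return reader->readFile(file_in_archive);
}
```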
@@ -45,6 +45,9 @@ bool MySQLPacketPayloadReadBuffer::nextImpl()
    }

    in.nextIfAtEnd();
    /// Don't return a buffer when no bytes available
    if (!in.hasPendingData())
        return false;

    working_buffer = ReadBuffer::Buffer(in.position(), in.buffer().end());
    size_t count = std::min(in.available(), payload_length - offset);
    working_buffer.resize(count);
@ -536,7 +536,10 @@ void PocoHTTPClient::makeRequestInternalImpl(
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("Failed to make request to: {}", uri));
|
||||
auto error_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true);
|
||||
error_message.text = fmt::format("Failed to make request to: {}: {}", uri, error_message.text);
|
||||
LOG_INFO(log, error_message);
|
||||
|
||||
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
|
||||
response->SetClientErrorMessage(getCurrentExceptionMessage(false));
|
||||
|
||||
|
@ -42,13 +42,50 @@ void ZstdDeflatingAppendableWriteBuffer::nextImpl()
|
||||
if (!offset())
|
||||
return;
|
||||
|
||||
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
|
||||
input.size = offset();
|
||||
input.pos = 0;
|
||||
|
||||
if (first_write && append_to_existing_file && isNeedToAddEmptyBlock())
|
||||
{
|
||||
addEmptyBlock();
|
||||
first_write = false;
|
||||
}
|
||||
|
||||
flush(ZSTD_e_flush);
|
||||
try
|
||||
{
|
||||
bool ended = false;
|
||||
do
|
||||
{
|
||||
out->nextIfAtEnd();
|
||||
|
||||
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
|
||||
output.size = out->buffer().size();
|
||||
output.pos = out->offset();
|
||||
|
||||
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_flush);
|
||||
if (ZSTD_isError(compression_result))
|
||||
throw Exception(
|
||||
ErrorCodes::ZSTD_ENCODER_FAILED,
|
||||
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
|
||||
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
|
||||
|
||||
first_write = false;
|
||||
out->position() = out->buffer().begin() + output.pos;
|
||||
|
||||
bool everything_was_compressed = (input.pos == input.size);
|
||||
bool everything_was_flushed = compression_result == 0;
|
||||
|
||||
ended = everything_was_compressed && everything_was_flushed;
|
||||
} while (!ended);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to write next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
throw;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ZstdDeflatingAppendableWriteBuffer::~ZstdDeflatingAppendableWriteBuffer()
|
||||
@ -66,58 +103,58 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeImpl()
|
||||
}
|
||||
else
|
||||
{
|
||||
finalizeBefore();
|
||||
out->finalize();
|
||||
finalizeAfter();
|
||||
try
|
||||
{
|
||||
finalizeBefore();
|
||||
out->finalize();
|
||||
finalizeAfter();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to flush next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ZstdDeflatingAppendableWriteBuffer::finalizeBefore()
|
||||
{
|
||||
/// Actually we can use ZSTD_e_flush here and add empty termination
|
||||
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
|
||||
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
|
||||
/// but console zstd utility cannot.
|
||||
flush(ZSTD_e_end);
|
||||
}
|
||||
next();
|
||||
|
||||
out->nextIfAtEnd();
|
||||
|
||||
void ZstdDeflatingAppendableWriteBuffer::flush(ZSTD_EndDirective mode)
|
||||
{
|
||||
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
|
||||
input.size = offset();
|
||||
input.pos = 0;
|
||||
|
||||
try
|
||||
{
|
||||
bool ended = false;
|
||||
do
|
||||
{
|
||||
out->nextIfAtEnd();
|
||||
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
|
||||
output.size = out->buffer().size();
|
||||
output.pos = out->offset();
|
||||
|
||||
/// Actually we can use ZSTD_e_flush here and add empty termination
|
||||
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
|
||||
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
|
||||
/// but console zstd utility cannot.
|
||||
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
|
||||
while (remaining != 0)
|
||||
{
|
||||
if (ZSTD_isError(remaining))
|
||||
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
|
||||
"ZSTD stream encoder end failed: error: '{}' ZSTD version: {}",
|
||||
ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING);
|
||||
|
||||
remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
|
||||
|
||||
out->position() = out->buffer().begin() + output.pos;
|
||||
|
||||
if (!out->hasPendingData())
|
||||
{
|
||||
out->next();
|
||||
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
|
||||
output.size = out->buffer().size();
|
||||
output.pos = out->offset();
|
||||
|
||||
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode);
|
||||
if (ZSTD_isError(compression_result))
|
||||
throw Exception(
|
||||
ErrorCodes::ZSTD_ENCODER_FAILED,
|
||||
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
|
||||
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
|
||||
|
||||
out->position() = out->buffer().begin() + output.pos;
|
||||
|
||||
bool everything_was_compressed = (input.pos == input.size);
|
||||
bool everything_was_flushed = compression_result == 0;
|
||||
|
||||
ended = everything_was_compressed && everything_was_flushed;
|
||||
} while (!ended);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Do not try to write next time after exception.
|
||||
out->position() = out->buffer().begin();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
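For readers following the refactor above: nextImpl() and finalizeBefore() now share one flush(mode) loop around ZSTD_compressStream2. A stripped-down, self-contained sketch of that loop using the plain zstd API (the real class's buffer plumbing is simplified away):

#include <zstd.h>
#include <stdexcept>
#include <vector>

/// Drive ZSTD_compressStream2 until the input is consumed and the encoder has nothing left to flush.
/// mode == ZSTD_e_flush corresponds to nextImpl(), mode == ZSTD_e_end to finalizeBefore().
void flushZstd(ZSTD_CCtx * cctx, ZSTD_EndDirective mode, const std::vector<char> & src, std::vector<char> & dst)
{
    ZSTD_inBuffer input{src.data(), src.size(), 0};
    bool ended = false;
    do
    {
        char chunk[4096];
        ZSTD_outBuffer output{chunk, sizeof(chunk), 0};
        size_t remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
        if (ZSTD_isError(remaining))
            throw std::runtime_error(ZSTD_getErrorName(remaining));
        dst.insert(dst.end(), chunk, chunk + output.pos);
        /// Done only when all input was consumed and the encoder reports zero bytes pending.
        ended = (input.pos == input.size) && (remaining == 0);
    } while (!ended);
}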
@ -52,8 +52,6 @@ private:
|
||||
/// NOTE: will fill compressed data to the out.working_buffer, but will not call out.next method until the buffer is full
|
||||
void nextImpl() override;
|
||||
|
||||
void flush(ZSTD_EndDirective mode);
|
||||
|
||||
/// Write terminating ZSTD_e_end: empty block + frame epilogue. BTW it
|
||||
/// should be almost noop, because frame epilogue contains only checksums,
|
||||
/// and they are disabled for this buffer.
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "config.h"
|
||||
|
||||
#include <IO/Archives/ArchiveUtils.h>
|
||||
#include <IO/Archives/IArchiveReader.h>
|
||||
#include <IO/Archives/IArchiveWriter.h>
|
||||
#include <IO/Archives/createArchiveReader.h>
|
||||
@ -19,11 +20,52 @@
|
||||
namespace DB::ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_UNPACK_ARCHIVE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace DB;
|
||||
|
||||
enum class ArchiveType : uint8_t
|
||||
{
|
||||
Tar,
|
||||
SevenZip
|
||||
};
|
||||
|
||||
template <ArchiveType archive_type>
|
||||
bool createArchiveWithFiles(const std::string & archivename, const std::map<std::string, std::string> & files)
|
||||
{
|
||||
struct archive * a;
|
||||
struct archive_entry * entry;
|
||||
|
||||
a = archive_write_new();
|
||||
|
||||
if constexpr (archive_type == ArchiveType::Tar)
|
||||
archive_write_set_format_pax_restricted(a);
|
||||
else if constexpr (archive_type == ArchiveType::SevenZip)
|
||||
archive_write_set_format_7zip(a);
|
||||
else
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Invalid archive type requested: {}", static_cast<size_t>(archive_type));
|
||||
|
||||
archive_write_open_filename(a, archivename.c_str());
|
||||
|
||||
for (const auto & [filename, content] : files) {
|
||||
entry = archive_entry_new();
|
||||
archive_entry_set_pathname(entry, filename.c_str());
|
||||
archive_entry_set_size(entry, content.size());
|
||||
archive_entry_set_mode(entry, S_IFREG | 0644); // regular file with rw-r--r-- permissions
|
||||
archive_entry_set_mtime(entry, time(nullptr), 0);
|
||||
archive_write_header(a, entry);
|
||||
archive_write_data(a, content.c_str(), content.size());
|
||||
archive_entry_free(entry);
|
||||
}
|
||||
|
||||
archive_write_close(a);
|
||||
archive_write_free(a);
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
class ArchiveReaderAndWriterTest : public ::testing::TestWithParam<const char *>
|
||||
{
|
||||
@ -327,6 +369,127 @@ TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist)
|
||||
[&]{ createArchiveReader(getPathToArchive()); });
|
||||
}
|
||||
|
||||
TEST(TarArchiveReaderTest, FileExists) {
|
||||
String archive_path = "archive.tar";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(filename), true);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(TarArchiveReaderTest, ReadFile) {
|
||||
String archive_path = "archive.tar";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto in = reader->readFile(filename);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(TarArchiveReaderTest, ReadTwoFiles) {
|
||||
String archive_path = "archive.tar";
|
||||
String file1 = "file1.txt";
|
||||
String contents1 = "test1";
|
||||
String file2 = "file2.txt";
|
||||
String contents2 = "test2";
|
||||
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{file1, contents1}, {file2, contents2}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(file1), true);
|
||||
EXPECT_EQ(reader->fileExists(file2), true);
|
||||
auto in = reader->readFile(file1);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents1);
|
||||
in = reader->readFile(file2);
|
||||
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents2);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
|
||||
TEST(TarArchiveReaderTest, CheckFileInfo) {
|
||||
String archive_path = "archive.tar";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto info = reader->getFileInfo(filename);
|
||||
EXPECT_EQ(info.uncompressed_size, contents.size());
|
||||
EXPECT_GT(info.compressed_size, 0);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(SevenZipArchiveReaderTest, FileExists) {
|
||||
String archive_path = "archive.7z";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(filename), true);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(SevenZipArchiveReaderTest, ReadFile) {
|
||||
String archive_path = "archive.7z";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto in = reader->readFile(filename);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(SevenZipArchiveReaderTest, CheckFileInfo) {
|
||||
String archive_path = "archive.7z";
|
||||
String filename = "file.txt";
|
||||
String contents = "test";
|
||||
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
auto info = reader->getFileInfo(filename);
|
||||
EXPECT_EQ(info.uncompressed_size, contents.size());
|
||||
EXPECT_GT(info.compressed_size, 0);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
|
||||
String archive_path = "archive.7z";
|
||||
String file1 = "file1.txt";
|
||||
String contents1 = "test1";
|
||||
String file2 = "file2.txt";
|
||||
String contents2 = "test2";
|
||||
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{file1, contents1}, {file2, contents2}});
|
||||
EXPECT_EQ(created, true);
|
||||
auto reader = createArchiveReader(archive_path);
|
||||
EXPECT_EQ(reader->fileExists(file1), true);
|
||||
EXPECT_EQ(reader->fileExists(file2), true);
|
||||
auto in = reader->readFile(file1);
|
||||
String str;
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents1);
|
||||
in = reader->readFile(file2);
|
||||
|
||||
readStringUntilEOF(str, *in);
|
||||
EXPECT_EQ(str, contents2);
|
||||
fs::remove(archive_path);
|
||||
}
|
||||
|
||||
|
||||
#if USE_MINIZIP
|
||||
|
||||
@ -334,7 +497,7 @@ namespace
|
||||
{
|
||||
const char * supported_archive_file_exts[] =
|
||||
{
|
||||
".zip",
|
||||
".zip"
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -62,11 +62,13 @@ KeyMetadata::KeyMetadata(
|
||||
CleanupQueue & cleanup_queue_,
|
||||
DownloadQueue & download_queue_,
|
||||
Poco::Logger * log_,
|
||||
std::shared_mutex & key_prefix_directory_mutex_,
|
||||
bool created_base_directory_)
|
||||
: key(key_)
|
||||
, key_path(key_path_)
|
||||
, cleanup_queue(cleanup_queue_)
|
||||
, download_queue(download_queue_)
|
||||
, key_prefix_directory_mutex(key_prefix_directory_mutex_)
|
||||
, created_base_directory(created_base_directory_)
|
||||
, log(log_)
|
||||
{
|
||||
@ -102,16 +104,21 @@ bool KeyMetadata::createBaseDirectory()
|
||||
{
|
||||
try
|
||||
{
|
||||
std::shared_lock lock(key_prefix_directory_mutex);
|
||||
fs::create_directories(key_path);
|
||||
}
|
||||
catch (...)
|
||||
catch (const fs::filesystem_error & e)
|
||||
{
|
||||
/// Avoid errors like
|
||||
/// std::__1::__fs::filesystem::filesystem_error: filesystem error: in create_directories: No space left on device
|
||||
/// and mark file segment with SKIP_CACHE state
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
created_base_directory = false;
|
||||
return false;
|
||||
|
||||
if (e.code() == std::errc::no_space_on_device)
|
||||
{
|
||||
LOG_TRACE(log, "Failed to create base directory for key {}, "
|
||||
"because no space left on device", key);
|
||||
|
||||
return false;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
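The change above narrows the catch to fs::filesystem_error and treats "no space left on device" as an expected, non-fatal outcome. The underlying pattern, isolated (standalone sketch, not the cache code itself):

#include <filesystem>
#include <system_error>

bool tryCreateDirectories(const std::filesystem::path & path)
{
    try
    {
        std::filesystem::create_directories(path);
        return true;
    }
    catch (const std::filesystem::filesystem_error & e)
    {
        if (e.code() == std::errc::no_space_on_device)
            return false;   /// expected: report failure to the caller instead of rethrowing
        throw;              /// anything else is still an error
    }
}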
@ -200,7 +207,7 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
|
||||
|
||||
it = emplace(
|
||||
key, std::make_shared<KeyMetadata>(
|
||||
key, getPathForKey(key), *cleanup_queue, *download_queue, log, is_initial_load)).first;
|
||||
key, getPathForKey(key), *cleanup_queue, *download_queue, log, key_prefix_directory_mutex, is_initial_load)).first;
|
||||
}
|
||||
|
||||
key_metadata = it->second;
|
||||
@ -315,17 +322,10 @@ void CacheMetadata::doCleanup()
|
||||
|
||||
try
|
||||
{
|
||||
std::unique_lock mutex(key_prefix_directory_mutex);
|
||||
if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory))
|
||||
fs::remove(key_prefix_directory);
|
||||
}
|
||||
catch (const fs::filesystem_error & e)
|
||||
{
|
||||
/// Key prefix directory can become non-empty just now, it is expected.
|
||||
if (e.code() == std::errc::directory_not_empty)
|
||||
continue;
|
||||
LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true));
|
||||
chassert(false);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true));
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Interpreters/Cache/FileCacheKey.h>
|
||||
#include <Interpreters/Cache/FileSegment.h>
|
||||
#include <Interpreters/Cache/FileCache_fwd_internal.h>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -50,6 +51,7 @@ struct KeyMetadata : public std::map<size_t, FileSegmentMetadataPtr>,
|
||||
CleanupQueue & cleanup_queue_,
|
||||
DownloadQueue & download_queue_,
|
||||
Poco::Logger * log_,
|
||||
std::shared_mutex & key_prefix_directory_mutex_,
|
||||
bool created_base_directory_ = false);
|
||||
|
||||
enum class KeyState
|
||||
@ -76,6 +78,7 @@ private:
|
||||
KeyGuard guard;
|
||||
CleanupQueue & cleanup_queue;
|
||||
DownloadQueue & download_queue;
|
||||
std::shared_mutex & key_prefix_directory_mutex;
|
||||
std::atomic<bool> created_base_directory = false;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
@ -128,6 +131,7 @@ private:
|
||||
mutable CacheMetadataGuard guard;
|
||||
const CleanupQueuePtr cleanup_queue;
|
||||
const DownloadQueuePtr download_queue;
|
||||
std::shared_mutex key_prefix_directory_mutex;
|
||||
Poco::Logger * log;
|
||||
|
||||
void downloadImpl(FileSegment & file_segment, std::optional<Memory<>> & memory);
|
||||
|
@ -520,6 +520,8 @@ ContextMutablePtr Session::makeSessionContext()
|
||||
{},
|
||||
session_context->getSettingsRef().max_sessions_for_user);
|
||||
|
||||
recordLoginSucess(session_context);
|
||||
|
||||
return session_context;
|
||||
}
|
||||
|
||||
@ -582,6 +584,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std:
|
||||
{ session_name_ },
|
||||
max_sessions_for_user);
|
||||
|
||||
recordLoginSucess(session_context);
|
||||
|
||||
return session_context;
|
||||
}
|
||||
|
||||
@ -655,24 +659,38 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
|
||||
if (user_id)
|
||||
user = query_context->getUser();
|
||||
|
||||
if (!notified_session_log_about_login)
|
||||
{
|
||||
if (auto session_log = getSessionLog())
|
||||
{
|
||||
session_log->addLoginSuccess(
|
||||
auth_id,
|
||||
named_session ? std::optional<std::string>(named_session->key.second) : std::nullopt,
|
||||
*query_context,
|
||||
user);
|
||||
|
||||
notified_session_log_about_login = true;
|
||||
}
|
||||
}
|
||||
/// Interserver does not create session context
|
||||
recordLoginSucess(query_context);
|
||||
|
||||
return query_context;
|
||||
}
|
||||
|
||||
|
||||
void Session::recordLoginSucess(ContextPtr login_context) const
|
||||
{
|
||||
if (notified_session_log_about_login)
|
||||
return;
|
||||
|
||||
if (!login_context)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Session or query context must be created");
|
||||
|
||||
if (auto session_log = getSessionLog())
|
||||
{
|
||||
const auto & settings = login_context->getSettingsRef();
|
||||
const auto access = login_context->getAccess();
|
||||
|
||||
session_log->addLoginSuccess(auth_id,
|
||||
named_session ? named_session->key.second : "",
|
||||
settings,
|
||||
access,
|
||||
getClientInfo(),
|
||||
user);
|
||||
|
||||
notified_session_log_about_login = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Session::releaseSessionID()
|
||||
{
|
||||
if (!named_session)
|
||||
|
@ -97,6 +97,8 @@ public:
|
||||
private:
|
||||
std::shared_ptr<SessionLog> getSessionLog() const;
|
||||
ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const;
|
||||
void recordLoginSucess(ContextPtr login_context) const;
|
||||
|
||||
|
||||
mutable bool notified_session_log_about_login = false;
|
||||
const UUID auth_id;
|
||||
|
@ -199,12 +199,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const
|
||||
columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length());
|
||||
}
|
||||
|
||||
void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context, const UserPtr & login_user)
|
||||
void SessionLog::addLoginSuccess(const UUID & auth_id,
|
||||
const String & session_id,
|
||||
const Settings & settings,
|
||||
const ContextAccessPtr & access,
|
||||
const ClientInfo & client_info,
|
||||
const UserPtr & login_user)
|
||||
{
|
||||
const auto access = login_context.getAccess();
|
||||
const auto & settings = login_context.getSettingsRef();
|
||||
const auto & client_info = login_context.getClientInfo();
|
||||
|
||||
DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS);
|
||||
log_entry.client_info = client_info;
|
||||
|
||||
@ -215,8 +216,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional<String> ses
|
||||
}
|
||||
log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : "";
|
||||
|
||||
if (session_id)
|
||||
log_entry.session_id = *session_id;
|
||||
log_entry.session_id = session_id;
|
||||
|
||||
if (const auto roles_info = access->getRolesInfo())
|
||||
log_entry.roles = roles_info->getCurrentRolesNames();
|
||||
|
@ -20,6 +20,7 @@ enum SessionLogElementType : int8_t
|
||||
class ContextAccess;
|
||||
struct User;
|
||||
using UserPtr = std::shared_ptr<const User>;
|
||||
using ContextAccessPtr = std::shared_ptr<const ContextAccess>;
|
||||
|
||||
/** A struct which will be inserted as row into session_log table.
|
||||
*
|
||||
@ -72,7 +73,13 @@ class SessionLog : public SystemLog<SessionLogElement>
|
||||
using SystemLog<SessionLogElement>::SystemLog;
|
||||
|
||||
public:
|
||||
void addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context, const UserPtr & login_user);
|
||||
void addLoginSuccess(const UUID & auth_id,
|
||||
const String & session_id,
|
||||
const Settings & settings,
|
||||
const ContextAccessPtr & access,
|
||||
const ClientInfo & client_info,
|
||||
const UserPtr & login_user);
|
||||
|
||||
void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional<String> & user, const Exception & reason);
|
||||
void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info);
|
||||
};
|
||||
|
@ -110,6 +110,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<Customiz
|
||||
char countifdistinct[] = "countifdistinct";
|
||||
using CustomizeCountIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countifdistinct>>, true>;
|
||||
|
||||
char countdistinctif[] = "countdistinctif";
|
||||
using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinctif>>, true>;
|
||||
|
||||
char in[] = "in";
|
||||
using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>;
|
||||
|
||||
@ -1368,6 +1371,12 @@ void TreeRewriter::normalize(
|
||||
CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"};
|
||||
CustomizeIfDistinctVisitor(data_distinct_if).visit(query);
|
||||
|
||||
if (settings.rewrite_count_distinct_if_with_count_distinct_implementation)
|
||||
{
|
||||
CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"};
|
||||
CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query);
|
||||
}
|
||||
|
||||
ExistsExpressionVisitor::Data exists;
|
||||
ExistsExpressionVisitor(exists).visit(query);
|
||||
|
||||
|
@ -108,7 +108,8 @@ namespace
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_same_v<T, double> || std::is_same_v<T, QuotaValue>>>
|
||||
template <typename T>
|
||||
requires std::same_as<T, double> || std::same_as<T, QuotaValue>
|
||||
T fieldToNumber(const Field & f)
|
||||
{
|
||||
if (f.getType() == Field::Types::String)
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <base/types.h>
|
||||
|
||||
@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector<size_t> & ind
|
||||
Columns columns;
|
||||
const Columns & all_cols = chunk.getColumns();
|
||||
for (const auto & index : indices)
|
||||
columns.push_back(all_cols.at(index));
|
||||
{
|
||||
auto col = recursiveRemoveSparse(all_cols.at(index));
|
||||
columns.push_back(std::move(col));
|
||||
}
|
||||
|
||||
return columns;
|
||||
}
|
||||
|
||||
@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare()
|
||||
LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered",
|
||||
Poco::toLower(getDescription()), fmt::join(column_names, ", "),
|
||||
stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows,
|
||||
100 - 100.0 * stat.result_rows / stat.consumed_rows);
|
||||
stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -561,8 +561,7 @@ void HTTPHandler::processQuery(
|
||||
session->makeSessionContext();
|
||||
}
|
||||
|
||||
auto client_info = session->getClientInfo();
|
||||
auto context = session->makeQueryContext(std::move(client_info));
|
||||
auto context = session->makeQueryContext();
|
||||
|
||||
/// This parameter is used to tune the behavior of output formats (such as Native) for compatibility.
|
||||
if (params.has("client_protocol_version"))
|
||||
|
@ -300,6 +300,7 @@ void registerStorageAzureBlob(StorageFactory & factory)
|
||||
args.constraints,
|
||||
args.comment,
|
||||
format_settings,
|
||||
/* distributed_processing */ false,
|
||||
partition_by);
|
||||
},
|
||||
{
|
||||
@ -448,12 +449,13 @@ StorageAzureBlob::StorageAzureBlob(
|
||||
const ConstraintsDescription & constraints_,
|
||||
const String & comment,
|
||||
std::optional<FormatSettings> format_settings_,
|
||||
bool distributed_processing_,
|
||||
ASTPtr partition_by_)
|
||||
: IStorage(table_id_)
|
||||
, name("AzureBlobStorage")
|
||||
, configuration(configuration_)
|
||||
, object_storage(std::move(object_storage_))
|
||||
, distributed_processing(false)
|
||||
, distributed_processing(distributed_processing_)
|
||||
, format_settings(format_settings_)
|
||||
, partition_by(partition_by_)
|
||||
{
|
||||
@ -463,7 +465,7 @@ StorageAzureBlob::StorageAzureBlob(
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
if (columns_.empty())
|
||||
{
|
||||
auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context);
|
||||
auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing);
|
||||
storage_metadata.setColumns(columns);
|
||||
}
|
||||
else
|
||||
@ -672,7 +674,12 @@ Pipe StorageAzureBlob::read(
|
||||
Pipes pipes;
|
||||
|
||||
std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper;
|
||||
if (configuration.withGlobs())
|
||||
if (distributed_processing)
|
||||
{
|
||||
iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(local_context,
|
||||
local_context->getReadTaskCallback());
|
||||
}
|
||||
else if (configuration.withGlobs())
|
||||
{
|
||||
/// Iterate through disclosed globs and make a source for each file
|
||||
iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
|
||||
@ -845,6 +852,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
|
||||
blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata);
|
||||
if (outer_blobs)
|
||||
outer_blobs->emplace_back(blobs_with_metadata.back());
|
||||
is_finished = true;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -863,8 +871,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
|
||||
{
|
||||
std::lock_guard lock(next_mutex);
|
||||
|
||||
if (is_finished)
|
||||
if (is_finished && index >= blobs_with_metadata.size())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size();
|
||||
|
||||
@ -1184,11 +1194,17 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData(
|
||||
AzureObjectStorage * object_storage,
|
||||
const Configuration & configuration,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr ctx)
|
||||
ContextPtr ctx,
|
||||
bool distributed_processing)
|
||||
{
|
||||
RelativePathsWithMetadata read_keys;
|
||||
std::shared_ptr<StorageAzureBlobSource::IIterator> file_iterator;
|
||||
if (configuration.withGlobs())
|
||||
if (distributed_processing)
|
||||
{
|
||||
file_iterator = std::make_shared<StorageAzureBlobSource::ReadIterator>(ctx,
|
||||
ctx->getReadTaskCallback());
|
||||
}
|
||||
else if (configuration.withGlobs())
|
||||
{
|
||||
file_iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
|
||||
object_storage, configuration.container, configuration.blob_path, nullptr, Block{}, ctx, &read_keys);
|
||||
|
@ -63,6 +63,7 @@ public:
|
||||
const ConstraintsDescription & constraints_,
|
||||
const String & comment,
|
||||
std::optional<FormatSettings> format_settings_,
|
||||
bool distributed_processing_,
|
||||
ASTPtr partition_by_);
|
||||
|
||||
static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
|
||||
@ -108,7 +109,8 @@ public:
|
||||
AzureObjectStorage * object_storage,
|
||||
const Configuration & configuration,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr ctx);
|
||||
ContextPtr ctx,
|
||||
bool distributed_processing = false);
|
||||
|
||||
private:
|
||||
std::string name;
|
||||
@ -137,7 +139,6 @@ private:
|
||||
const String & format_name,
|
||||
const ContextPtr & ctx);
|
||||
|
||||
|
||||
};
|
||||
|
||||
class StorageAzureBlobSource : public ISource, WithContext
|
||||
@ -169,7 +170,7 @@ public:
|
||||
RelativePathWithMetadata next() override;
|
||||
~GlobIterator() override = default;
|
||||
|
||||
private:
|
||||
private:
|
||||
AzureObjectStorage * object_storage;
|
||||
std::string container;
|
||||
String blob_path_with_globs;
|
||||
@ -194,6 +195,21 @@ public:
|
||||
std::function<void(FileProgress)> file_progress_callback;
|
||||
};
|
||||
|
||||
class ReadIterator : public IIterator
|
||||
{
|
||||
public:
|
||||
explicit ReadIterator(ContextPtr context_,
|
||||
const ReadTaskCallback & callback_)
|
||||
: IIterator(context_), callback(callback_) { }
|
||||
RelativePathWithMetadata next() override
|
||||
{
|
||||
return {callback(), {}};
|
||||
}
|
||||
|
||||
private:
|
||||
ReadTaskCallback callback;
|
||||
};
|
||||
|
||||
class KeysIterator : public IIterator
|
||||
{
|
||||
public:
|
||||
|
99
src/Storages/StorageAzureBlobCluster.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
#include "Storages/StorageAzureBlobCluster.h"
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <Processors/Transforms/AddingDefaultsTransform.h>
|
||||
#include <QueryPipeline/RemoteQueryExecutor.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/StorageURL.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/StorageDictionary.h>
|
||||
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
|
||||
#include <Storages/getVirtualsForStorage.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <TableFunctions/TableFunctionAzureBlobStorageCluster.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
StorageAzureBlobCluster::StorageAzureBlobCluster(
|
||||
const String & cluster_name_,
|
||||
const StorageAzureBlob::Configuration & configuration_,
|
||||
std::unique_ptr<AzureObjectStorage> && object_storage_,
|
||||
const StorageID & table_id_,
|
||||
const ColumnsDescription & columns_,
|
||||
const ConstraintsDescription & constraints_,
|
||||
ContextPtr context_,
|
||||
bool structure_argument_was_provided_)
|
||||
: IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_)
|
||||
, configuration{configuration_}
|
||||
, object_storage(std::move(object_storage_))
|
||||
{
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL());
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
|
||||
if (columns_.empty())
|
||||
{
|
||||
/// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function
|
||||
auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false);
|
||||
storage_metadata.setColumns(columns);
|
||||
}
|
||||
else
|
||||
storage_metadata.setColumns(columns_);
|
||||
|
||||
storage_metadata.setConstraints(constraints_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
|
||||
auto default_virtuals = NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
|
||||
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
|
||||
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
|
||||
for (const auto & column : virtual_columns)
|
||||
virtual_block.insert({column.type->createColumn(), column.type, column.name});
|
||||
}
|
||||
|
||||
void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context)
|
||||
{
|
||||
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
|
||||
if (!expression_list)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query));
|
||||
|
||||
TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
|
||||
{
|
||||
auto iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
|
||||
object_storage.get(), configuration.container, configuration.blob_path,
|
||||
query, virtual_block, context, nullptr);
|
||||
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String{ return iterator->next().relative_path; });
|
||||
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
|
||||
}
|
||||
|
||||
NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const
|
||||
{
|
||||
return virtual_columns;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
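getTaskIteratorExtension() above is the initiator-side half of the distributed read: it exposes the glob iterator as a String() callback, and each worker's StorageAzureBlobSource::ReadIterator keeps pulling paths from it until an empty string signals that no tasks remain. A condensed, self-contained sketch of that handshake (simplified types; not the actual RemoteQueryExecutor wiring):

#include <functional>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    /// Initiator side: wrap an iterator over resolved blob paths into a callback.
    std::vector<std::string> blobs{"a.csv", "b.csv"};
    size_t pos = 0;
    std::function<std::string()> next_task = [&]() -> std::string
    {
        return pos < blobs.size() ? blobs[pos++] : std::string{};   /// empty string == no more tasks
    };

    /// Worker side (what ReadIterator::next() effectively does): ask until the callback runs dry.
    for (std::string task = next_task(); !task.empty(); task = next_task())
        std::cout << "processing " << task << '\n';
}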
53
src/Storages/StorageAzureBlobCluster.h
Normal file
@ -0,0 +1,53 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include "Client/Connection.h"
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Storages/IStorageCluster.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
class StorageAzureBlobCluster : public IStorageCluster
|
||||
{
|
||||
public:
|
||||
StorageAzureBlobCluster(
|
||||
const String & cluster_name_,
|
||||
const StorageAzureBlob::Configuration & configuration_,
|
||||
std::unique_ptr<AzureObjectStorage> && object_storage_,
|
||||
const StorageID & table_id_,
|
||||
const ColumnsDescription & columns_,
|
||||
const ConstraintsDescription & constraints_,
|
||||
ContextPtr context_,
|
||||
bool structure_argument_was_provided_);
|
||||
|
||||
std::string getName() const override { return "AzureBlobStorageCluster"; }
|
||||
|
||||
NamesAndTypesList getVirtuals() const override;
|
||||
|
||||
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
|
||||
|
||||
private:
|
||||
void updateBeforeRead(const ContextPtr & /*context*/) override {}
|
||||
|
||||
void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override;
|
||||
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
NamesAndTypesList virtual_columns;
|
||||
Block virtual_block;
|
||||
std::unique_ptr<AzureObjectStorage> object_storage;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -22,6 +22,8 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Archives/createArchiveReader.h>
|
||||
#include <IO/Archives/IArchiveReader.h>
|
||||
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
@ -57,7 +59,6 @@
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event CreatedReadBufferOrdinary;
|
||||
@ -379,10 +380,33 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
|
||||
bool use_table_fd,
|
||||
int table_fd,
|
||||
const String & compression_method,
|
||||
ContextPtr context)
|
||||
ContextPtr context,
|
||||
const String & path_to_archive = "")
|
||||
{
|
||||
CompressionMethod method;
|
||||
|
||||
if (!path_to_archive.empty())
|
||||
{
|
||||
auto reader = createArchiveReader(path_to_archive);
|
||||
|
||||
if (current_path.find_first_of("*?{") != std::string::npos)
|
||||
{
|
||||
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(current_path));
|
||||
if (!matcher->ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"Cannot compile regex from glob ({}): {}", current_path, matcher->error());
|
||||
|
||||
return reader->readFile([matcher = std::move(matcher)](const std::string & path)
|
||||
{
|
||||
return re2::RE2::FullMatch(path, *matcher);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
return reader->readFile(current_path);
|
||||
}
|
||||
}
|
||||
|
||||
if (use_table_fd)
|
||||
method = chooseCompressionMethod("", compression_method);
|
||||
else
|
||||
@ -471,7 +495,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
|
||||
const std::vector<String> & paths,
|
||||
const String & compression_method,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr context)
|
||||
ContextPtr context,
|
||||
const std::vector<String> & paths_to_archive)
|
||||
{
|
||||
if (format == "Distributed")
|
||||
{
|
||||
@ -491,30 +516,62 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
|
||||
if (context->getSettingsRef().schema_inference_use_cache_for_file)
|
||||
columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context);
|
||||
|
||||
ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
|
||||
ReadBufferIterator read_buffer_iterator;
|
||||
if (paths_to_archive.empty())
|
||||
{
|
||||
String path;
|
||||
struct stat file_stat;
|
||||
do
|
||||
read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
|
||||
{
|
||||
if (it == paths.end())
|
||||
String path;
|
||||
struct stat file_stat;
|
||||
do
|
||||
{
|
||||
if (first)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
|
||||
format);
|
||||
return nullptr;
|
||||
if (it == paths.end())
|
||||
{
|
||||
if (first)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
|
||||
format);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
path = *it++;
|
||||
file_stat = getFileStat(path, false, -1, "File");
|
||||
}
|
||||
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
|
||||
|
||||
path = *it++;
|
||||
file_stat = getFileStat(path, false, -1, "File");
|
||||
}
|
||||
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
|
||||
first = false;
|
||||
return createReadBuffer(path, file_stat, false, -1, compression_method, context);
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
read_buffer_iterator = [&, path_it = paths.begin(), archive_it = paths_to_archive.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
|
||||
{
|
||||
String path;
|
||||
struct stat file_stat;
|
||||
do
|
||||
{
|
||||
if (archive_it == paths_to_archive.end())
|
||||
{
|
||||
if (first)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
|
||||
format);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
first = false;
|
||||
return createReadBuffer(path, file_stat, false, -1, compression_method, context);
|
||||
};
|
||||
path = *archive_it++;
|
||||
file_stat = getFileStat(path, false, -1, "File");
|
||||
}
|
||||
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
|
||||
|
||||
first = false;
|
||||
return createReadBuffer(*path_it, file_stat, false, -1, compression_method, context, path);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
ColumnsDescription columns;
|
||||
if (columns_from_cache)
|
||||
@ -566,8 +623,17 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args)
|
||||
StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args)
|
||||
: StorageFile(args)
|
||||
{
|
||||
if (!args.path_to_archive.empty())
|
||||
{
|
||||
paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read);
|
||||
paths = {table_path_};
|
||||
}
|
||||
else
|
||||
{
|
||||
paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
|
||||
}
|
||||
|
||||
is_db_table = false;
|
||||
paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
|
||||
is_path_with_globs = paths.size() > 1;
|
||||
if (!paths.empty())
|
||||
path_for_partitioned_write = paths.front();
|
||||
@ -621,7 +687,7 @@ void StorageFile::setStorageMetadata(CommonArguments args)
|
||||
columns = getTableStructureFromFileDescriptor(args.getContext());
|
||||
else
|
||||
{
|
||||
columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext());
|
||||
columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive);
|
||||
if (!args.columns.empty() && args.columns != columns)
|
||||
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different");
|
||||
}
|
||||
@ -654,7 +720,9 @@ public:
|
||||
class FilesIterator
|
||||
{
|
||||
public:
|
||||
explicit FilesIterator(const Strings & files_) : files(files_)
|
||||
explicit FilesIterator(
|
||||
const Strings & files_, std::vector<std::string> archives_, std::vector<std::pair<uint64_t, std::string>> files_in_archive_)
|
||||
: files(files_), archives(std::move(archives_)), files_in_archive(std::move(files_in_archive_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -667,8 +735,25 @@ public:
|
||||
return files[current_index];
|
||||
}
|
||||
|
||||
std::pair<String, String> nextFileFromArchive()
|
||||
{
|
||||
auto current_index = index.fetch_add(1, std::memory_order_relaxed);
|
||||
if (current_index >= files_in_archive.size())
|
||||
return {"", ""};
|
||||
|
||||
const auto & [archive_index, filename] = files_in_archive[current_index];
|
||||
return {archives[archive_index], filename};
|
||||
}
|
||||
|
||||
bool fromArchive() const
|
||||
{
|
||||
return !archives.empty();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string> files;
|
||||
std::vector<std::string> archives;
|
||||
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
|
||||
std::atomic<size_t> index = 0;
|
||||
};
|
||||
|
||||
@ -776,9 +861,35 @@ public:
|
||||
{
|
||||
if (!storage->use_table_fd)
|
||||
{
|
||||
current_path = files_iterator->next();
|
||||
if (current_path.empty())
|
||||
return {};
|
||||
if (files_iterator->fromArchive())
|
||||
{
|
||||
auto [archive, filename] = files_iterator->nextFileFromArchive();
|
||||
if (archive.empty())
|
||||
return {};
|
||||
|
||||
current_path = std::move(filename);
|
||||
|
||||
if (!archive_reader || archive_reader->getPath() != archive)
|
||||
{
|
||||
archive_reader = createArchiveReader(archive);
|
||||
file_enumerator = archive_reader->firstFile();
|
||||
}
|
||||
|
||||
if (file_enumerator == nullptr)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to find a file in archive {}", archive);
|
||||
|
||||
while (file_enumerator->getFileName() != current_path)
|
||||
{
|
||||
if (!file_enumerator->nextFile())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected file {} is missing from archive {}", current_path, archive);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
current_path = files_iterator->next();
|
||||
if (current_path.empty())
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Special case for distributed format. Defaults are not needed here.
|
||||
if (storage->format_name == "Distributed")
|
||||
@ -791,10 +902,24 @@ public:
|
||||
|
||||
if (!read_buf)
|
||||
{
|
||||
auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
|
||||
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
|
||||
continue;
|
||||
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
|
||||
struct stat file_stat;
|
||||
if (archive_reader == nullptr)
|
||||
{
|
||||
file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
|
||||
|
||||
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (archive_reader == nullptr)
|
||||
{
|
||||
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
|
||||
}
|
||||
else
|
||||
{
|
||||
chassert(file_enumerator);
|
||||
read_buf = archive_reader->readFile(std::move(file_enumerator));
|
||||
}
|
||||
}
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
@ -861,7 +986,11 @@ public:
|
||||
reader.reset();
|
||||
pipeline.reset();
|
||||
input_format.reset();
|
||||
read_buf.reset();
|
||||
|
||||
if (archive_reader != nullptr)
|
||||
file_enumerator = archive_reader->nextFile(std::move(read_buf));
|
||||
else
|
||||
read_buf.reset();
|
||||
}
|
||||
|
||||
return {};
|
||||
@ -879,6 +1008,9 @@ private:
|
||||
std::unique_ptr<QueryPipeline> pipeline;
|
||||
std::unique_ptr<PullingPipelineExecutor> reader;
|
||||
|
||||
std::shared_ptr<IArchiveReader> archive_reader;
|
||||
std::unique_ptr<IArchiveReader::FileEnumerator> file_enumerator = nullptr;
|
||||
|
||||
ColumnsDescription columns_description;
|
||||
NamesAndTypesList requested_columns;
|
||||
NamesAndTypesList requested_virtual_columns;
|
||||
@ -908,21 +1040,67 @@ Pipe StorageFile::read(
|
||||
}
|
||||
else
|
||||
{
|
||||
if (paths.size() == 1 && !fs::exists(paths[0]))
|
||||
const auto & p = paths_to_archive.empty() ? paths : paths_to_archive;
|
||||
if (p.size() == 1 && !fs::exists(p[0]))
|
||||
{
|
||||
if (context->getSettingsRef().engine_file_empty_if_not_exists)
|
||||
return Pipe(std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
|
||||
else
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]);
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]);
|
||||
}
|
||||
}
|
||||
|
||||
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths);
|
||||
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
|
||||
|
||||
size_t files_in_archive_num = 0;
|
||||
if (!paths_to_archive.empty())
|
||||
{
|
||||
if (paths.size() != 1)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive");
|
||||
|
||||
const auto & path = paths[0];
|
||||
|
||||
IArchiveReader::NameFilter filter;
|
||||
if (path.find_first_of("*?{") != std::string::npos)
|
||||
{
|
||||
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(path));
|
||||
if (!matcher->ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"Cannot compile regex from glob ({}): {}", path, matcher->error());
|
||||
|
||||
filter = [matcher](const std::string & p)
|
||||
{
|
||||
return re2::RE2::FullMatch(p, *matcher);
|
||||
};
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < paths_to_archive.size(); ++i)
|
||||
{
|
||||
if (filter)
|
||||
{
|
||||
const auto & path_to_archive = paths_to_archive[i];
|
||||
auto archive_reader = createArchiveReader(path_to_archive);
|
||||
auto files = archive_reader->getAllFiles(filter);
|
||||
for (auto & file : files)
|
||||
files_in_archive.push_back({i, std::move(file)});
|
||||
}
|
||||
else
|
||||
{
|
||||
files_in_archive.push_back({i, path});
|
||||
}
|
||||
}
|
||||
|
||||
files_in_archive_num = files_in_archive.size();
|
||||
}
|
||||
|
||||
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, paths_to_archive, std::move(files_in_archive));
|
||||
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
|
||||
|
||||
size_t num_streams = max_num_streams;
|
||||
if (max_num_streams > paths.size())
|
||||
num_streams = paths.size();
|
||||
|
||||
auto files_to_read = std::max(files_in_archive_num, paths.size());
|
||||
if (max_num_streams > files_to_read)
|
||||
num_streams = files_to_read;
|
||||
|
||||
Pipes pipes;
|
||||
pipes.reserve(num_streams);
|
||||
@ -1202,6 +1380,9 @@ SinkToStoragePtr StorageFile::write(
|
||||
ContextPtr context,
|
||||
bool /*async_insert*/)
|
||||
{
|
||||
if (!use_table_fd && !paths_to_archive.empty())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported");
|
||||
|
||||
if (format_name == "Distributed")
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format");
|
||||
|
||||
@ -1375,6 +1556,7 @@ void registerStorageFile(StorageFactory & factory)
|
||||
factory_args.constraints,
|
||||
factory_args.comment,
|
||||
{},
|
||||
{},
|
||||
};
|
||||
|
||||
ASTs & engine_args_ast = factory_args.engine_args;
|
||||
@ -1445,7 +1627,7 @@ void registerStorageFile(StorageFactory & factory)
|
||||
else if (type == Field::Types::UInt64)
|
||||
source_fd = static_cast<int>(literal->value.get<UInt64>());
|
||||
else if (type == Field::Types::String)
|
||||
source_path = literal->value.get<String>();
|
||||
StorageFile::parseFileSource(literal->value.get<String>(), source_path, storage_args.path_to_archive);
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor");
|
||||
}
|
||||
@ -1517,4 +1699,32 @@ void StorageFile::addColumnsToCache(
|
||||
schema_cache.addMany(cache_keys, columns);
|
||||
}
|
||||
|
||||
void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive)
|
||||
{
|
||||
size_t pos = source.find("::");
|
||||
if (pos == String::npos)
|
||||
{
|
||||
filename = std::move(source);
|
||||
return;
|
||||
}
|
||||
|
||||
std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos);
|
||||
while (path_to_archive_view.back() == ' ')
|
||||
path_to_archive_view.remove_suffix(1);
|
||||
|
||||
if (path_to_archive_view.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty");
|
||||
|
||||
path_to_archive = path_to_archive_view;
|
||||
|
||||
std::string_view filename_view = std::string_view{source}.substr(pos + 2);
|
||||
while (filename_view.front() == ' ')
|
||||
filename_view.remove_prefix(1);
|
||||
|
||||
if (filename_view.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty");
|
||||
|
||||
filename = filename_view;
|
||||
}
|
||||
|
||||
}
|
||||
|
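parseFileSource() above implements the '<path-to-archive> :: <file-inside-archive>' source syntax used by the File engine and the file() table function. A minimal illustration of its contract (hypothetical test snippet, not part of the diff):

String filename;
String path_to_archive;

/// Spaces around "::" are trimmed; an empty archive path or filename throws BAD_ARGUMENTS.
StorageFile::parseFileSource("archive.tar :: data.csv", filename, path_to_archive);
/// -> path_to_archive == "archive.tar", filename == "data.csv"

StorageFile::parseFileSource("plain.csv", filename, path_to_archive);
/// -> no "::" present: filename == "plain.csv", path_to_archive is left unchanged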
@ -22,8 +22,8 @@ public:
|
||||
const ColumnsDescription & columns;
|
||||
const ConstraintsDescription & constraints;
|
||||
const String & comment;
|
||||
|
||||
const std::string rename_after_processing;
|
||||
std::string path_to_archive;
|
||||
};
|
||||
|
||||
/// From file descriptor
|
||||
@ -90,10 +90,13 @@ public:
|
||||
const std::vector<String> & paths,
|
||||
const String & compression_method,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
ContextPtr context);
|
||||
ContextPtr context,
|
||||
const std::vector<String> & paths_to_archive = {"auto"});
|
||||
|
||||
static SchemaCache & getSchemaCache(const ContextPtr & context);
|
||||
|
||||
static void parseFileSource(String source, String & filename, String & path_to_archive);
|
||||
|
||||
protected:
|
||||
friend class StorageFileSource;
|
||||
friend class StorageFileSink;
|
||||
@ -123,6 +126,7 @@ private:
|
||||
|
||||
std::string base_path;
|
||||
std::vector<std::string> paths;
|
||||
std::vector<std::string> paths_to_archive;
|
||||
|
||||
bool is_db_table = true; /// Table is stored in real database, not user's file
|
||||
bool use_table_fd = false; /// Use table_fd instead of path
|
||||
|
@ -42,6 +42,7 @@ protected:
|
||||
virtual String getFormatFromFirstArgument();
|
||||
|
||||
String filename;
|
||||
String path_to_archive;
|
||||
String format = "auto";
|
||||
String structure = "auto";
|
||||
String compression_method = "auto";
|
||||
|
@ -44,10 +44,8 @@ bool isConnectionString(const std::string & candidate)
|
||||
|
||||
}
|
||||
|
||||
StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file)
|
||||
void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context)
|
||||
{
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
|
||||
/// Supported signatures:
|
||||
///
|
||||
/// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
@ -59,87 +57,80 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
|
||||
|
||||
configuration.blobs_paths = {configuration.blob_path};
|
||||
|
||||
if (configuration.format == "auto" && get_format_from_file)
|
||||
if (configuration.format == "auto")
|
||||
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
|
||||
|
||||
return configuration;
|
||||
}
|
||||
|
||||
if (engine_args.size() < 3 || engine_args.size() > 8)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Storage Azure requires 3 to 7 arguments: "
|
||||
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
|
||||
|
||||
for (auto & engine_arg : engine_args)
|
||||
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
|
||||
|
||||
std::unordered_map<std::string_view, size_t> engine_args_to_idx;
|
||||
|
||||
configuration.connection_url = checkAndGetLiteralArgument<String>(engine_args[0], "connection_string/storage_account_url");
|
||||
configuration.is_connection_string = isConnectionString(configuration.connection_url);
|
||||
|
||||
configuration.container = checkAndGetLiteralArgument<String>(engine_args[1], "container");
|
||||
configuration.blob_path = checkAndGetLiteralArgument<String>(engine_args[2], "blobpath");
|
||||
|
||||
auto is_format_arg = [] (const std::string & s) -> bool
|
||||
else
|
||||
{
|
||||
return s == "auto" || FormatFactory::instance().getAllFormats().contains(s);
|
||||
};
|
||||
if (engine_args.size() < 3 || engine_args.size() > 8)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Storage Azure requires 3 to 7 arguments: "
|
||||
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
|
||||
|
||||
if (engine_args.size() == 4)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name/structure");
|
||||
if (is_format_arg(fourth_arg))
|
||||
for (auto & engine_arg : engine_args)
|
||||
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
|
||||
|
||||
std::unordered_map<std::string_view, size_t> engine_args_to_idx;
|
||||
|
||||
configuration.connection_url = checkAndGetLiteralArgument<String>(engine_args[0], "connection_string/storage_account_url");
|
||||
configuration.is_connection_string = isConnectionString(configuration.connection_url);
|
||||
|
||||
configuration.container = checkAndGetLiteralArgument<String>(engine_args[1], "container");
|
||||
configuration.blob_path = checkAndGetLiteralArgument<String>(engine_args[2], "blobpath");
|
||||
|
||||
auto is_format_arg
|
||||
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); };
|
||||
|
||||
if (engine_args.size() == 4)
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name/structure");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.structure = fourth_arg;
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (engine_args.size() == 5)
|
||||
{
|
||||
configuration.structure = fourth_arg;
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (engine_args.size() == 5)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
else if (engine_args.size() == 6)
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
|
||||
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[5], "structure");
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name/structure");
|
||||
if (is_format_arg(sixth_arg))
|
||||
configuration.format = sixth_arg;
|
||||
else
|
||||
configuration.structure = sixth_arg;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
}
|
||||
}
|
||||
else if (engine_args.size() == 6)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
configuration.format = fourth_arg;
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
|
||||
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[5], "structure");
|
||||
}
|
||||
else
|
||||
{
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
|
||||
if (!is_format_arg(sixth_arg))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg);
|
||||
configuration.format = sixth_arg;
|
||||
}
|
||||
}
|
||||
else if (engine_args.size() == 7)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format, compression and structure must be last arguments");
|
||||
}
|
||||
else
|
||||
else if (engine_args.size() == 7)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
|
||||
@ -148,17 +139,9 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
|
||||
configuration.format = sixth_arg;
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[6], "compression");
|
||||
}
|
||||
}
|
||||
else if (engine_args.size() == 8)
|
||||
{
|
||||
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments");
|
||||
}
|
||||
else
|
||||
else if (engine_args.size() == 8)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
|
||||
configuration.account_name = fourth_arg;
|
||||
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
|
||||
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
|
||||
@ -168,14 +151,12 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
|
||||
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[6], "compression");
|
||||
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[7], "structure");
|
||||
}
|
||||
|
||||
configuration.blobs_paths = {configuration.blob_path};
|
||||
|
||||
if (configuration.format == "auto")
|
||||
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
|
||||
}
|
||||
|
||||
configuration.blobs_paths = {configuration.blob_path};
|
||||
|
||||
if (configuration.format == "auto" && get_format_from_file)
|
||||
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
|
||||
|
||||
return configuration;
|
||||
}
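A hedged sketch of how this positional parsing resolves ambiguous arguments (the endpoint, container and key values below are placeholders, not taken from this patch): the fourth argument is treated as a format only when it matches a known format name or 'auto'; otherwise it is read as an account name followed by an account key.

-- fourth argument is a known format, so it is parsed as format
SELECT * FROM azureBlobStorage('<connection_string>', 'cont', 'data.csv', 'CSV');
-- fourth argument is not a format, so it is parsed as account_name, account_key
SELECT * FROM azureBlobStorage('<storage_account_url>', 'cont', 'data.csv', 'myaccount', '<account_key>');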
|
||||
|
||||
void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context)
|
||||
@ -190,7 +171,87 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function,
|
||||
|
||||
auto & args = args_func.at(0)->children;
|
||||
|
||||
configuration = parseArgumentsImpl(args, context);
|
||||
parseArgumentsImpl(args, context);
|
||||
}
|
||||
|
||||
void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context)
|
||||
{
|
||||
if (tryGetNamedCollectionWithOverrides(args, context))
|
||||
{
|
||||
/// In case of named collection, just add key-value pair "structure='...'"
|
||||
/// at the end of arguments to override the existing structure.
|
||||
ASTs equal_func_args = {std::make_shared<ASTIdentifier>("structure"), std::make_shared<ASTLiteral>(structure)};
|
||||
auto equal_func = makeASTFunction("equals", std::move(equal_func_args));
|
||||
args.push_back(equal_func);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (args.size() < 3 || args.size() > 8)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Storage Azure requires 3 to 7 arguments: "
|
||||
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
|
||||
|
||||
auto structure_literal = std::make_shared<ASTLiteral>(structure);
|
||||
|
||||
auto is_format_arg
|
||||
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); };
|
||||
|
||||
|
||||
if (args.size() == 3)
|
||||
{
|
||||
/// Add format=auto & compression=auto before structure argument.
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
args.push_back(structure_literal);
|
||||
}
|
||||
else if (args.size() == 4)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name/structure");
|
||||
if (is_format_arg(fourth_arg))
|
||||
{
|
||||
/// Add compression=auto before structure argument.
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
args.push_back(structure_literal);
|
||||
}
|
||||
else
|
||||
{
|
||||
args.back() = structure_literal;
|
||||
}
|
||||
}
|
||||
else if (args.size() == 5)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name");
|
||||
if (!is_format_arg(fourth_arg))
|
||||
{
|
||||
/// Add format=auto & compression=auto before structure argument.
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
}
|
||||
args.push_back(structure_literal);
|
||||
}
|
||||
else if (args.size() == 6)
|
||||
{
|
||||
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name");
|
||||
if (!is_format_arg(fourth_arg))
|
||||
{
|
||||
/// Add compression=auto before structure argument.
|
||||
args.push_back(std::make_shared<ASTLiteral>("auto"));
|
||||
args.push_back(structure_literal);
|
||||
}
|
||||
else
|
||||
{
|
||||
args.back() = structure_literal;
|
||||
}
|
||||
}
|
||||
else if (args.size() == 7)
|
||||
{
|
||||
args.push_back(structure_literal);
|
||||
}
|
||||
else if (args.size() == 8)
|
||||
{
|
||||
args.back() = structure_literal;
|
||||
}
|
||||
}
|
||||
}
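To illustrate the rewriting above, a minimal sketch of the three-argument case (argument values are placeholders): because the structure must stay in its positional slot, 'auto' literals for format and compression are appended before it.

-- original table function call
-- azureBlobStorage('<connection_string>', 'cont', 'data.csv')
-- after addColumnsStructureToArguments(args, 'key UInt64, data String', context)
-- azureBlobStorage('<connection_string>', 'cont', 'data.csv', 'auto', 'auto', 'key UInt64, data String')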
|
||||
|
||||
ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const
|
||||
@ -202,7 +263,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex
|
||||
auto settings = StorageAzureBlob::createSettings(context);
|
||||
|
||||
auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings));
|
||||
return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context);
|
||||
return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false);
|
||||
}
|
||||
|
||||
return parseColumnsListFromString(configuration.structure, context);
|
||||
@ -234,6 +295,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct
|
||||
String{},
|
||||
/// No format_settings for table function Azure
|
||||
std::nullopt,
|
||||
/* distributed_processing */ false,
|
||||
nullptr);
|
||||
|
||||
storage->startup();
|
||||
|
@ -13,13 +13,23 @@ namespace DB
|
||||
|
||||
class Context;
|
||||
|
||||
/* AzureBlob(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in AzureBlob.
|
||||
/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob.
|
||||
*/
|
||||
class TableFunctionAzureBlobStorage : public ITableFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "azureBlobStorage";
|
||||
static constexpr auto signature = "- connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]\n";
|
||||
|
||||
static constexpr auto signature = " - connection_string, container_name, blobpath\n"
|
||||
" - connection_string, container_name, blobpath, structure \n"
|
||||
" - connection_string, container_name, blobpath, format \n"
|
||||
" - connection_string, container_name, blobpath, format, compression \n"
|
||||
" - connection_string, container_name, blobpath, format, compression, structure \n"
|
||||
" - storage_account_url, container_name, blobpath, account_name, account_key\n"
|
||||
" - storage_account_url, container_name, blobpath, account_name, account_key, structure\n"
|
||||
" - storage_account_url, container_name, blobpath, account_name, account_key, format\n"
|
||||
" - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n"
|
||||
" - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n";
|
||||
|
||||
static size_t getMaxNumberOfArguments() { return 8; }
|
||||
|
||||
@ -46,7 +56,9 @@ public:
|
||||
return {"_path", "_file"};
|
||||
}
|
||||
|
||||
static StorageAzureBlob::Configuration parseArgumentsImpl(ASTs & args, const ContextPtr & context, bool get_format_from_file = true);
|
||||
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context);
|
||||
|
||||
static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context);
|
||||
|
||||
protected:
|
||||
|
||||
|
85
src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <TableFunctions/TableFunctionAzureBlobStorageCluster.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Interpreters/parseColumnsListForTableFunction.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
|
||||
#include "registerTableFunctions.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
|
||||
const ASTPtr & /*function*/, ContextPtr context,
|
||||
const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const
|
||||
{
|
||||
StoragePtr storage;
|
||||
ColumnsDescription columns;
|
||||
bool structure_argument_was_provided = configuration.structure != "auto";
|
||||
|
||||
if (structure_argument_was_provided)
|
||||
{
|
||||
columns = parseColumnsListFromString(configuration.structure, context);
|
||||
}
|
||||
else if (!structure_hint.empty())
|
||||
{
|
||||
columns = structure_hint;
|
||||
}
|
||||
|
||||
auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
|
||||
auto settings = StorageAzureBlob::createSettings(context);
|
||||
|
||||
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
|
||||
{
|
||||
/// On the worker node this filename won't contain globs
|
||||
storage = std::make_shared<StorageAzureBlob>(
|
||||
configuration,
|
||||
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
|
||||
context,
|
||||
StorageID(getDatabaseName(), table_name),
|
||||
columns,
|
||||
ConstraintsDescription{},
|
||||
/* comment */String{},
|
||||
/* format_settings */std::nullopt, /// No format_settings
|
||||
/* distributed_processing */ true,
|
||||
/*partition_by_=*/nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
storage = std::make_shared<StorageAzureBlobCluster>(
|
||||
cluster_name,
|
||||
configuration,
|
||||
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
|
||||
StorageID(getDatabaseName(), table_name),
|
||||
columns,
|
||||
ConstraintsDescription{},
|
||||
context,
|
||||
structure_argument_was_provided);
|
||||
}
|
||||
|
||||
storage->startup();
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
|
||||
void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<TableFunctionAzureBlobStorageCluster>(
|
||||
{.documentation
|
||||
= {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)",
|
||||
.examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}},
|
||||
.allow_readonly = false}
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
55
src/TableFunctions/TableFunctionAzureBlobStorageCluster.h
Normal file
@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <TableFunctions/ITableFunction.h>
|
||||
#include <TableFunctions/TableFunctionAzureBlobStorage.h>
|
||||
#include <TableFunctions/ITableFunctionCluster.h>
|
||||
#include <Storages/StorageAzureBlobCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure)
|
||||
* A table function that allows processing many files from Azure Blob Storage on a specific cluster.
|
||||
* On the initiator it creates a connection to _all_ nodes in the cluster, expands asterisks
|
||||
* in the Azure Blob Storage file path and dispatches each file dynamically.
|
||||
* On a worker node it asks the initiator for the next task and processes it.
|
||||
* This is repeated until the tasks are finished.
|
||||
*/
|
||||
class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster<TableFunctionAzureBlobStorage>
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "azureBlobStorageCluster";
|
||||
static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]";
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
String getSignature() const override
|
||||
{
|
||||
return signature;
|
||||
}
|
||||
|
||||
protected:
|
||||
StoragePtr executeImpl(
|
||||
const ASTPtr & ast_function,
|
||||
ContextPtr context,
|
||||
const std::string & table_name,
|
||||
ColumnsDescription cached_columns,
|
||||
bool is_insert_query) const override;
|
||||
|
||||
const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
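To make the initiator/worker split described above concrete, a hedged usage sketch (cluster name, endpoint and credentials are placeholders): the initiator expands the glob in the blob path and dispatches the individual files to the worker nodes.

SELECT count() FROM azureBlobStorageCluster('my_cluster', '<storage_account_url>', 'cont', 'data_*.csv', '<account_name>', '<account_key>', 'CSV', 'auto', 'key UInt64, data String');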
|
@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr
|
||||
if (context->getApplicationType() != Context::ApplicationType::LOCAL)
|
||||
{
|
||||
ITableFunctionFileLike::parseFirstArguments(arg, context);
|
||||
StorageFile::parseFileSource(std::move(filename), filename, path_to_archive);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -39,6 +40,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr
|
||||
fd = STDOUT_FILENO;
|
||||
else if (filename == "stderr")
|
||||
fd = STDERR_FILENO;
|
||||
else
|
||||
StorageFile::parseFileSource(std::move(filename), filename, path_to_archive);
|
||||
}
|
||||
else if (type == Field::Types::Int64 || type == Field::Types::UInt64)
|
||||
{
|
||||
@ -76,7 +79,9 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
|
||||
ConstraintsDescription{},
|
||||
String{},
|
||||
global_context->getSettingsRef().rename_files_after_processing,
|
||||
path_to_archive,
|
||||
};
|
||||
|
||||
if (fd >= 0)
|
||||
return std::make_shared<StorageFile>(fd, args);
|
||||
|
||||
@ -90,8 +95,15 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
|
||||
if (fd >= 0)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName());
|
||||
size_t total_bytes_to_read = 0;
|
||||
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
|
||||
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context);
|
||||
|
||||
Strings paths;
|
||||
Strings paths_to_archives;
|
||||
if (path_to_archive.empty())
|
||||
paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
|
||||
else
|
||||
paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read);
|
||||
|
||||
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives);
|
||||
}
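With the branch above, schema inference also works when the file source points inside an archive using the 'archive :: member' syntax exercised by the new tests; a minimal sketch, with placeholder file names:

DESCRIBE file('backup.7z :: data.csv');
SELECT * FROM file('backup.7z :: data.csv');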
|
||||
|
||||
|
||||
|
@ -75,6 +75,7 @@ void registerTableFunctions()
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
registerTableFunctionAzureBlobStorage(factory);
|
||||
registerTableFunctionAzureBlobStorageCluster(factory);
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -72,6 +72,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory);
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory);
|
||||
void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory);
|
||||
#endif
|
||||
|
||||
void registerTableFunctions();
|
||||
|
@ -162,5 +162,8 @@ endif ()
|
||||
if (TARGET ch_contrib::fiu)
|
||||
set(FIU_ENABLE 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::libarchive)
|
||||
set(USE_LIBARCHIVE 1)
|
||||
endif()
|
||||
|
||||
set(SOURCE_DIR ${CMAKE_SOURCE_DIR})
|
||||
|
@ -18,6 +18,8 @@
|
||||
<session_timeout_ms>15000</session_timeout_ms>
|
||||
<raft_logs_level>trace</raft_logs_level>
|
||||
<force_sync>false</force_sync>
|
||||
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
|
||||
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
|
||||
</coordination_settings>
|
||||
|
||||
<raft_configuration>
|
||||
|
@ -18,6 +18,8 @@
|
||||
<session_timeout_ms>15000</session_timeout_ms>
|
||||
<raft_logs_level>trace</raft_logs_level>
|
||||
<force_sync>false</force_sync>
|
||||
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
|
||||
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
|
||||
</coordination_settings>
|
||||
|
||||
<raft_configuration>
|
||||
|
@ -18,6 +18,8 @@
|
||||
<session_timeout_ms>15000</session_timeout_ms>
|
||||
<raft_logs_level>trace</raft_logs_level>
|
||||
<force_sync>false</force_sync>
|
||||
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
|
||||
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
|
||||
</coordination_settings>
|
||||
|
||||
<raft_configuration>
|
||||
|
@ -2592,6 +2592,20 @@ def named_collections(clickhouse_node, mysql_node, service_name):
|
||||
f"/* expect: (1, 'a', 1), (2, 'b', 2) */ SELECT * FROM {db}.t1",
|
||||
"1\ta\t1\n2\tb\t2\n",
|
||||
)
|
||||
clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=9999")
|
||||
clickhouse_node.query(f"DETACH DATABASE {db}")
|
||||
mysql_node.query(f"INSERT INTO {db}.t1 VALUES (3, 'c', 3)")
|
||||
assert "ConnectionFailed:" in clickhouse_node.query_and_get_error(
|
||||
f"ATTACH DATABASE {db}"
|
||||
)
|
||||
clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=3306")
|
||||
clickhouse_node.query(f"ATTACH DATABASE {db}")
|
||||
check_query(
|
||||
clickhouse_node,
|
||||
f"/* expect: (1, 'a', 1), (2, 'b', 2), (3, 'c', 3) */ SELECT * FROM {db}.t1",
|
||||
"1\ta\t1\n2\tb\t2\n3\tc\t3\n",
|
||||
)
|
||||
|
||||
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
|
||||
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
|
||||
|
||||
|
@ -27,10 +27,7 @@ proto_dir = os.path.join(SCRIPT_DIR, "./protos")
|
||||
gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
|
||||
os.makedirs(gen_dir, exist_ok=True)
|
||||
run_and_check(
|
||||
"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} \
|
||||
{proto_dir}/clickhouse_grpc.proto".format(
|
||||
proto_dir=proto_dir, gen_dir=gen_dir
|
||||
),
|
||||
f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
|
||||
shell=True,
|
||||
)
|
||||
|
||||
|
1
tests/integration/test_session_log/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
_gen
|
0
tests/integration/test_session_log/__init__.py
Normal file
9
tests/integration/test_session_log/configs/log.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<clickhouse>
|
||||
<logger>
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
</logger>
|
||||
</clickhouse>
|
9
tests/integration/test_session_log/configs/ports.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<clickhouse>
|
||||
<postgresql_port>5433</postgresql_port>
|
||||
<mysql_port>9001</mysql_port>
|
||||
<grpc_port>9100</grpc_port>
|
||||
<grpc replace="replace">
|
||||
<!-- Enable if you want very detailed logs -->
|
||||
<verbose_logs>false</verbose_logs>
|
||||
</grpc>
|
||||
</clickhouse>
|
@ -0,0 +1,9 @@
|
||||
<clickhouse>
|
||||
<session_log>
|
||||
<database>system</database>
|
||||
<table>session_log</table>
|
||||
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</session_log>
|
||||
</clickhouse>
|
23
tests/integration/test_session_log/configs/users.xml
Normal file
@ -0,0 +1,23 @@
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<function_sleep_max_microseconds_per_block>0</function_sleep_max_microseconds_per_block>
|
||||
</default>
|
||||
</profiles>
|
||||
<users>
|
||||
<default>
|
||||
</default>
|
||||
<mysql_user>
|
||||
<password>pass</password>
|
||||
</mysql_user>
|
||||
<postgres_user>
|
||||
<password>pass</password>
|
||||
</postgres_user>
|
||||
<grpc_user>
|
||||
<password>pass</password>
|
||||
</grpc_user>
|
||||
<parallel_user>
|
||||
<password>pass</password>
|
||||
</parallel_user>
|
||||
</users>
|
||||
</clickhouse>
|
1
tests/integration/test_session_log/protos/clickhouse_grpc.proto
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../src/Server/grpc_protos/clickhouse_grpc.proto
|
289
tests/integration/test_session_log/test.py
Normal file
@ -0,0 +1,289 @@
|
||||
import os
|
||||
|
||||
import grpc
|
||||
import pymysql.connections
|
||||
import psycopg2 as py_psql
|
||||
import pytest
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from helpers.cluster import ClickHouseCluster, run_and_check
|
||||
|
||||
POSTGRES_SERVER_PORT = 5433
|
||||
MYSQL_SERVER_PORT = 9001
|
||||
GRPC_PORT = 9100
|
||||
SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
DEFAULT_ENCODING = "utf-8"
|
||||
|
||||
# Use grpcio-tools to generate *pb2.py files from *.proto.
|
||||
proto_dir = os.path.join(SCRIPT_DIR, "./protos")
|
||||
gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
|
||||
os.makedirs(gen_dir, exist_ok=True)
|
||||
run_and_check(
|
||||
f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
|
||||
shell=True,
|
||||
)
|
||||
|
||||
sys.path.append(gen_dir)
|
||||
|
||||
import clickhouse_grpc_pb2
|
||||
import clickhouse_grpc_pb2_grpc
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
instance = cluster.add_instance(
|
||||
"node",
|
||||
main_configs=[
|
||||
"configs/ports.xml",
|
||||
"configs/log.xml",
|
||||
"configs/session_log.xml",
|
||||
],
|
||||
user_configs=["configs/users.xml"],
|
||||
# Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387
|
||||
env_variables={
|
||||
"TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="")
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def grpc_get_url():
|
||||
return f"{instance.ip_address}:{GRPC_PORT}"
|
||||
|
||||
|
||||
def grpc_create_insecure_channel():
|
||||
channel = grpc.insecure_channel(grpc_get_url())
|
||||
grpc.channel_ready_future(channel).result(timeout=2)
|
||||
return channel
|
||||
|
||||
|
||||
session_id_counter = 0
|
||||
|
||||
|
||||
def next_session_id():
|
||||
global session_id_counter
|
||||
session_id = session_id_counter
|
||||
session_id_counter += 1
|
||||
return str(session_id)
|
||||
|
||||
|
||||
def grpc_query(query, user_, pass_, raise_exception):
|
||||
try:
|
||||
query_info = clickhouse_grpc_pb2.QueryInfo(
|
||||
query=query,
|
||||
session_id=next_session_id(),
|
||||
user_name=user_,
|
||||
password=pass_,
|
||||
)
|
||||
channel = grpc_create_insecure_channel()
|
||||
stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
|
||||
result = stub.ExecuteQuery(query_info)
|
||||
if result and result.HasField("exception"):
|
||||
raise Exception(result.exception.display_text)
|
||||
|
||||
return result.output.decode(DEFAULT_ENCODING)
|
||||
except Exception:
|
||||
assert raise_exception
|
||||
|
||||
|
||||
def postgres_query(query, user_, pass_, raise_exception):
|
||||
try:
|
||||
client = py_psql.connect(
|
||||
host=instance.ip_address,
|
||||
port=POSTGRES_SERVER_PORT,
|
||||
user=user_,
|
||||
password=pass_,
|
||||
database="default",
|
||||
)
|
||||
cursor = client.cursor()
|
||||
cursor.execute(query)
|
||||
cursor.fetchall()
|
||||
except Exception:
|
||||
assert raise_exception
|
||||
|
||||
|
||||
def mysql_query(query, user_, pass_, raise_exception):
|
||||
try:
|
||||
client = pymysql.connections.Connection(
|
||||
host=instance.ip_address,
|
||||
user=user_,
|
||||
password=pass_,
|
||||
database="default",
|
||||
port=MYSQL_SERVER_PORT,
|
||||
)
|
||||
cursor = client.cursor(pymysql.cursors.DictCursor)
|
||||
if raise_exception:
|
||||
with pytest.raises(Exception):
|
||||
cursor.execute(query)
|
||||
else:
|
||||
cursor.execute(query)
|
||||
cursor.fetchall()
|
||||
except Exception:
|
||||
assert raise_exception
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def started_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
yield cluster
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def test_grpc_session(started_cluster):
|
||||
grpc_query("SELECT 1", "grpc_user", "pass", False)
|
||||
grpc_query("SELECT 2", "grpc_user", "wrong_pass", True)
|
||||
grpc_query("SELECT 3", "wrong_grpc_user", "pass", True)
|
||||
|
||||
instance.query("SYSTEM FLUSH LOGS")
|
||||
login_success_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'"
|
||||
)
|
||||
assert login_success_records == "grpc_user\t1\t1\n"
|
||||
logout_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'"
|
||||
)
|
||||
assert logout_records == "grpc_user\t1\t1\n"
|
||||
login_failure_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'"
|
||||
)
|
||||
assert login_failure_records == "grpc_user\t1\t1\n"
|
||||
logins_and_logouts = instance.query(
|
||||
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')"
|
||||
)
|
||||
assert logins_and_logouts == "1\n"
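The INTERSECT over SESSION_LOG_MATCHING_FIELDS pairs each LoginSuccess row with a Logout row that has identical auth and interface fields, so the final assertion verifies that the single successful gRPC session was also cleanly logged out.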
|
||||
|
||||
|
||||
def test_mysql_session(started_cluster):
|
||||
mysql_query("SELECT 1", "mysql_user", "pass", False)
|
||||
mysql_query("SELECT 2", "mysql_user", "wrong_pass", True)
|
||||
mysql_query("SELECT 3", "wrong_mysql_user", "pass", True)
|
||||
|
||||
instance.query("SYSTEM FLUSH LOGS")
|
||||
login_success_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'"
|
||||
)
|
||||
assert login_success_records == "mysql_user\t1\t1\n"
|
||||
logout_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'"
|
||||
)
|
||||
assert logout_records == "mysql_user\t1\t1\n"
|
||||
login_failure_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'"
|
||||
)
|
||||
assert login_failure_records == "mysql_user\t1\t1\n"
|
||||
logins_and_logouts = instance.query(
|
||||
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')"
|
||||
)
|
||||
assert logins_and_logouts == "1\n"
|
||||
|
||||
|
||||
def test_postgres_session(started_cluster):
|
||||
postgres_query("SELECT 1", "postgres_user", "pass", False)
|
||||
postgres_query("SELECT 2", "postgres_user", "wrong_pass", True)
|
||||
postgres_query("SELECT 3", "wrong_postgres_user", "pass", True)
|
||||
|
||||
instance.query("SYSTEM FLUSH LOGS")
|
||||
login_success_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'"
|
||||
)
|
||||
assert login_success_records == "postgres_user\t1\t1\n"
|
||||
logout_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'"
|
||||
)
|
||||
assert logout_records == "postgres_user\t1\t1\n"
|
||||
login_failure_records = instance.query(
|
||||
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'"
|
||||
)
|
||||
assert login_failure_records == "postgres_user\t1\t1\n"
|
||||
logins_and_logouts = instance.query(
|
||||
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')"
|
||||
)
|
||||
assert logins_and_logouts == "1\n"
|
||||
|
||||
|
||||
def test_parallel_sessions(started_cluster):
|
||||
thread_list = []
|
||||
for _ in range(10):
|
||||
# Sleep time does not significantly matter here,
|
||||
# the test should pass even without sleeping.
|
||||
for function in [postgres_query, grpc_query, mysql_query]:
|
||||
thread = threading.Thread(
|
||||
target=function,
|
||||
args=(
|
||||
f"SELECT sleep({random.uniform(0.03, 0.04)})",
|
||||
"parallel_user",
|
||||
"pass",
|
||||
False,
|
||||
),
|
||||
)
|
||||
thread.start()
|
||||
thread_list.append(thread)
|
||||
thread = threading.Thread(
|
||||
target=function,
|
||||
args=(
|
||||
f"SELECT sleep({random.uniform(0.03, 0.04)})",
|
||||
"parallel_user",
|
||||
"wrong_pass",
|
||||
True,
|
||||
),
|
||||
)
|
||||
thread.start()
|
||||
thread_list.append(thread)
|
||||
thread = threading.Thread(
|
||||
target=function,
|
||||
args=(
|
||||
f"SELECT sleep({random.uniform(0.03, 0.04)})",
|
||||
"wrong_parallel_user",
|
||||
"pass",
|
||||
True,
|
||||
),
|
||||
)
|
||||
thread.start()
|
||||
thread_list.append(thread)
|
||||
|
||||
for thread in thread_list:
|
||||
thread.join()
|
||||
|
||||
instance.query("SYSTEM FLUSH LOGS")
|
||||
port_0_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'"
|
||||
)
|
||||
assert port_0_sessions == "90\n"
|
||||
|
||||
port_0_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0"
|
||||
)
|
||||
assert port_0_sessions == "0\n"
|
||||
|
||||
address_0_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')"
|
||||
)
|
||||
assert address_0_sessions == "0\n"
|
||||
|
||||
grpc_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'"
|
||||
)
|
||||
assert grpc_sessions == "30\n"
|
||||
|
||||
mysql_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'"
|
||||
)
|
||||
assert mysql_sessions == "30\n"
|
||||
|
||||
postgres_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'"
|
||||
)
|
||||
assert postgres_sessions == "30\n"
|
||||
|
||||
logins_and_logouts = instance.query(
|
||||
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')"
|
||||
)
|
||||
assert logins_and_logouts == "30\n"
|
||||
|
||||
logout_failure_sessions = instance.query(
|
||||
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'"
|
||||
)
|
||||
assert logout_failure_sessions == "30\n"
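The expected counts follow from the loop structure: 10 iterations over 3 protocols spawn one successful login, one wrong-password attempt and one wrong-user attempt each. For 'parallel_user' that yields 30 successful sessions (a LoginSuccess plus a Logout row each, 60 rows) and 30 LoginFailure rows from the wrong-password attempts, 90 rows in total and 30 per interface; the wrong-user attempts are recorded under 'wrong_parallel_user' and are not counted here.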
|
@ -0,0 +1,39 @@
|
||||
<clickhouse>
|
||||
<remote_servers>
|
||||
<simple_cluster>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>node_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>node_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>node_2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</simple_cluster>
|
||||
|
||||
<cluster_non_existent_port>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>node_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>node_1</host>
|
||||
<port>19000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</cluster_non_existent_port>
|
||||
|
||||
</remote_servers>
|
||||
<macros>
|
||||
<default_cluster_macro>simple_cluster</default_cluster_macro>
|
||||
</macros>
|
||||
</clickhouse>
|
@ -657,3 +657,55 @@ def test_read_from_not_existing_container(cluster):
|
||||
query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')"
|
||||
expected_err_msg = "container does not exist"
|
||||
assert expected_err_msg in node.query_and_get_error(query)
|
||||
|
||||
|
||||
def test_function_signatures(cluster):
|
||||
node = cluster.instances["node"]
|
||||
connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;"
|
||||
storage_account_url = "http://azurite1:10000/devstoreaccount1"
|
||||
account_name = "devstoreaccount1"
|
||||
account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
|
||||
azure_query(
|
||||
node,
|
||||
f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)",
|
||||
)
|
||||
|
||||
# " - connection_string, container_name, blobpath\n"
|
||||
query_1 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv')"
|
||||
assert azure_query(node, query_1) == "1\n2\n3\n"
|
||||
|
||||
# " - connection_string, container_name, blobpath, structure \n"
|
||||
query_2 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'column1 UInt32')"
|
||||
assert azure_query(node, query_2) == "1\n2\n3\n"
|
||||
|
||||
# " - connection_string, container_name, blobpath, format \n"
|
||||
query_3 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV')"
|
||||
assert azure_query(node, query_3) == "1\n2\n3\n"
|
||||
|
||||
# " - connection_string, container_name, blobpath, format, compression \n"
|
||||
query_4 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto')"
|
||||
assert azure_query(node, query_4) == "1\n2\n3\n"
|
||||
|
||||
# " - connection_string, container_name, blobpath, format, compression, structure \n"
|
||||
query_5 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto', 'column1 UInt32')"
|
||||
assert azure_query(node, query_5) == "1\n2\n3\n"
|
||||
|
||||
# " - storage_account_url, container_name, blobpath, account_name, account_key\n"
|
||||
query_6 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}')"
|
||||
assert azure_query(node, query_6) == "1\n2\n3\n"
|
||||
|
||||
# " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n"
|
||||
query_7 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'column1 UInt32')"
|
||||
assert azure_query(node, query_7) == "1\n2\n3\n"
|
||||
|
||||
# " - storage_account_url, container_name, blobpath, account_name, account_key, format\n"
|
||||
query_8 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV')"
|
||||
assert azure_query(node, query_8) == "1\n2\n3\n"
|
||||
|
||||
# " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n"
|
||||
query_9 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto')"
|
||||
assert azure_query(node, query_9) == "1\n2\n3\n"
|
||||
|
||||
# " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"
|
||||
query_10 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32')"
|
||||
assert azure_query(node, query_10) == "1\n2\n3\n"
|
||||
|
@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import io
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
from azure.storage.blob import BlobServiceClient
|
||||
import helpers.client
|
||||
import pytest
|
||||
from helpers.cluster import ClickHouseCluster, ClickHouseInstance
|
||||
from helpers.test_tools import TSV
|
||||
from helpers.network import PartitionManager
|
||||
from helpers.mock_servers import start_mock_servers
|
||||
from helpers.test_tools import exec_query_with_retry
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def cluster():
|
||||
try:
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
cluster.add_instance(
|
||||
"node_0",
|
||||
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
|
||||
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
|
||||
with_azurite=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node_1",
|
||||
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
|
||||
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
|
||||
with_azurite=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node_2",
|
||||
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
|
||||
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
|
||||
with_azurite=True,
|
||||
)
|
||||
cluster.start()
|
||||
|
||||
yield cluster
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def azure_query(node, query, try_num=3, settings={}):
|
||||
for i in range(try_num):
|
||||
try:
|
||||
return node.query(query, settings=settings)
|
||||
except Exception as ex:
|
||||
retriable_errors = [
|
||||
"DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response"
|
||||
]
|
||||
retry = False
|
||||
for error in retriable_errors:
|
||||
if error in str(ex):
|
||||
retry = True
|
||||
print(f"Try num: {i}. Having retriable error: {ex}")
|
||||
time.sleep(i)
|
||||
break
|
||||
if not retry or i == try_num - 1:
|
||||
raise Exception(ex)
|
||||
continue
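azure_query is a thin retry wrapper around node.query: only the listed Azurite transport error is retried, with a linearly growing sleep between attempts, and the final attempt re-raises so genuine failures still surface in the tests.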
|
||||
|
||||
|
||||
def get_azure_file_content(filename):
|
||||
container_name = "cont"
|
||||
connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
|
||||
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
|
||||
container_client = blob_service_client.get_container_client(container_name)
|
||||
blob_client = container_client.get_blob_client(filename)
|
||||
download_stream = blob_client.download_blob()
|
||||
return download_stream.readall().decode("utf-8")
|
||||
|
||||
|
||||
def test_select_all(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', "
|
||||
"'auto', 'key UInt64, data String') VALUES (1, 'a'), (2, 'b')",
|
||||
)
|
||||
print(get_azure_file_content("test_cluster_select_all.csv"))
|
||||
|
||||
pure_azure = node.query(
|
||||
"""
|
||||
SELECT * from azureBlobStorage(
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
|
||||
'auto')"""
|
||||
)
|
||||
print(pure_azure)
|
||||
distributed_azure = node.query(
|
||||
"""
|
||||
SELECT * from azureBlobStorageCluster(
|
||||
'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
|
||||
'auto')"""
|
||||
)
|
||||
print(distributed_azure)
|
||||
assert TSV(pure_azure) == TSV(distributed_azure)
|
||||
|
||||
|
||||
def test_count(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', "
|
||||
"'auto', 'key UInt64') VALUES (1), (2)",
|
||||
)
|
||||
print(get_azure_file_content("test_cluster_count.csv"))
|
||||
|
||||
pure_azure = node.query(
|
||||
"""
|
||||
SELECT count(*) from azureBlobStorage(
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
|
||||
'auto', 'key UInt64')"""
|
||||
)
|
||||
print(pure_azure)
|
||||
distributed_azure = node.query(
|
||||
"""
|
||||
SELECT count(*) from azureBlobStorageCluster(
|
||||
'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
|
||||
'auto', 'key UInt64')"""
|
||||
)
|
||||
print(distributed_azure)
|
||||
assert TSV(pure_azure) == TSV(distributed_azure)
|
||||
|
||||
|
||||
def test_union_all(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', "
|
||||
"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')",
|
||||
)
|
||||
|
||||
pure_azure = node.query(
|
||||
"""
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT * from azureBlobStorage(
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
|
||||
'auto', 'a Int32, b String')
|
||||
UNION ALL
|
||||
SELECT * from azureBlobStorage(
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
|
||||
'auto', 'a Int32, b String')
|
||||
)
|
||||
ORDER BY (a)
|
||||
"""
|
||||
)
|
||||
azure_distributed = node.query(
|
||||
"""
|
||||
SELECT * FROM
|
||||
(
|
||||
SELECT * from azureBlobStorageCluster(
|
||||
'simple_cluster',
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
|
||||
'auto', 'a Int32, b String')
|
||||
UNION ALL
|
||||
SELECT * from azureBlobStorageCluster(
|
||||
'simple_cluster',
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
|
||||
'auto', 'a Int32, b String')
|
||||
)
|
||||
ORDER BY (a)
|
||||
"""
|
||||
)
|
||||
|
||||
assert TSV(pure_azure) == TSV(azure_distributed)
|
||||
|
||||
|
||||
def test_skip_unavailable_shards(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
|
||||
"'auto', 'a UInt64') VALUES (1), (2)",
|
||||
)
|
||||
result = node.query(
|
||||
"""
|
||||
SELECT count(*) from azureBlobStorageCluster(
|
||||
'cluster_non_existent_port',
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
|
||||
SETTINGS skip_unavailable_shards = 1
|
||||
"""
|
||||
)
|
||||
|
||||
assert result == "2\n"
|
||||
|
||||
|
||||
def test_unset_skip_unavailable_shards(cluster):
|
||||
# Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards.
|
||||
node = cluster.instances["node_0"]
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
|
||||
"'auto', 'a UInt64') VALUES (1), (2)",
|
||||
)
|
||||
result = node.query(
|
||||
"""
|
||||
SELECT count(*) from azureBlobStorageCluster(
|
||||
'cluster_non_existent_port',
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
|
||||
"""
|
||||
)
|
||||
|
||||
assert result == "2\n"
|
||||
|
||||
|
||||
def test_cluster_with_named_collection(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
|
||||
azure_query(
|
||||
node,
|
||||
"INSERT INTO TABLE FUNCTION azureBlobStorage("
|
||||
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', "
|
||||
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
|
||||
"'auto', 'a UInt64') VALUES (1), (2)",
|
||||
)
|
||||
|
||||
pure_azure = node.query(
|
||||
"""
|
||||
SELECT * from azureBlobStorage(
|
||||
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
|
||||
"""
|
||||
)
|
||||
|
||||
azure_cluster = node.query(
|
||||
"""
|
||||
SELECT * from azureBlobStorageCluster(
|
||||
'simple_cluster', azure_conf2, container='cont', blob_path='test_cluster_with_named_collection.csv')
|
||||
"""
|
||||
)
|
||||
|
||||
assert TSV(pure_azure) == TSV(azure_cluster)
|
||||
|
||||
|
||||
def test_partition_parallel_readig_withcluster(cluster):
|
||||
node = cluster.instances["node_0"]
|
||||
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
|
||||
partition_by = "column3"
|
||||
values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)"
|
||||
filename = "test_tf_{_partition_id}.csv"
|
||||
|
||||
azure_query(
|
||||
node,
|
||||
f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}",
|
||||
)
|
||||
|
||||
assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv")
|
||||
assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv")
|
||||
assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv")
|
||||
|
||||
azure_cluster = node.query(
|
||||
"""
|
||||
SELECT count(*) from azureBlobStorageCluster(
|
||||
'simple_cluster',
|
||||
azure_conf2, container='cont', blob_path='test_tf_*.csv', format='CSV', compression='auto', structure='column1 UInt32, column2 UInt32, column3 UInt32')
|
||||
"""
|
||||
)
|
||||
|
||||
assert azure_cluster == "3\n"
|
@ -12,6 +12,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
|
||||
function wait_query_by_id_started()
|
||||
{
|
||||
# As the query we are waiting for is running simultaneously, let's give it a little time to actually start. The
|
||||
# queries are supposed to run for multiple seconds, so sleeping 0.5 seconds is not a big deal, especially when
|
||||
# flushing the logs can take up to 3 to 5 seconds. By waiting a bit here, we can increase the chance that we
|
||||
# don't have to spend precious time on flushing logs.
|
||||
sleep 0.5
|
||||
local query_id=$1 && shift
|
||||
# wait for query to be started
|
||||
while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do
|
||||
|
@ -0,0 +1,6 @@
|
||||
2
|
||||
SELECT countDistinctIf(number % 10, (number % 5) = 2)
|
||||
FROM numbers(1000)
|
||||
2
|
||||
SELECT uniqExactIf(number % 10, (number % 5) = 2)
|
||||
FROM numbers(1000)
|
@ -0,0 +1,8 @@
|
||||
-- Tags: no-parallel
|
||||
SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
|
||||
EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
|
||||
|
||||
-- disabled by default
|
||||
SET rewrite_count_distinct_if_with_count_distinct_implementation = 1;
|
||||
SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
|
||||
EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
|
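As the reference output above shows, once the setting is enabled EXPLAIN SYNTAX reports uniqExactIf in place of countDistinctIf. A quick way to see the rewrite outside the test harness, assuming only a clickhouse binary on PATH (this snippet is not part of the commit):

#!/usr/bin/env bash
# Compare the query tree with the rewrite off (default) and on.
clickhouse-local --query "EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000)"
clickhouse-local --rewrite_count_distinct_if_with_count_distinct_implementation=1 \
    --query "EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000)"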
48
tests/queries/0_stateless/02661_read_from_archive.lib
Normal file
@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

function read_archive_file() {
    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
    $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')"
    $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2"
    $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
}

function run_archive_test() {
    $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"

    user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

    echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv
    echo -e "5,6\n7,8" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv
    echo -e "9,10\n11,12" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv

    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv > /dev/null"
    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"

    echo "archive1 data1.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
    echo "archive{1..2} data1.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
    echo "archive{1,2} data{1,3}.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1,2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1,3}.csv"
    echo "archive3 data*.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.csv"
    echo "archive* *.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: *.csv"
    echo "archive* {2..3}.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{2..3}.csv"

    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"

    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..3}.$1

    rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1..3}.csv
}
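The library takes the archive extension as its first argument and the archiver command line as its second, as the per-format wrappers below show. A hypothetical wrapper for one more format (bzip2-compressed tar), following the same shape, could look like this; it is only an illustration of the interface and assumes the server can unpack that archive type, which this commit does not establish:

#!/usr/bin/env bash
# Tags: no-fasttest, long
# Hypothetical wrapper, not part of this commit.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib

run_archive_test "tar.bz2" "tar -cjf"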
116
tests/queries/0_stateless/02661_read_from_archive_7z.reference
Normal file
@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK
11
tests/queries/0_stateless/02661_read_from_archive_7z.sh
Executable file
@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib

run_archive_test "7z" "7z a"
116
tests/queries/0_stateless/02661_read_from_archive_tar.reference
Normal file
@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK
11
tests/queries/0_stateless/02661_read_from_archive_tar.sh
Executable file
@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib

run_archive_test "tar" "tar -cvf"
@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK
11
tests/queries/0_stateless/02661_read_from_archive_targz.sh
Executable file
@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib

run_archive_test "tar.gz" "tar -cvzf"
116
tests/queries/0_stateless/02661_read_from_archive_zip.reference
Normal file
@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK
11
tests/queries/0_stateless/02661_read_from_archive_zip.sh
Executable file
@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib

run_archive_test "zip" "zip"
@ -0,0 +1,34 @@
sessions:
150
port_0_sessions:
0
address_0_sessions:
0
tcp_sessions
60
http_sessions
30
http_with_session_id_sessions
30
my_sql_sessions
30
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
138
tests/queries/0_stateless/02833_concurrrent_sessions.sh
Executable file
@ -0,0 +1,138 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

readonly PID=$$

# Each user uses a separate thread.
readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users
readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" )
readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" )
readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}")
readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" )

readonly TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"

readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"

for user in "${ALL_USERS[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'"
    ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}"
    ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}";
done

# All <type>_session functions execute in separate threads.
# These functions try to create a session with a successful login and logout.
# Each iteration sleeps for a small, random amount of time to make concurrency more intense,
# and then tries to log in with an invalid password.
function tcp_session()
{
    local user=$1
    local i=0
    while (( (i++) < 10 )); do
        # login logout
        ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass"
        # login failure
        ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid'
    done
}

function http_session()
{
    local user=$1
    local i=0
    while (( (i++) < 10 )); do
        # login logout
        ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})"

        # login failure
        ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4"
    done
}

function http_with_session_id_session()
{
    local user=$1
    local i=0
    while (( (i++) < 10 )); do
        # login logout
        ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep(0.01${RANDOM})"

        # login failure
        ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6"
    done
}

function mysql_session()
{
    local user=$1
    local i=0
    while (( (i++) < 10 )); do
        # login logout
        ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')"

        # login failure
        ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)"
    done
}

${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"

export -f tcp_session;
export -f http_session;
export -f http_with_session_id_session;
export -f mysql_session;

for user in "${TCP_USERS[@]}"; do
    timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 &
done

for user in "${HTTP_USERS[@]}"; do
    timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 &
done

for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do
    timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 &
done

for user in "${MYSQL_USERS[@]}"; do
    timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 &
done

wait

${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"

echo "sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"

echo "port_0_sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0"

echo "address_0_sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')"

echo "tcp_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'"
echo "http_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
echo "http_with_session_id_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
echo "my_sql_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'"

for user in "${ALL_USERS[@]}"; do
    ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}"
    echo "Corresponding LoginSuccess/Logout"
    ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')"
    echo "LoginFailure"
    ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'"
done
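The final loop above pairs each LoginSuccess row with a Logout row that shares the SESSION_LOG_MATCHING_FIELDS, so only sessions that both opened and closed cleanly are counted. A standalone sketch of that check for a single user, with the field list copied from the script, a placeholder user name, and the usual assumption that $CLICKHOUSE_CLIENT comes from shell_config.sh:

#!/usr/bin/env bash
# Count login/logout pairs that share the same auth_id and client metadata (sketch only).
FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
USER_NAME="some_test_user"  # placeholder, not a user created by this test
${CLICKHOUSE_CLIENT} -q "
    SELECT COUNT(*) FROM (
        SELECT ${FIELDS} FROM system.session_log WHERE user = '${USER_NAME}' AND type = 'LoginSuccess'
        INTERSECT
        SELECT ${FIELDS} FROM system.session_log WHERE user = '${USER_NAME}' AND type = 'Logout')"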
13
tests/queries/0_stateless/02834_remote_session_log.reference
Normal file
@ -0,0 +1,13 @@
0
0
0
0
client_port 0 connections:
0
client_address '::' connections:
0
login failures:
0
TCP Login and logout count is equal
HTTP Login and logout count is equal
MySQL Login and logout count is equal
56
tests/queries/0_stateless/02834_remote_session_log.sh
Executable file
@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

readonly PID=$$
readonly TEST_USER="02834_USER_${PID}"
readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"

${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}"
${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}"

${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"

${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
    -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')"

${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
    -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')"

${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"

${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"

echo "client_port 0 connections:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0"

echo "client_address '::' connections:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')"

echo "login failures:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'"

# The remote(...) function sometimes reuses old cached sessions for query execution.
# This makes the LoginSuccess/Logout entry counts unstable, but successes and logouts must always match.

for interface in 'TCP' 'HTTP' 'MySQL'
do
    LOGIN_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'"`
    CORRESPONDING_LOGOUT_RECORDS_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')"`

    if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then
        echo "${interface} Login and logout count is equal"
    else
        TOTAL_LOGOUT_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'"`
        echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}"
    fi
done

${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"