Merge branch 'master' of github.com:ClickHouse/ClickHouse into export-logs-in-ci

Alexey Milovidov 2023-08-07 18:58:32 +02:00
commit 0bab88b7cb
105 changed files with 5038 additions and 280 deletions

4
.gitmodules vendored
View File

@ -331,6 +331,10 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
[submodule "contrib/libarchive"]
path = contrib/libarchive
url = https://github.com/libarchive/libarchive.git
ignore = dirty
[submodule "contrib/libfiu"]
path = contrib/libfiu
url = https://github.com/ClickHouse/libfiu.git

View File

@ -23,11 +23,8 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19
* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27
* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8
* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf)
add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
add_contrib (libarchive-cmake libarchive)
add_contrib (corrosion-cmake corrosion)

1
contrib/libarchive vendored Submodule

@ -0,0 +1 @@
Subproject commit ee45796171324519f0c0bfd012018dd099296336

View File

@ -0,0 +1,172 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive")
set(SRCS
"${LIBRARY_DIR}/libarchive/archive_acl.c"
"${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
"${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
"${LIBRARY_DIR}/libarchive/archive_check_magic.c"
"${LIBRARY_DIR}/libarchive/archive_cmdline.c"
"${LIBRARY_DIR}/libarchive/archive_cryptor.c"
"${LIBRARY_DIR}/libarchive/archive_digest.c"
"${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c"
"${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c"
"${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c"
"${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c"
"${LIBRARY_DIR}/libarchive/archive_entry.c"
"${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c"
"${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c"
"${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c"
"${LIBRARY_DIR}/libarchive/archive_entry_sparse.c"
"${LIBRARY_DIR}/libarchive/archive_entry_stat.c"
"${LIBRARY_DIR}/libarchive/archive_entry_strmode.c"
"${LIBRARY_DIR}/libarchive/archive_entry_xattr.c"
"${LIBRARY_DIR}/libarchive/archive_getdate.c"
"${LIBRARY_DIR}/libarchive/archive_hmac.c"
"${LIBRARY_DIR}/libarchive/archive_match.c"
"${LIBRARY_DIR}/libarchive/archive_options.c"
"${LIBRARY_DIR}/libarchive/archive_pack_dev.c"
"${LIBRARY_DIR}/libarchive/archive_pathmatch.c"
"${LIBRARY_DIR}/libarchive/archive_ppmd7.c"
"${LIBRARY_DIR}/libarchive/archive_ppmd8.c"
"${LIBRARY_DIR}/libarchive/archive_random.c"
"${LIBRARY_DIR}/libarchive/archive_rb.c"
"${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c"
"${LIBRARY_DIR}/libarchive/archive_read_append_filter.c"
"${LIBRARY_DIR}/libarchive/archive_read.c"
"${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
"${LIBRARY_DIR}/libarchive/archive_read_extract2.c"
"${LIBRARY_DIR}/libarchive/archive_read_extract.c"
"${LIBRARY_DIR}/libarchive/archive_read_open_fd.c"
"${LIBRARY_DIR}/libarchive/archive_read_open_file.c"
"${LIBRARY_DIR}/libarchive/archive_read_open_filename.c"
"${LIBRARY_DIR}/libarchive/archive_read_open_memory.c"
"${LIBRARY_DIR}/libarchive/archive_read_set_format.c"
"${LIBRARY_DIR}/libarchive/archive_read_set_options.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c"
"${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c"
"${LIBRARY_DIR}/libarchive/archive_string.c"
"${LIBRARY_DIR}/libarchive/archive_string_sprintf.c"
"${LIBRARY_DIR}/libarchive/archive_util.c"
"${LIBRARY_DIR}/libarchive/archive_version_details.c"
"${LIBRARY_DIR}/libarchive/archive_virtual.c"
"${LIBRARY_DIR}/libarchive/archive_windows.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c"
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c"
"${LIBRARY_DIR}/libarchive/archive_write.c"
"${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c"
"${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c"
"${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c"
"${LIBRARY_DIR}/libarchive/archive_write_open_fd.c"
"${LIBRARY_DIR}/libarchive/archive_write_open_file.c"
"${LIBRARY_DIR}/libarchive/archive_write_open_filename.c"
"${LIBRARY_DIR}/libarchive/archive_write_open_memory.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_options.c"
"${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c"
"${LIBRARY_DIR}/libarchive/filter_fork_posix.c"
"${LIBRARY_DIR}/libarchive/filter_fork_windows.c"
"${LIBRARY_DIR}/libarchive/xxhash.c"
)
add_library(_libarchive ${SRCS})
target_include_directories(_libarchive PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
"${LIBRARY_DIR}/libarchive"
)
target_compile_definitions(_libarchive PUBLIC
HAVE_CONFIG_H
)
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
if (TARGET ch_contrib::xz)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
endif()
if (TARGET ch_contrib::zlib)
target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
endif()
if (OS_LINUX)
target_compile_definitions(
_libarchive PUBLIC
MAJOR_IN_SYSMACROS=1
HAVE_LINUX_FS_H=1
HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1
HAVE_LINUX_TYPES_H=1
HAVE_SYS_STATFS_H=1
HAVE_FUTIMESAT=1
HAVE_ICONV=1
)
endif()
add_library(ch_contrib::libarchive ALIAS _libarchive)

File diff suppressed because it is too large

View File

@ -19,13 +19,13 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'

View File

@ -130,7 +130,7 @@ COPY misc/ /misc/
# Same options as in test/base/Dockerfile
# (in case you need to override them in tests)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'

View File

@ -41,6 +41,8 @@ RUN apt-get update -y \
zstd \
file \
pv \
zip \
p7zip-full \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2

View File

@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i
:::
``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
```
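The three trailing `flush_time`, `flush_rows`, and `flush_bytes` parameters added by this change are optional. A minimal sketch of a definition that uses them, assuming a destination table `merge.hits` already exists (all numeric thresholds below are illustrative):

``` sql
-- The last three arguments are the new optional flush_time, flush_rows,
-- and flush_bytes thresholds; the rest follow the existing signature.
CREATE TABLE merge.hits_buffer AS merge.hits
ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000, 60, 100000, 10000000)
```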
### Engine parameters:

View File

@ -4578,6 +4578,17 @@ Type: Int64
Default: 0
## rewrite_count_distinct_if_with_count_distinct_implementation
Allows rewriting `countDistinctIf` with the [count_distinct_implementation](#settings-count_distinct_implementation) setting.
Possible values:
- true — Allow.
- false — Disallow.
Default value: `false`.
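A minimal sketch of enabling the rewrite for a session (the query is illustrative; the substituted function is whatever [count_distinct_implementation](#settings-count_distinct_implementation) is set to, `uniqExact` by default):

``` sql
SET rewrite_count_distinct_if_with_count_distinct_implementation = 1;
-- countDistinctIf is rewritten to use the configured count_distinct_implementation function.
SELECT countDistinctIf(number % 10, number > 5) FROM numbers(100);
```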
## precise_float_parsing {#precise_float_parsing}
Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
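As a rough illustration (the literal below is arbitrary), the algorithm can be switched per query with a `SETTINGS` clause; the two results may differ in the least significant digits:

``` sql
SELECT toFloat64('1.7090999999999998') AS x SETTINGS precise_float_parsing = 0;
SELECT toFloat64('1.7090999999999998') AS x SETTINGS precise_float_parsing = 1;
```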

View File

@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 2181
- `-p N`, `--port=N` — Server port. Default value: 9181
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
## Example {#clickhouse-keeper-client-example}
```bash
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:2181 with session_id 137
./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:9181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper

View File

@ -0,0 +1,47 @@
---
slug: /en/sql-reference/table-functions/azureBlobStorageCluster
sidebar_position: 55
sidebar_label: azureBlobStorageCluster
title: "azureBlobStorageCluster Table Function"
---
Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands asterisks in the file path, and dispatches each file dynamically. On a worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md).
**Syntax**
``` sql
azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `connection_string|storage_account_url` — connection_string includes the account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)), or you can provide the storage account URL here and the account name & account key as separate parameters (see parameters account_name & account_key)
- `container_name` - Container name
- `blobpath` - File path. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
- `account_name` - if storage_account_url is used, then account name can be specified here
- `account_key` - if storage_account_url is used, then account key can be specified here
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`).
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
**Examples**
Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster:
``` sql
SELECT count(*) from azureBlobStorageCluster(
'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
'auto', 'key UInt64')
```
**See Also**
- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md)
- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md)

View File

@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or
**Syntax**
``` sql
file(path [,format] [,structure] [,compression])
file([path_to_archive ::] path [,format] [,structure] [,compression])
```
**Parameters**
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path to the file supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `path_to_archive` - The relative path to a zip/tar/7z archive. The path to the archive supports the same globs as `path`.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
**Returned value**
A table with the specified structure for reading or writing data in the specified file.
@ -128,6 +130,11 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
└─────────┴─────────┴─────────┘
```
Getting data from the table stored in `table.csv`, located in `archive1.zip` and/or `archive2.zip`:
``` sql
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
```
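Since this change also wires tar and 7z archives into the archive reader, the same `::` syntax should apply to those formats; a hypothetical example (archive paths are illustrative):

``` sql
SELECT * FROM file('user_files/archives/archive1.tar :: table.csv');
SELECT * FROM file('user_files/archives/archive1.7z :: table.csv');
```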
## Globs in Path
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only the suffix or prefix).

View File

@ -9,7 +9,7 @@ sidebar_label: Buffer
Buffers the data being written in RAM, periodically flushing it to another table. On read, data is read from the buffer and from the other table simultaneously.
``` sql
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
```
Engine parameters:

View File

@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer
Buffers the data being written in RAM, periodically flushing it to another table. During read operations, data is read from the buffer and from the other table simultaneously.
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
Engine parameters: database, table - the table to flush data to (a constant expression returning a string can be used instead of the database name); num_layers - the number of parallelism layers (physically the table is represented as num_layers independent buffers; the recommended value is 16); min_time, max_time, min_rows, max_rows, min_bytes, max_bytes - conditions for flushing data from the buffer.

View File

@ -131,7 +131,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
.binding("host"));
options.addOption(
Poco::Util::Option("port", "p", "server port. default `2181`")
Poco::Util::Option("port", "p", "server port. default `9181`")
.argument("<port>")
.binding("port"));
@ -307,7 +307,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
}
auto host = config().getString("host", "localhost");
auto port = config().getString("port", "2181");
auto port = config().getString("port", "9181");
zk_args.hosts = {host + ":" + port};
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;

View File

@ -18,7 +18,8 @@
namespace
{
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
template <typename T>
requires std::is_fundamental_v<std::decay_t<T>>
void updateHash(SipHash & hash, const T & value)
{
hash.update(value);

View File

@ -576,6 +576,10 @@ if (TARGET ch_contrib::bzip2)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2)
endif()
if (TARGET ch_contrib::libarchive)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::libarchive)
endif()
if (TARGET ch_contrib::minizip)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip)
endif ()

View File

@ -1436,6 +1436,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
ConstraintsDescription{},
String{},
{},
String{},
};
StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
storage->startup();

View File

@ -81,9 +81,9 @@ public:
}
/// Message must be a compile-time constant
template<typename T, typename = std::enable_if_t<std::is_convertible_v<T, String>>>
Exception(int code, T && message)
: Exception(message, code)
template <typename T>
requires std::is_convertible_v<T, String>
Exception(int code, T && message) : Exception(message, code)
{
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = tryGetStaticFormatString(message);

View File

@ -3,8 +3,8 @@
#include <base/defines.h>
#include <base/sort.h>
#include <vector>
#include <utility>
#include <vector>
namespace DB
@ -119,7 +119,8 @@ public:
return true;
}
template <typename TValue = Value, std::enable_if_t<!std::is_same_v<TValue, IntervalTreeVoidValue>, bool> = true, typename... Args>
template <typename TValue = Value, bool = true, typename... Args>
requires(!std::is_same_v<TValue, IntervalTreeVoidValue>)
ALWAYS_INLINE bool emplace(Interval interval, Args &&... args)
{
assert(!tree_is_built);

View File

@ -9,7 +9,8 @@ namespace DB
class NetException : public Exception
{
public:
template<typename T, typename = std::enable_if_t<std::is_convertible_v<T, String>>>
template <typename T>
requires std::is_convertible_v<T, String>
NetException(int code, T && message) : Exception(std::forward<T>(message), code)
{
message_format_string = tryGetStaticFormatString(message);

View File

@ -59,6 +59,7 @@
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT
#cmakedefine01 USE_LIBARCHIVE
/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
/// That's why we use absolute paths.

View File

@ -12,9 +12,9 @@ struct MultiEnum
MultiEnum() = default;
template <typename ... EnumValues, typename = std::enable_if_t<std::conjunction_v<std::is_same<EnumTypeT, EnumValues>...>>>
constexpr explicit MultiEnum(EnumValues ... v)
: MultiEnum((toBitFlag(v) | ... | 0u))
template <typename... EnumValues>
requires std::conjunction_v<std::is_same<EnumTypeT, EnumValues>...>
constexpr explicit MultiEnum(EnumValues... v) : MultiEnum((toBitFlag(v) | ... | 0u))
{}
template <typename ValueType>

View File

@ -536,6 +536,7 @@ class IColumn;
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \

View File

@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},

View File

@ -138,7 +138,6 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto},
{"kusto", Dialect::kusto},
{"prql", Dialect::prql}})
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?

View File

@ -189,10 +189,10 @@ void SerializationNullable::serializeBinary(const IColumn & column, size_t row_n
/// Deserialize value into ColumnNullable.
/// We need to insert both to nested column and to null byte map, or, in case of exception, to not insert at all.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, void>, ReturnType>* = nullptr>
static ReturnType safeDeserialize(
IColumn & column, const ISerialization &,
CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
requires std::same_as<ReturnType, void>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
{
ColumnNullable & col = assert_cast<ColumnNullable &>(column);
@ -217,10 +217,10 @@ static ReturnType safeDeserialize(
}
/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, bool>, ReturnType>* = nullptr>
static ReturnType safeDeserialize(
IColumn & column, const ISerialization &,
CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
requires std::same_as<ReturnType, bool>
static ReturnType
safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
{
bool insert_default = check_for_null();
if (insert_default)

View File

@ -534,18 +534,15 @@ struct JavaHashImpl
static_cast<uint32_t>(x) ^ static_cast<uint32_t>(static_cast<uint64_t>(x) >> 32));
}
template <class T, typename std::enable_if<std::is_same_v<T, int8_t>
|| std::is_same_v<T, int16_t>
|| std::is_same_v<T, int32_t>, T>::type * = nullptr>
template <class T, T * = nullptr>
requires std::same_as<T, int8_t> || std::same_as<T, int16_t> || std::same_as<T, int32_t>
static ReturnType apply(T x)
{
return x;
}
template <typename T, typename std::enable_if<!std::is_same_v<T, int8_t>
&& !std::is_same_v<T, int16_t>
&& !std::is_same_v<T, int32_t>
&& !std::is_same_v<T, int64_t>, T>::type * = nullptr>
template <class T, T * = nullptr>
requires(!std::same_as<T, int8_t> && !std::same_as<T, int16_t> && !std::same_as<T, int32_t>)
static ReturnType apply(T x)
{
if (std::is_unsigned_v<T>)

View File

@ -88,8 +88,9 @@ public:
}
}
template <typename T, typename ... Args, typename = std::enable_if_t<!std::is_same_v<T, DateTime64>>>
inline auto execute(const T & t, Args && ... args) const
template <typename T, typename... Args>
requires (!std::same_as<T, DateTime64>)
inline auto execute(const T & t, Args &&... args) const
{
return wrapped_transform.execute(t, std::forward<Args>(args)...);
}
@ -128,7 +129,8 @@ public:
}
}
template <typename T, typename ... Args, typename = std::enable_if_t<!std::is_same_v<T, DateTime64>>>
template <typename T, typename ... Args>
requires (!std::same_as<T, DateTime64>)
inline auto executeExtendedResult(const T & t, Args && ... args) const
{
return wrapped_transform.executeExtendedResult(t, std::forward<Args>(args)...);

View File

@ -0,0 +1,14 @@
#pragma once
#include "config.h"
#if USE_LIBARCHIVE
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
#include <archive.h>
#include <archive_entry.h>
#endif
#endif

View File

@ -40,18 +40,26 @@ public:
virtual bool nextFile() = 0;
};
virtual const std::string & getPath() const = 0;
/// Starts enumerating files in the archive.
virtual std::unique_ptr<FileEnumerator> firstFile() = 0;
using NameFilter = std::function<bool(const std::string &)>;
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) = 0;
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) = 0;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
virtual std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) = 0;
virtual std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) = 0;
virtual std::vector<std::string> getAllFiles() = 0;
virtual std::vector<std::string> getAllFiles(NameFilter filter) = 0;
/// Sets password used to decrypt files in the archive.
virtual void setPassword(const String & /* password */) {}

View File

@ -0,0 +1,348 @@
#include <IO/Archives/LibArchiveReader.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <IO/Archives/ArchiveUtils.h>
#include <mutex>
namespace DB
{
#if USE_LIBARCHIVE
namespace ErrorCodes
{
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
extern const int CANNOT_READ_ALL_DATA;
extern const int UNSUPPORTED_METHOD;
}
class LibArchiveReader::Handle
{
public:
explicit Handle(std::string path_to_archive_, bool lock_on_reading_)
: path_to_archive(path_to_archive_), lock_on_reading(lock_on_reading_)
{
current_archive = open(path_to_archive);
}
Handle(const Handle &) = delete;
Handle(Handle && other) noexcept
: current_archive(other.current_archive)
, current_entry(other.current_entry)
, lock_on_reading(other.lock_on_reading)
{
other.current_archive = nullptr;
other.current_entry = nullptr;
}
~Handle()
{
close(current_archive);
}
bool locateFile(const std::string & filename)
{
return locateFile([&](const std::string & file) { return file == filename; });
}
bool locateFile(NameFilter filter)
{
resetFileInfo();
int err = ARCHIVE_OK;
while (true)
{
err = readNextHeader(current_archive, &current_entry);
if (err == ARCHIVE_RETRY)
continue;
if (err != ARCHIVE_OK)
break;
if (filter(archive_entry_pathname(current_entry)))
return true;
}
checkError(err);
return false;
}
bool nextFile()
{
resetFileInfo();
int err = ARCHIVE_OK;
do
{
err = readNextHeader(current_archive, &current_entry);
} while (err == ARCHIVE_RETRY);
checkError(err);
return err == ARCHIVE_OK;
}
std::vector<std::string> getAllFiles(NameFilter filter)
{
auto * archive = open(path_to_archive);
SCOPE_EXIT(
close(archive);
);
struct archive_entry * entry = nullptr;
std::vector<std::string> files;
int error = readNextHeader(archive, &entry);
while (error == ARCHIVE_OK || error == ARCHIVE_RETRY)
{
chassert(entry != nullptr);
std::string name = archive_entry_pathname(entry);
if (!filter || filter(name))
files.push_back(std::move(name));
error = readNextHeader(archive, &entry);
}
checkError(error);
return files;
}
const String & getFileName() const
{
chassert(current_entry);
if (!file_name)
file_name.emplace(archive_entry_pathname(current_entry));
return *file_name;
}
const FileInfo & getFileInfo() const
{
chassert(current_entry);
if (!file_info)
{
file_info.emplace();
file_info->uncompressed_size = archive_entry_size(current_entry);
file_info->compressed_size = archive_entry_size(current_entry);
file_info->is_encrypted = false;
}
return *file_info;
}
struct archive * current_archive;
struct archive_entry * current_entry = nullptr;
private:
void checkError(int error) const
{
if (error == ARCHIVE_FATAL)
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Failed to read archive while fetching all files: {}", archive_error_string(current_archive));
}
void resetFileInfo()
{
file_name.reset();
file_info.reset();
}
static struct archive * open(const String & path_to_archive)
{
auto * archive = archive_read_new();
try
{
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
if (archive_read_open_filename(archive, path_to_archive.c_str(), 10240) != ARCHIVE_OK)
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open archive: {}", quoteString(path_to_archive));
}
catch (...)
{
close(archive);
throw;
}
return archive;
}
static void close(struct archive * archive)
{
if (archive)
{
archive_read_close(archive);
archive_read_free(archive);
}
}
int readNextHeader(struct archive * archive, struct archive_entry ** entry) const
{
std::unique_lock lock(Handle::read_lock, std::defer_lock);
if (lock_on_reading)
lock.lock();
return archive_read_next_header(archive, entry);
}
const String path_to_archive;
/// for some archive types when we are reading headers static variables are used
/// which are not thread-safe
const bool lock_on_reading;
static inline std::mutex read_lock;
mutable std::optional<String> file_name;
mutable std::optional<FileInfo> file_info;
};
class LibArchiveReader::FileEnumeratorImpl : public FileEnumerator
{
public:
explicit FileEnumeratorImpl(Handle handle_) : handle(std::move(handle_)) {}
const String & getFileName() const override { return handle.getFileName(); }
const FileInfo & getFileInfo() const override { return handle.getFileInfo(); }
bool nextFile() override { return handle.nextFile(); }
/// Releases owned handle to pass it to a read buffer.
Handle releaseHandle() && { return std::move(handle); }
private:
Handle handle;
};
class LibArchiveReader::ReadBufferFromLibArchive : public ReadBufferFromFileBase
{
public:
explicit ReadBufferFromLibArchive(Handle handle_, std::string path_to_archive_)
: ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
, handle(std::move(handle_))
, path_to_archive(std::move(path_to_archive_))
{}
off_t seek(off_t /* off */, int /* whence */) override
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Seek is not supported when reading from archive");
}
off_t getPosition() override
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive");
}
String getFileName() const override { return handle.getFileName(); }
Handle releaseHandle() &&
{
return std::move(handle);
}
private:
bool nextImpl() override
{
auto bytes_read = archive_read_data(handle.current_archive, internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
if (bytes_read < 0)
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", handle.getFileName(), path_to_archive, archive_error_string(handle.current_archive));
if (!bytes_read)
return false;
total_bytes_read += bytes_read;
working_buffer = internal_buffer;
working_buffer.resize(bytes_read);
return true;
}
Handle handle;
const String path_to_archive;
size_t total_bytes_read = 0;
};
LibArchiveReader::LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_)
: archive_name(std::move(archive_name_)), lock_on_reading(lock_on_reading_), path_to_archive(std::move(path_to_archive_))
{}
LibArchiveReader::~LibArchiveReader() = default;
const std::string & LibArchiveReader::getPath() const
{
return path_to_archive;
}
bool LibArchiveReader::fileExists(const String & filename)
{
Handle handle(path_to_archive, lock_on_reading);
return handle.locateFile(filename);
}
LibArchiveReader::FileInfo LibArchiveReader::getFileInfo(const String & filename)
{
Handle handle(path_to_archive, lock_on_reading);
if (!handle.locateFile(filename))
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive);
return handle.getFileInfo();
}
std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::firstFile()
{
Handle handle(path_to_archive, lock_on_reading);
if (!handle.nextFile())
return nullptr;
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(const String & filename)
{
return readFile([&](const std::string & file) { return file == filename; });
}
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(NameFilter filter)
{
Handle handle(path_to_archive, lock_on_reading);
if (!handle.locateFile(filter))
throw Exception(
ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
}
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader::readFile(std::unique_ptr<FileEnumerator> enumerator)
{
if (!dynamic_cast<FileEnumeratorImpl *>(enumerator.get()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong enumerator passed to readFile()");
auto enumerator_impl = std::unique_ptr<FileEnumeratorImpl>(static_cast<FileEnumeratorImpl *>(enumerator.release()));
auto handle = std::move(*enumerator_impl).releaseHandle();
return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
}
std::unique_ptr<LibArchiveReader::FileEnumerator> LibArchiveReader::nextFile(std::unique_ptr<ReadBuffer> read_buffer)
{
if (!dynamic_cast<ReadBufferFromLibArchive *>(read_buffer.get()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()");
auto read_buffer_from_libarchive = std::unique_ptr<ReadBufferFromLibArchive>(static_cast<ReadBufferFromLibArchive *>(read_buffer.release()));
auto handle = std::move(*read_buffer_from_libarchive).releaseHandle();
if (!handle.nextFile())
return nullptr;
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}
std::vector<std::string> LibArchiveReader::getAllFiles()
{
return getAllFiles({});
}
std::vector<std::string> LibArchiveReader::getAllFiles(NameFilter filter)
{
Handle handle(path_to_archive, lock_on_reading);
return handle.getAllFiles(filter);
}
void LibArchiveReader::setPassword(const String & /*password_*/)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not set password to {} archive", archive_name);
}
#endif
}

View File

@ -0,0 +1,79 @@
#pragma once
#include "config.h"
#include <IO/Archives/IArchiveReader.h>
#include <iostream>
namespace DB
{
#if USE_LIBARCHIVE
class ReadBuffer;
class ReadBufferFromFileBase;
class SeekableReadBuffer;
/// Implementation of IArchiveReader for reading archives using libarchive.
class LibArchiveReader : public IArchiveReader
{
public:
~LibArchiveReader() override;
const std::string & getPath() const override;
/// Returns true if there is a specified file in the archive.
bool fileExists(const String & filename) override;
/// Returns the information about a file stored in the archive.
FileInfo getFileInfo(const String & filename) override;
/// Starts enumerating files in the archive.
std::unique_ptr<FileEnumerator> firstFile() override;
/// Starts reading a file from the archive. The function returns a read buffer,
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) override;
std::vector<std::string> getAllFiles() override;
std::vector<std::string> getAllFiles(NameFilter filter) override;
/// Sets password used to decrypt the contents of the files in the archive.
void setPassword(const String & password_) override;
protected:
/// Constructs an archive's reader that will read from a file in the local filesystem.
LibArchiveReader(std::string archive_name_, bool lock_on_reading_, std::string path_to_archive_);
private:
class ReadBufferFromLibArchive;
class Handle;
class FileEnumeratorImpl;
const std::string archive_name;
const bool lock_on_reading;
const String path_to_archive;
};
class TarArchiveReader : public LibArchiveReader
{
public:
explicit TarArchiveReader(std::string path_to_archive) : LibArchiveReader("tar", /*lock_on_reading_=*/ true, std::move(path_to_archive)) { }
};
class SevenZipArchiveReader : public LibArchiveReader
{
public:
explicit SevenZipArchiveReader(std::string path_to_archive) : LibArchiveReader("7z", /*lock_on_reading_=*/ false, std::move(path_to_archive)) { }
};
#endif
}

View File

@ -85,6 +85,26 @@ public:
file_name = file_name_;
}
void locateFile(NameFilter filter)
{
int err = unzGoToFirstFile(raw_handle);
if (err == UNZ_END_OF_LIST_OF_FILE)
showError("No file was found satisfying the filter");
do
{
checkResult(err);
resetFileInfo();
retrieveFileInfo();
if (filter(getFileName()))
return;
err = unzGoToNextFile(raw_handle);
} while (err != UNZ_END_OF_LIST_OF_FILE);
showError("No file was found satisfying the filter");
}
bool tryLocateFile(const String & file_name_)
{
resetFileInfo();
@ -131,6 +151,27 @@ public:
return *file_info;
}
std::vector<std::string> getAllFiles(NameFilter filter)
{
std::vector<std::string> files;
resetFileInfo();
int err = unzGoToFirstFile(raw_handle);
if (err == UNZ_END_OF_LIST_OF_FILE)
return files;
do
{
checkResult(err);
resetFileInfo();
retrieveFileInfo();
if (!filter || filter(getFileName()))
files.push_back(*file_name);
err = unzGoToNextFile(raw_handle);
} while (err != UNZ_END_OF_LIST_OF_FILE);
return files;
}
void closeFile()
{
int err = unzCloseCurrentFile(raw_handle);
@ -459,6 +500,11 @@ ZipArchiveReader::~ZipArchiveReader()
}
}
const std::string & ZipArchiveReader::getPath() const
{
return path_to_archive;
}
bool ZipArchiveReader::fileExists(const String & filename)
{
return acquireHandle().tryLocateFile(filename);
@ -486,6 +532,13 @@ std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(const String
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(NameFilter filter)
{
auto handle = acquireHandle();
handle.locateFile(filter);
return std::make_unique<ReadBufferFromZipArchive>(std::move(handle));
}
std::unique_ptr<ReadBufferFromFileBase> ZipArchiveReader::readFile(std::unique_ptr<FileEnumerator> enumerator)
{
if (!dynamic_cast<FileEnumeratorImpl *>(enumerator.get()))
@ -506,6 +559,17 @@ std::unique_ptr<ZipArchiveReader::FileEnumerator> ZipArchiveReader::nextFile(std
return std::make_unique<FileEnumeratorImpl>(std::move(handle));
}
std::vector<std::string> ZipArchiveReader::getAllFiles()
{
return getAllFiles({});
}
std::vector<std::string> ZipArchiveReader::getAllFiles(NameFilter filter)
{
auto handle = acquireHandle();
return handle.getAllFiles(filter);
}
void ZipArchiveReader::setPassword(const String & password_)
{
std::lock_guard lock{mutex};

View File

@ -27,6 +27,8 @@ public:
~ZipArchiveReader() override;
const std::string & getPath() const override;
/// Returns true if there is a specified file in the archive.
bool fileExists(const String & filename) override;
@ -40,11 +42,15 @@ public:
/// you can read that buffer to extract uncompressed data from the archive.
/// Several read buffers can be used at the same time in parallel.
std::unique_ptr<ReadBufferFromFileBase> readFile(const String & filename) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(NameFilter filter) override;
/// It's possible to convert a file enumerator to a read buffer and vice versa.
std::unique_ptr<ReadBufferFromFileBase> readFile(std::unique_ptr<FileEnumerator> enumerator) override;
std::unique_ptr<FileEnumerator> nextFile(std::unique_ptr<ReadBuffer> read_buffer) override;
std::vector<std::string> getAllFiles() override;
std::vector<std::string> getAllFiles(NameFilter filter) override;
/// Sets password used to decrypt the contents of the files in the archive.
void setPassword(const String & password_) override;

View File

@ -1,5 +1,6 @@
#include <IO/Archives/createArchiveReader.h>
#include <IO/Archives/ZipArchiveReader.h>
#include <IO/Archives/LibArchiveReader.h>
#include <Common/Exception.h>
@ -29,10 +30,28 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
return std::make_shared<ZipArchiveReader>(path_to_archive, archive_read_function, archive_size);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled");
#endif
}
else if (path_to_archive.ends_with(".tar") || path_to_archive.ends_with("tar.gz"))
{
#if USE_LIBARCHIVE
return std::make_shared<TarArchiveReader>(path_to_archive);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled");
#endif
}
else if (path_to_archive.ends_with(".7z"))
{
#if USE_LIBARCHIVE
return std::make_shared<SevenZipArchiveReader>(path_to_archive);
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "libarchive library is disabled");
#endif
}
else
{
throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive);
}
}
}

View File

@ -45,6 +45,9 @@ bool MySQLPacketPayloadReadBuffer::nextImpl()
}
in.nextIfAtEnd();
/// Don't return a buffer when no bytes available
if (!in.hasPendingData())
return false;
working_buffer = ReadBuffer::Buffer(in.position(), in.buffer().end());
size_t count = std::min(in.available(), payload_length - offset);
working_buffer.resize(count);

View File

@ -536,7 +536,10 @@ void PocoHTTPClient::makeRequestInternalImpl(
}
catch (...)
{
tryLogCurrentException(log, fmt::format("Failed to make request to: {}", uri));
auto error_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true);
error_message.text = fmt::format("Failed to make request to: {}: {}", uri, error_message.text);
LOG_INFO(log, error_message);
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
response->SetClientErrorMessage(getCurrentExceptionMessage(false));

View File

@ -42,13 +42,50 @@ void ZstdDeflatingAppendableWriteBuffer::nextImpl()
if (!offset())
return;
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
if (first_write && append_to_existing_file && isNeedToAddEmptyBlock())
{
addEmptyBlock();
first_write = false;
}
flush(ZSTD_e_flush);
try
{
bool ended = false;
do
{
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_flush);
if (ZSTD_isError(compression_result))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
first_write = false;
out->position() = out->buffer().begin() + output.pos;
bool everything_was_compressed = (input.pos == input.size);
bool everything_was_flushed = compression_result == 0;
ended = everything_was_compressed && everything_was_flushed;
} while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
ZstdDeflatingAppendableWriteBuffer::~ZstdDeflatingAppendableWriteBuffer()
@ -66,58 +103,58 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeImpl()
}
else
{
finalizeBefore();
out->finalize();
finalizeAfter();
try
{
finalizeBefore();
out->finalize();
finalizeAfter();
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
}
void ZstdDeflatingAppendableWriteBuffer::finalizeBefore()
{
/// Actually we can use ZSTD_e_flush here and add empty termination
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
/// but console zstd utility cannot.
flush(ZSTD_e_end);
}
next();
out->nextIfAtEnd();
void ZstdDeflatingAppendableWriteBuffer::flush(ZSTD_EndDirective mode)
{
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
try
{
bool ended = false;
do
{
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
/// Actually we can use ZSTD_e_flush here and add empty termination
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
/// but console zstd utility cannot.
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
while (remaining != 0)
{
if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream encoder end failed: error: '{}' ZSTD version: {}",
ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING);
remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
out->position() = out->buffer().begin() + output.pos;
if (!out->hasPendingData())
{
out->next();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode);
if (ZSTD_isError(compression_result))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
out->position() = out->buffer().begin() + output.pos;
bool everything_was_compressed = (input.pos == input.size);
bool everything_was_flushed = compression_result == 0;
ended = everything_was_compressed && everything_was_flushed;
} while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
}

View File

@ -52,8 +52,6 @@ private:
/// NOTE: will fill compressed data to the out.working_buffer, but will not call out.next method until the buffer is full
void nextImpl() override;
void flush(ZSTD_EndDirective mode);
/// Write terminating ZSTD_e_end: empty block + frame epilogue. BTW it
/// should be almost noop, because frame epilogue contains only checksums,
/// and they are disabled for this buffer.

View File

@ -1,6 +1,7 @@
#include <gtest/gtest.h>
#include "config.h"
#include <IO/Archives/ArchiveUtils.h>
#include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/IArchiveWriter.h>
#include <IO/Archives/createArchiveReader.h>
@ -19,11 +20,52 @@
namespace DB::ErrorCodes
{
extern const int CANNOT_UNPACK_ARCHIVE;
extern const int LOGICAL_ERROR;
}
namespace fs = std::filesystem;
using namespace DB;
enum class ArchiveType : uint8_t
{
Tar,
SevenZip
};
template <ArchiveType archive_type>
bool createArchiveWithFiles(const std::string & archivename, const std::map<std::string, std::string> & files)
{
struct archive * a;
struct archive_entry * entry;
a = archive_write_new();
if constexpr (archive_type == ArchiveType::Tar)
archive_write_set_format_pax_restricted(a);
else if constexpr (archive_type == ArchiveType::SevenZip)
archive_write_set_format_7zip(a);
else
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Invalid archive type requested: {}", static_cast<size_t>(archive_type));
archive_write_open_filename(a, archivename.c_str());
for (const auto & [filename, content] : files) {
entry = archive_entry_new();
archive_entry_set_pathname(entry, filename.c_str());
archive_entry_set_size(entry, content.size());
archive_entry_set_mode(entry, S_IFREG | 0644); // regular file with rw-r--r-- permissions
archive_entry_set_mtime(entry, time(nullptr), 0);
archive_write_header(a, entry);
archive_write_data(a, content.c_str(), content.size());
archive_entry_free(entry);
}
archive_write_close(a);
archive_write_free(a);
return true;
}
class ArchiveReaderAndWriterTest : public ::testing::TestWithParam<const char *>
{
@ -327,6 +369,127 @@ TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist)
[&]{ createArchiveReader(getPathToArchive()); });
}
TEST(TarArchiveReaderTest, FileExists) {
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(filename), true);
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, ReadFile) {
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto in = reader->readFile(filename);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, ReadTwoFiles) {
String archive_path = "archive.tar";
String file1 = "file1.txt";
String contents1 = "test1";
String file2 = "file2.txt";
String contents2 = "test2";
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{file1, contents1}, {file2, contents2}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(file1), true);
EXPECT_EQ(reader->fileExists(file2), true);
auto in = reader->readFile(file1);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);
fs::remove(archive_path);
}
TEST(TarArchiveReaderTest, CheckFileInfo) {
String archive_path = "archive.tar";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::Tar>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto info = reader->getFileInfo(filename);
EXPECT_EQ(info.uncompressed_size, contents.size());
EXPECT_GT(info.compressed_size, 0);
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, FileExists) {
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(filename), true);
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, ReadFile) {
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto in = reader->readFile(filename);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents);
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, CheckFileInfo) {
String archive_path = "archive.7z";
String filename = "file.txt";
String contents = "test";
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{filename, contents}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
auto info = reader->getFileInfo(filename);
EXPECT_EQ(info.uncompressed_size, contents.size());
EXPECT_GT(info.compressed_size, 0);
fs::remove(archive_path);
}
TEST(SevenZipArchiveReaderTest, ReadTwoFiles) {
String archive_path = "archive.7z";
String file1 = "file1.txt";
String contents1 = "test1";
String file2 = "file2.txt";
String contents2 = "test2";
bool created = createArchiveWithFiles<ArchiveType::SevenZip>(archive_path, {{file1, contents1}, {file2, contents2}});
EXPECT_EQ(created, true);
auto reader = createArchiveReader(archive_path);
EXPECT_EQ(reader->fileExists(file1), true);
EXPECT_EQ(reader->fileExists(file2), true);
auto in = reader->readFile(file1);
String str;
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents1);
in = reader->readFile(file2);
readStringUntilEOF(str, *in);
EXPECT_EQ(str, contents2);
fs::remove(archive_path);
}
#if USE_MINIZIP
@ -334,7 +497,7 @@ namespace
{
const char * supported_archive_file_exts[] =
{
".zip",
".zip"
};
}

View File

@ -62,11 +62,13 @@ KeyMetadata::KeyMetadata(
CleanupQueue & cleanup_queue_,
DownloadQueue & download_queue_,
Poco::Logger * log_,
std::shared_mutex & key_prefix_directory_mutex_,
bool created_base_directory_)
: key(key_)
, key_path(key_path_)
, cleanup_queue(cleanup_queue_)
, download_queue(download_queue_)
, key_prefix_directory_mutex(key_prefix_directory_mutex_)
, created_base_directory(created_base_directory_)
, log(log_)
{
@ -102,16 +104,21 @@ bool KeyMetadata::createBaseDirectory()
{
try
{
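        /// Shared lock: many keys may create their base directories concurrently,
        /// while doCleanup() takes a unique lock on the same mutex before removing
        /// an empty key prefix directory.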
std::shared_lock lock(key_prefix_directory_mutex);
fs::create_directories(key_path);
}
catch (...)
catch (const fs::filesystem_error & e)
{
/// Avoid errors like
/// std::__1::__fs::filesystem::filesystem_error: filesystem error: in create_directories: No space left on device
/// and mark file segment with SKIP_CACHE state
tryLogCurrentException(__PRETTY_FUNCTION__);
created_base_directory = false;
return false;
if (e.code() == std::errc::no_space_on_device)
{
                LOG_TRACE(log, "Failed to create base directory for key {} "
                    "because there is no space left on device", key);
return false;
}
throw;
}
}
return true;
@ -200,7 +207,7 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata(
it = emplace(
key, std::make_shared<KeyMetadata>(
key, getPathForKey(key), *cleanup_queue, *download_queue, log, is_initial_load)).first;
key, getPathForKey(key), *cleanup_queue, *download_queue, log, key_prefix_directory_mutex, is_initial_load)).first;
}
key_metadata = it->second;
@ -315,17 +322,10 @@ void CacheMetadata::doCleanup()
try
{
std::unique_lock mutex(key_prefix_directory_mutex);
if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory))
fs::remove(key_prefix_directory);
}
catch (const fs::filesystem_error & e)
{
        /// The key prefix directory may have just become non-empty concurrently; this is expected.
if (e.code() == std::errc::directory_not_empty)
continue;
LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true));
chassert(false);
}
catch (...)
{
LOG_ERROR(log, "Error while removing key {}: {}", cleanup_key, getCurrentExceptionMessage(true));

View File

@ -5,6 +5,7 @@
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
#include <shared_mutex>
namespace DB
{
@ -50,6 +51,7 @@ struct KeyMetadata : public std::map<size_t, FileSegmentMetadataPtr>,
CleanupQueue & cleanup_queue_,
DownloadQueue & download_queue_,
Poco::Logger * log_,
std::shared_mutex & key_prefix_directory_mutex_,
bool created_base_directory_ = false);
enum class KeyState
@ -76,6 +78,7 @@ private:
KeyGuard guard;
CleanupQueue & cleanup_queue;
DownloadQueue & download_queue;
std::shared_mutex & key_prefix_directory_mutex;
std::atomic<bool> created_base_directory = false;
Poco::Logger * log;
};
@ -128,6 +131,7 @@ private:
mutable CacheMetadataGuard guard;
const CleanupQueuePtr cleanup_queue;
const DownloadQueuePtr download_queue;
std::shared_mutex key_prefix_directory_mutex;
Poco::Logger * log;
void downloadImpl(FileSegment & file_segment, std::optional<Memory<>> & memory);

View File

@ -520,6 +520,8 @@ ContextMutablePtr Session::makeSessionContext()
{},
session_context->getSettingsRef().max_sessions_for_user);
recordLoginSucess(session_context);
return session_context;
}
@ -582,6 +584,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std:
{ session_name_ },
max_sessions_for_user);
recordLoginSucess(session_context);
return session_context;
}
@ -655,24 +659,38 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
if (user_id)
user = query_context->getUser();
if (!notified_session_log_about_login)
{
if (auto session_log = getSessionLog())
{
session_log->addLoginSuccess(
auth_id,
named_session ? std::optional<std::string>(named_session->key.second) : std::nullopt,
*query_context,
user);
notified_session_log_about_login = true;
}
}
/// Interserver does not create session context
recordLoginSucess(query_context);
return query_context;
}
void Session::recordLoginSucess(ContextPtr login_context) const
{
if (notified_session_log_about_login)
return;
if (!login_context)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Session or query context must be created");
if (auto session_log = getSessionLog())
{
const auto & settings = login_context->getSettingsRef();
const auto access = login_context->getAccess();
session_log->addLoginSuccess(auth_id,
named_session ? named_session->key.second : "",
settings,
access,
getClientInfo(),
user);
notified_session_log_about_login = true;
}
}
void Session::releaseSessionID()
{
if (!named_session)

View File

@ -97,6 +97,8 @@ public:
private:
std::shared_ptr<SessionLog> getSessionLog() const;
ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const;
void recordLoginSucess(ContextPtr login_context) const;
mutable bool notified_session_log_about_login = false;
const UUID auth_id;

View File

@ -199,12 +199,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length());
}
void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context, const UserPtr & login_user)
void SessionLog::addLoginSuccess(const UUID & auth_id,
const String & session_id,
const Settings & settings,
const ContextAccessPtr & access,
const ClientInfo & client_info,
const UserPtr & login_user)
{
const auto access = login_context.getAccess();
const auto & settings = login_context.getSettingsRef();
const auto & client_info = login_context.getClientInfo();
DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS);
log_entry.client_info = client_info;
@ -215,8 +216,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional<String> ses
}
log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : "";
if (session_id)
log_entry.session_id = *session_id;
log_entry.session_id = session_id;
if (const auto roles_info = access->getRolesInfo())
log_entry.roles = roles_info->getCurrentRolesNames();

View File

@ -20,6 +20,7 @@ enum SessionLogElementType : int8_t
class ContextAccess;
struct User;
using UserPtr = std::shared_ptr<const User>;
using ContextAccessPtr = std::shared_ptr<const ContextAccess>;
/** A struct which will be inserted as row into session_log table.
*
@ -72,7 +73,13 @@ class SessionLog : public SystemLog<SessionLogElement>
using SystemLog<SessionLogElement>::SystemLog;
public:
void addLoginSuccess(const UUID & auth_id, std::optional<String> session_id, const Context & login_context, const UserPtr & login_user);
void addLoginSuccess(const UUID & auth_id,
const String & session_id,
const Settings & settings,
const ContextAccessPtr & access,
const ClientInfo & client_info,
const UserPtr & login_user);
void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional<String> & user, const Exception & reason);
void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info);
};

View File

@ -110,6 +110,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<Customiz
char countifdistinct[] = "countifdistinct";
using CustomizeCountIfDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countifdistinct>>, true>;
char countdistinctif[] = "countdistinctif";
using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinctif>>, true>;
char in[] = "in";
using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>;
@ -1368,6 +1371,12 @@ void TreeRewriter::normalize(
CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"};
CustomizeIfDistinctVisitor(data_distinct_if).visit(query);
if (settings.rewrite_count_distinct_if_with_count_distinct_implementation)
{
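        /// Illustrative example: with count_distinct_implementation = 'uniqExact',
        /// countDistinctIf(x, cond) is rewritten to uniqExactIf(x, cond).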
CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"};
CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query);
}
ExistsExpressionVisitor::Data exists;
ExistsExpressionVisitor(exists).visit(query);

View File

@ -108,7 +108,8 @@ namespace
});
}
template <typename T, typename = std::enable_if_t<std::is_same_v<T, double> || std::is_same_v<T, QuotaValue>>>
template <typename T>
requires std::same_as<T, double> || std::same_as<T, QuotaValue>
T fieldToNumber(const Field & f)
{
if (f.getType() == Field::Types::String)

View File

@ -8,6 +8,7 @@
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnSparse.h>
#include <Core/ColumnWithTypeAndName.h>
#include <base/types.h>
@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector<size_t> & ind
Columns columns;
const Columns & all_cols = chunk.getColumns();
for (const auto & index : indices)
columns.push_back(all_cols.at(index));
{
auto col = recursiveRemoveSparse(all_cols.at(index));
columns.push_back(std::move(col));
}
return columns;
}
@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare()
LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered",
Poco::toLower(getDescription()), fmt::join(column_names, ", "),
stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows,
100 - 100.0 * stat.result_rows / stat.consumed_rows);
stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0);
}
else
{

View File

@ -561,8 +561,7 @@ void HTTPHandler::processQuery(
session->makeSessionContext();
}
auto client_info = session->getClientInfo();
auto context = session->makeQueryContext(std::move(client_info));
auto context = session->makeQueryContext();
/// This parameter is used to tune the behavior of output formats (such as Native) for compatibility.
if (params.has("client_protocol_version"))

View File

@ -300,6 +300,7 @@ void registerStorageAzureBlob(StorageFactory & factory)
args.constraints,
args.comment,
format_settings,
/* distributed_processing */ false,
partition_by);
},
{
@ -448,12 +449,13 @@ StorageAzureBlob::StorageAzureBlob(
const ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_,
bool distributed_processing_,
ASTPtr partition_by_)
: IStorage(table_id_)
, name("AzureBlobStorage")
, configuration(configuration_)
, object_storage(std::move(object_storage_))
, distributed_processing(false)
, distributed_processing(distributed_processing_)
, format_settings(format_settings_)
, partition_by(partition_by_)
{
@ -463,7 +465,7 @@ StorageAzureBlob::StorageAzureBlob(
StorageInMemoryMetadata storage_metadata;
if (columns_.empty())
{
auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context);
auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing);
storage_metadata.setColumns(columns);
}
else
@ -672,7 +674,12 @@ Pipe StorageAzureBlob::read(
Pipes pipes;
std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper;
if (configuration.withGlobs())
if (distributed_processing)
{
iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(local_context,
local_context->getReadTaskCallback());
}
else if (configuration.withGlobs())
{
/// Iterate through disclosed globs and make a source for each file
iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
@ -845,6 +852,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata);
if (outer_blobs)
outer_blobs->emplace_back(blobs_with_metadata.back());
is_finished = true;
return;
}
@ -863,8 +871,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
{
std::lock_guard lock(next_mutex);
if (is_finished)
if (is_finished && index >= blobs_with_metadata.size())
{
return {};
}
bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size();
@ -1184,11 +1194,17 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData(
AzureObjectStorage * object_storage,
const Configuration & configuration,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx)
ContextPtr ctx,
bool distributed_processing)
{
RelativePathsWithMetadata read_keys;
std::shared_ptr<StorageAzureBlobSource::IIterator> file_iterator;
if (configuration.withGlobs())
if (distributed_processing)
{
file_iterator = std::make_shared<StorageAzureBlobSource::ReadIterator>(ctx,
ctx->getReadTaskCallback());
}
else if (configuration.withGlobs())
{
file_iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
object_storage, configuration.container, configuration.blob_path, nullptr, Block{}, ctx, &read_keys);

View File

@ -63,6 +63,7 @@ public:
const ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_,
bool distributed_processing_,
ASTPtr partition_by_);
static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
@ -108,7 +109,8 @@ public:
AzureObjectStorage * object_storage,
const Configuration & configuration,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx);
ContextPtr ctx,
bool distributed_processing = false);
private:
std::string name;
@ -137,7 +139,6 @@ private:
const String & format_name,
const ContextPtr & ctx);
};
class StorageAzureBlobSource : public ISource, WithContext
@ -169,7 +170,7 @@ public:
RelativePathWithMetadata next() override;
~GlobIterator() override = default;
    private:
AzureObjectStorage * object_storage;
std::string container;
String blob_path_with_globs;
@ -194,6 +195,21 @@ public:
std::function<void(FileProgress)> file_progress_callback;
};
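    /// Iterator used on worker nodes during distributed processing: each call to next()
    /// requests the next blob path from the initiator via the read-task callback.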
class ReadIterator : public IIterator
{
public:
explicit ReadIterator(ContextPtr context_,
const ReadTaskCallback & callback_)
: IIterator(context_), callback(callback_) { }
RelativePathWithMetadata next() override
{
return {callback(), {}};
}
private:
ReadTaskCallback callback;
};
class KeysIterator : public IIterator
{
public:

View File

@ -0,0 +1,99 @@
#include "Storages/StorageAzureBlobCluster.h"
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <DataTypes/DataTypeString.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Processors/Sources/RemoteSource.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <Storages/IStorage.h>
#include <Storages/StorageURL.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/StorageDictionary.h>
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <Storages/getVirtualsForStorage.h>
#include <Common/Exception.h>
#include <Parsers/queryToString.h>
#include <TableFunctions/TableFunctionAzureBlobStorageCluster.h>
#include <memory>
#include <string>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
StorageAzureBlobCluster::StorageAzureBlobCluster(
const String & cluster_name_,
const StorageAzureBlob::Configuration & configuration_,
std::unique_ptr<AzureObjectStorage> && object_storage_,
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
ContextPtr context_,
bool structure_argument_was_provided_)
: IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_)
, configuration{configuration_}
, object_storage(std::move(object_storage_))
{
context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL());
StorageInMemoryMetadata storage_metadata;
if (columns_.empty())
{
        /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as a table function
auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false);
storage_metadata.setColumns(columns);
}
else
storage_metadata.setColumns(columns_);
storage_metadata.setConstraints(constraints_);
setInMemoryMetadata(storage_metadata);
auto default_virtuals = NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
for (const auto & column : virtual_columns)
virtual_block.insert({column.type->createColumn(), column.type, column.name});
}
void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context)
{
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query));
TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
}
RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
object_storage.get(), configuration.container, configuration.blob_path,
query, virtual_block, context, nullptr);
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String{ return iterator->next().relative_path; });
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
}
NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const
{
return virtual_columns;
}
}
#endif

View File

@ -0,0 +1,53 @@
#pragma once
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <memory>
#include <optional>
#include "Client/Connection.h"
#include <Interpreters/Cluster.h>
#include <Storages/IStorageCluster.h>
#include <Storages/StorageAzureBlob.h>
namespace DB
{
class Context;
class StorageAzureBlobCluster : public IStorageCluster
{
public:
StorageAzureBlobCluster(
const String & cluster_name_,
const StorageAzureBlob::Configuration & configuration_,
std::unique_ptr<AzureObjectStorage> && object_storage_,
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
ContextPtr context_,
bool structure_argument_was_provided_);
std::string getName() const override { return "AzureBlobStorageCluster"; }
NamesAndTypesList getVirtuals() const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
private:
void updateBeforeRead(const ContextPtr & /*context*/) override {}
void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override;
StorageAzureBlob::Configuration configuration;
NamesAndTypesList virtual_columns;
Block virtual_block;
std::unique_ptr<AzureObjectStorage> object_storage;
};
}
#endif

View File

@ -22,6 +22,8 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <IO/Archives/createArchiveReader.h>
#include <IO/Archives/IArchiveReader.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
@ -57,7 +59,6 @@
#include <cmath>
#include <algorithm>
namespace ProfileEvents
{
extern const Event CreatedReadBufferOrdinary;
@ -379,10 +380,33 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
bool use_table_fd,
int table_fd,
const String & compression_method,
ContextPtr context)
ContextPtr context,
const String & path_to_archive = "")
{
CompressionMethod method;
if (!path_to_archive.empty())
{
auto reader = createArchiveReader(path_to_archive);
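        /// If the path inside the archive contains glob characters, return a buffer for a file
        /// whose name matches the corresponding regular expression; otherwise read the exact path.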
if (current_path.find_first_of("*?{") != std::string::npos)
{
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(current_path));
if (!matcher->ok())
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
"Cannot compile regex from glob ({}): {}", current_path, matcher->error());
return reader->readFile([matcher = std::move(matcher)](const std::string & path)
{
return re2::RE2::FullMatch(path, *matcher);
});
}
else
{
return reader->readFile(current_path);
}
}
if (use_table_fd)
method = chooseCompressionMethod("", compression_method);
else
@ -471,7 +495,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
const std::vector<String> & paths,
const String & compression_method,
const std::optional<FormatSettings> & format_settings,
ContextPtr context)
ContextPtr context,
const std::vector<String> & paths_to_archive)
{
if (format == "Distributed")
{
@ -491,30 +516,62 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
if (context->getSettingsRef().schema_inference_use_cache_for_file)
columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context);
ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
ReadBufferIterator read_buffer_iterator;
if (paths_to_archive.empty())
{
String path;
struct stat file_stat;
do
read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
{
if (it == paths.end())
String path;
struct stat file_stat;
do
{
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
format);
return nullptr;
if (it == paths.end())
{
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
format);
return nullptr;
}
path = *it++;
file_stat = getFileStat(path, false, -1, "File");
}
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
path = *it++;
file_stat = getFileStat(path, false, -1, "File");
}
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
first = false;
return createReadBuffer(path, file_stat, false, -1, compression_method, context);
};
}
else
{
read_buffer_iterator = [&, path_it = paths.begin(), archive_it = paths_to_archive.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
{
String path;
struct stat file_stat;
do
{
if (archive_it == paths_to_archive.end())
{
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
format);
return nullptr;
}
first = false;
return createReadBuffer(path, file_stat, false, -1, compression_method, context);
};
path = *archive_it++;
file_stat = getFileStat(path, false, -1, "File");
}
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
first = false;
return createReadBuffer(*path_it, file_stat, false, -1, compression_method, context, path);
};
}
ColumnsDescription columns;
if (columns_from_cache)
@ -566,8 +623,17 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args)
StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args)
: StorageFile(args)
{
if (!args.path_to_archive.empty())
{
paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read);
paths = {table_path_};
}
else
{
paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
}
is_db_table = false;
paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
is_path_with_globs = paths.size() > 1;
if (!paths.empty())
path_for_partitioned_write = paths.front();
@ -621,7 +687,7 @@ void StorageFile::setStorageMetadata(CommonArguments args)
columns = getTableStructureFromFileDescriptor(args.getContext());
else
{
columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext());
columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive);
if (!args.columns.empty() && args.columns != columns)
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different");
}
@ -654,7 +720,9 @@ public:
class FilesIterator
{
public:
explicit FilesIterator(const Strings & files_) : files(files_)
explicit FilesIterator(
const Strings & files_, std::vector<std::string> archives_, std::vector<std::pair<uint64_t, std::string>> files_in_archive_)
: files(files_), archives(std::move(archives_)), files_in_archive(std::move(files_in_archive_))
{
}
@ -667,8 +735,25 @@ public:
return files[current_index];
}
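    /// Returns {path to archive, file inside that archive} for the next entry of
    /// files_in_archive, or a pair of empty strings when all entries are consumed.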
std::pair<String, String> nextFileFromArchive()
{
auto current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= files_in_archive.size())
return {"", ""};
const auto & [archive_index, filename] = files_in_archive[current_index];
return {archives[archive_index], filename};
}
bool fromArchive() const
{
return !archives.empty();
}
private:
std::vector<std::string> files;
std::vector<std::string> archives;
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
std::atomic<size_t> index = 0;
};
@ -776,9 +861,35 @@ public:
{
if (!storage->use_table_fd)
{
current_path = files_iterator->next();
if (current_path.empty())
return {};
if (files_iterator->fromArchive())
{
auto [archive, filename] = files_iterator->nextFileFromArchive();
if (archive.empty())
return {};
current_path = std::move(filename);
if (!archive_reader || archive_reader->getPath() != archive)
{
archive_reader = createArchiveReader(archive);
file_enumerator = archive_reader->firstFile();
}
if (file_enumerator == nullptr)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to find a file in archive {}", archive);
while (file_enumerator->getFileName() != current_path)
{
if (!file_enumerator->nextFile())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected file {} is missing from archive {}", current_path, archive);
}
}
else
{
current_path = files_iterator->next();
if (current_path.empty())
return {};
}
/// Special case for distributed format. Defaults are not needed here.
if (storage->format_name == "Distributed")
@ -791,10 +902,24 @@ public:
if (!read_buf)
{
auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
struct stat file_stat;
if (archive_reader == nullptr)
{
file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
continue;
}
if (archive_reader == nullptr)
{
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
}
else
{
chassert(file_enumerator);
read_buf = archive_reader->readFile(std::move(file_enumerator));
}
}
const Settings & settings = context->getSettingsRef();
@ -861,7 +986,11 @@ public:
reader.reset();
pipeline.reset();
input_format.reset();
read_buf.reset();
if (archive_reader != nullptr)
file_enumerator = archive_reader->nextFile(std::move(read_buf));
else
read_buf.reset();
}
return {};
@ -879,6 +1008,9 @@ private:
std::unique_ptr<QueryPipeline> pipeline;
std::unique_ptr<PullingPipelineExecutor> reader;
std::shared_ptr<IArchiveReader> archive_reader;
std::unique_ptr<IArchiveReader::FileEnumerator> file_enumerator = nullptr;
ColumnsDescription columns_description;
NamesAndTypesList requested_columns;
NamesAndTypesList requested_virtual_columns;
@ -908,21 +1040,67 @@ Pipe StorageFile::read(
}
else
{
if (paths.size() == 1 && !fs::exists(paths[0]))
const auto & p = paths_to_archive.empty() ? paths : paths_to_archive;
if (p.size() == 1 && !fs::exists(p[0]))
{
if (context->getSettingsRef().engine_file_empty_if_not_exists)
return Pipe(std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
else
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]);
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]);
}
}
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths);
std::vector<std::pair<uint64_t, std::string>> files_in_archive;
size_t files_in_archive_num = 0;
if (!paths_to_archive.empty())
{
if (paths.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive");
const auto & path = paths[0];
IArchiveReader::NameFilter filter;
if (path.find_first_of("*?{") != std::string::npos)
{
auto matcher = std::make_shared<re2::RE2>(makeRegexpPatternFromGlobs(path));
if (!matcher->ok())
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
"Cannot compile regex from glob ({}): {}", path, matcher->error());
filter = [matcher](const std::string & p)
{
return re2::RE2::FullMatch(p, *matcher);
};
}
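        /// Enumerate the files to read per archive: with a glob, list every matching file inside
        /// each archive; otherwise assume each archive contains the single requested path.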
for (size_t i = 0; i < paths_to_archive.size(); ++i)
{
if (filter)
{
const auto & path_to_archive = paths_to_archive[i];
auto archive_reader = createArchiveReader(path_to_archive);
auto files = archive_reader->getAllFiles(filter);
for (auto & file : files)
files_in_archive.push_back({i, std::move(file)});
}
else
{
files_in_archive.push_back({i, path});
}
}
files_in_archive_num = files_in_archive.size();
}
auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, paths_to_archive, std::move(files_in_archive));
auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
size_t num_streams = max_num_streams;
if (max_num_streams > paths.size())
num_streams = paths.size();
auto files_to_read = std::max(files_in_archive_num, paths.size());
if (max_num_streams > files_to_read)
num_streams = files_to_read;
Pipes pipes;
pipes.reserve(num_streams);
@ -1202,6 +1380,9 @@ SinkToStoragePtr StorageFile::write(
ContextPtr context,
bool /*async_insert*/)
{
if (!use_table_fd && !paths_to_archive.empty())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported");
if (format_name == "Distributed")
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format");
@ -1375,6 +1556,7 @@ void registerStorageFile(StorageFactory & factory)
factory_args.constraints,
factory_args.comment,
{},
{},
};
ASTs & engine_args_ast = factory_args.engine_args;
@ -1445,7 +1627,7 @@ void registerStorageFile(StorageFactory & factory)
else if (type == Field::Types::UInt64)
source_fd = static_cast<int>(literal->value.get<UInt64>());
else if (type == Field::Types::String)
source_path = literal->value.get<String>();
StorageFile::parseFileSource(literal->value.get<String>(), source_path, storage_args.path_to_archive);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor");
}
@ -1517,4 +1699,32 @@ void StorageFile::addColumnsToCache(
schema_cache.addMany(cache_keys, columns);
}
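/// Splits a source of the form "<path-to-archive> :: <path-inside-archive>" into its two parts,
/// e.g. (illustrative) "data.tar :: file.csv" -> path_to_archive = "data.tar", filename = "file.csv".
/// A source without "::" is treated as a plain file path.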
void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive)
{
size_t pos = source.find("::");
if (pos == String::npos)
{
filename = std::move(source);
return;
}
std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos);
    while (!path_to_archive_view.empty() && path_to_archive_view.back() == ' ')
        path_to_archive_view.remove_suffix(1);
if (path_to_archive_view.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty");
path_to_archive = path_to_archive_view;
std::string_view filename_view = std::string_view{source}.substr(pos + 2);
    while (!filename_view.empty() && filename_view.front() == ' ')
        filename_view.remove_prefix(1);
if (filename_view.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty");
filename = filename_view;
}
}

View File

@ -22,8 +22,8 @@ public:
const ColumnsDescription & columns;
const ConstraintsDescription & constraints;
const String & comment;
const std::string rename_after_processing;
std::string path_to_archive;
};
/// From file descriptor
@ -90,10 +90,13 @@ public:
const std::vector<String> & paths,
const String & compression_method,
const std::optional<FormatSettings> & format_settings,
ContextPtr context);
ContextPtr context,
const std::vector<String> & paths_to_archive = {"auto"});
static SchemaCache & getSchemaCache(const ContextPtr & context);
static void parseFileSource(String source, String & filename, String & path_to_archive);
protected:
friend class StorageFileSource;
friend class StorageFileSink;
@ -123,6 +126,7 @@ private:
std::string base_path;
std::vector<std::string> paths;
std::vector<std::string> paths_to_archive;
bool is_db_table = true; /// Table is stored in real database, not user's file
bool use_table_fd = false; /// Use table_fd instead of path

View File

@ -42,6 +42,7 @@ protected:
virtual String getFormatFromFirstArgument();
String filename;
String path_to_archive;
String format = "auto";
String structure = "auto";
String compression_method = "auto";

View File

@ -44,10 +44,8 @@ bool isConnectionString(const std::string & candidate)
}
StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file)
void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context)
{
StorageAzureBlob::Configuration configuration;
/// Supported signatures:
///
/// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
@ -59,87 +57,80 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
configuration.blobs_paths = {configuration.blob_path};
if (configuration.format == "auto" && get_format_from_file)
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
return configuration;
}
if (engine_args.size() < 3 || engine_args.size() > 8)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Storage Azure requires 3 to 7 arguments: "
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
std::unordered_map<std::string_view, size_t> engine_args_to_idx;
configuration.connection_url = checkAndGetLiteralArgument<String>(engine_args[0], "connection_string/storage_account_url");
configuration.is_connection_string = isConnectionString(configuration.connection_url);
configuration.container = checkAndGetLiteralArgument<String>(engine_args[1], "container");
configuration.blob_path = checkAndGetLiteralArgument<String>(engine_args[2], "blobpath");
auto is_format_arg = [] (const std::string & s) -> bool
else
{
return s == "auto" || FormatFactory::instance().getAllFormats().contains(s);
};
if (engine_args.size() < 3 || engine_args.size() > 8)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Storage Azure requires 3 to 7 arguments: "
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
if (engine_args.size() == 4)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name/structure");
if (is_format_arg(fourth_arg))
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
std::unordered_map<std::string_view, size_t> engine_args_to_idx;
configuration.connection_url = checkAndGetLiteralArgument<String>(engine_args[0], "connection_string/storage_account_url");
configuration.is_connection_string = isConnectionString(configuration.connection_url);
configuration.container = checkAndGetLiteralArgument<String>(engine_args[1], "container");
configuration.blob_path = checkAndGetLiteralArgument<String>(engine_args[2], "blobpath");
auto is_format_arg
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); };
if (engine_args.size() == 4)
{
configuration.format = fourth_arg;
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name/structure");
if (is_format_arg(fourth_arg))
{
configuration.format = fourth_arg;
}
else
{
configuration.structure = fourth_arg;
}
}
else
else if (engine_args.size() == 5)
{
configuration.structure = fourth_arg;
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
{
configuration.format = fourth_arg;
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
}
else
{
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
}
}
}
else if (engine_args.size() == 5)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
else if (engine_args.size() == 6)
{
configuration.format = fourth_arg;
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
{
configuration.format = fourth_arg;
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[5], "structure");
}
else
{
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name/structure");
if (is_format_arg(sixth_arg))
configuration.format = sixth_arg;
else
configuration.structure = sixth_arg;
}
}
else
{
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
}
}
else if (engine_args.size() == 6)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
{
configuration.format = fourth_arg;
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[4], "compression");
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[5], "structure");
}
else
{
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
if (!is_format_arg(sixth_arg))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg);
configuration.format = sixth_arg;
}
}
else if (engine_args.size() == 7)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format, compression and structure must be last arguments");
}
else
else if (engine_args.size() == 7)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
@ -148,17 +139,9 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
configuration.format = sixth_arg;
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[6], "compression");
}
}
else if (engine_args.size() == 8)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (is_format_arg(fourth_arg))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments");
}
else
else if (engine_args.size() == 8)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
configuration.account_name = fourth_arg;
configuration.account_key = checkAndGetLiteralArgument<String>(engine_args[4], "account_key");
auto sixth_arg = checkAndGetLiteralArgument<String>(engine_args[5], "format/account_name");
@ -168,14 +151,12 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp
configuration.compression_method = checkAndGetLiteralArgument<String>(engine_args[6], "compression");
configuration.structure = checkAndGetLiteralArgument<String>(engine_args[7], "structure");
}
configuration.blobs_paths = {configuration.blob_path};
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
}
configuration.blobs_paths = {configuration.blob_path};
if (configuration.format == "auto" && get_format_from_file)
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true);
return configuration;
}
void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context)
@ -190,7 +171,87 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function,
auto & args = args_func.at(0)->children;
configuration = parseArgumentsImpl(args, context);
parseArgumentsImpl(args, context);
}
void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context)
{
if (tryGetNamedCollectionWithOverrides(args, context))
{
/// In case of named collection, just add key-value pair "structure='...'"
        /// at the end of the arguments to override the existing structure.
ASTs equal_func_args = {std::make_shared<ASTIdentifier>("structure"), std::make_shared<ASTLiteral>(structure)};
auto equal_func = makeASTFunction("equals", std::move(equal_func_args));
args.push_back(equal_func);
}
else
{
if (args.size() < 3 || args.size() > 8)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Storage Azure requires 3 to 7 arguments: "
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])");
auto structure_literal = std::make_shared<ASTLiteral>(structure);
auto is_format_arg
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); };
if (args.size() == 3)
{
/// Add format=auto & compression=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
else if (args.size() == 4)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name/structure");
if (is_format_arg(fourth_arg))
{
/// Add compression=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
else
{
args.back() = structure_literal;
}
}
else if (args.size() == 5)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name");
if (!is_format_arg(fourth_arg))
{
/// Add format=auto & compression=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(std::make_shared<ASTLiteral>("auto"));
}
args.push_back(structure_literal);
}
else if (args.size() == 6)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/account_name");
if (!is_format_arg(fourth_arg))
{
/// Add compression=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
else
{
args.back() = structure_literal;
}
}
else if (args.size() == 7)
{
args.push_back(structure_literal);
}
else if (args.size() == 8)
{
args.back() = structure_literal;
}
}
}
ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const
@ -202,7 +263,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex
auto settings = StorageAzureBlob::createSettings(context);
auto object_storage = std::make_unique<AzureObjectStorage>("AzureBlobStorageTableFunction", std::move(client), std::move(settings));
return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context);
return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false);
}
return parseColumnsListFromString(configuration.structure, context);
@ -234,6 +295,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct
String{},
/// No format_settings for table function Azure
std::nullopt,
/* distributed_processing */ false,
nullptr);
storage->startup();

View File

@ -13,13 +13,23 @@ namespace DB
class Context;
/* AzureBlob(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in AzureBlob.
/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob.
*/
class TableFunctionAzureBlobStorage : public ITableFunction
{
public:
static constexpr auto name = "azureBlobStorage";
static constexpr auto signature = "- connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]\n";
static constexpr auto signature = " - connection_string, container_name, blobpath\n"
" - connection_string, container_name, blobpath, structure \n"
" - connection_string, container_name, blobpath, format \n"
" - connection_string, container_name, blobpath, format, compression \n"
" - connection_string, container_name, blobpath, format, compression, structure \n"
" - storage_account_url, container_name, blobpath, account_name, account_key\n"
" - storage_account_url, container_name, blobpath, account_name, account_key, structure\n"
" - storage_account_url, container_name, blobpath, account_name, account_key, format\n"
" - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n"
" - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n";
static size_t getMaxNumberOfArguments() { return 8; }
@ -46,7 +56,9 @@ public:
return {"_path", "_file"};
}
static StorageAzureBlob::Configuration parseArgumentsImpl(ASTs & args, const ContextPtr & context, bool get_format_from_file = true);
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context);
static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context);
protected:

View File

@ -0,0 +1,85 @@
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <TableFunctions/TableFunctionAzureBlobStorageCluster.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Storages/StorageAzureBlob.h>
#include "registerTableFunctions.h"
#include <memory>
namespace DB
{
StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl(
const ASTPtr & /*function*/, ContextPtr context,
const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const
{
StoragePtr storage;
ColumnsDescription columns;
bool structure_argument_was_provided = configuration.structure != "auto";
if (structure_argument_was_provided)
{
columns = parseColumnsListFromString(configuration.structure, context);
}
else if (!structure_hint.empty())
{
columns = structure_hint;
}
auto client = StorageAzureBlob::createClient(configuration, !is_insert_query);
auto settings = StorageAzureBlob::createSettings(context);
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
{
        /// On a worker node this filename won't contain globs
storage = std::make_shared<StorageAzureBlob>(
configuration,
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
context,
StorageID(getDatabaseName(), table_name),
columns,
ConstraintsDescription{},
/* comment */String{},
/* format_settings */std::nullopt, /// No format_settings
/* distributed_processing */ true,
/*partition_by_=*/nullptr);
}
else
{
storage = std::make_shared<StorageAzureBlobCluster>(
cluster_name,
configuration,
std::make_unique<AzureObjectStorage>(table_name, std::move(client), std::move(settings)),
StorageID(getDatabaseName(), table_name),
columns,
ConstraintsDescription{},
context,
structure_argument_was_provided);
}
storage->startup();
return storage;
}
void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionAzureBlobStorageCluster>(
{.documentation
= {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)",
.examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}},
.allow_readonly = false}
);
}
}
#endif

View File

@ -0,0 +1,55 @@
#pragma once
#include "config.h"
#if USE_AZURE_BLOB_STORAGE
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionAzureBlobStorage.h>
#include <TableFunctions/ITableFunctionCluster.h>
#include <Storages/StorageAzureBlobCluster.h>
namespace DB
{
class Context;
/**
* azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure)
 * A table function that allows processing many files from Azure Blob Storage on a specific cluster.
 * On the initiator it creates a connection to _all_ nodes in the cluster, expands asterisks
 * in the Azure Blob Storage file path and dispatches each file dynamically.
 * On each worker node it asks the initiator for the next task to process and processes it.
* This is repeated until the tasks are finished.
*/
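/// Illustrative usage (all names below are hypothetical):
///     SELECT count() FROM azureBlobStorageCluster('my_cluster', 'https://account.blob.core.windows.net', 'container', 'data/*.parquet', 'account', '<key>', 'Parquet')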
class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster<TableFunctionAzureBlobStorage>
{
public:
static constexpr auto name = "azureBlobStorageCluster";
static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]";
String getName() const override
{
return name;
}
String getSignature() const override
{
return signature;
}
protected:
StoragePtr executeImpl(
const ASTPtr & ast_function,
ContextPtr context,
const std::string & table_name,
ColumnsDescription cached_columns,
bool is_insert_query) const override;
const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; }
};
}
#endif

View File

@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr
if (context->getApplicationType() != Context::ApplicationType::LOCAL)
{
ITableFunctionFileLike::parseFirstArguments(arg, context);
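        /// Split "archive :: filename" sources, e.g. (illustrative) file('archive.tar :: data.csv', 'CSV')
        /// reads data.csv from inside archive.tar.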
StorageFile::parseFileSource(std::move(filename), filename, path_to_archive);
return;
}
@ -39,6 +40,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr
fd = STDOUT_FILENO;
else if (filename == "stderr")
fd = STDERR_FILENO;
else
StorageFile::parseFileSource(std::move(filename), filename, path_to_archive);
}
else if (type == Field::Types::Int64 || type == Field::Types::UInt64)
{
@ -76,7 +79,9 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
ConstraintsDescription{},
String{},
global_context->getSettingsRef().rename_files_after_processing,
path_to_archive,
};
if (fd >= 0)
return std::make_shared<StorageFile>(fd, args);
@ -90,8 +95,15 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
if (fd >= 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName());
size_t total_bytes_to_read = 0;
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context);
Strings paths;
Strings paths_to_archives;
if (path_to_archive.empty())
paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
else
paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read);
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives);
}

View File

@ -75,6 +75,7 @@ void registerTableFunctions()
#if USE_AZURE_BLOB_STORAGE
registerTableFunctionAzureBlobStorage(factory);
registerTableFunctionAzureBlobStorageCluster(factory);
#endif

View File

@ -72,6 +72,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory);
#if USE_AZURE_BLOB_STORAGE
void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory);
void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory);
#endif
void registerTableFunctions();

View File

@ -162,5 +162,8 @@ endif ()
if (TARGET ch_contrib::fiu)
set(FIU_ENABLE 1)
endif()
if (TARGET ch_contrib::libarchive)
set(USE_LIBARCHIVE 1)
endif()
set(SOURCE_DIR ${CMAKE_SOURCE_DIR})

View File

@ -18,6 +18,8 @@
<session_timeout_ms>15000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
<force_sync>false</force_sync>
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
</coordination_settings>
<raft_configuration>

View File

@ -18,6 +18,8 @@
<session_timeout_ms>15000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
<force_sync>false</force_sync>
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
</coordination_settings>
<raft_configuration>

View File

@ -18,6 +18,8 @@
<session_timeout_ms>15000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
<force_sync>false</force_sync>
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
</coordination_settings>
<raft_configuration>

View File

@ -2592,6 +2592,20 @@ def named_collections(clickhouse_node, mysql_node, service_name):
f"/* expect: (1, 'a', 1), (2, 'b', 2) */ SELECT * FROM {db}.t1",
"1\ta\t1\n2\tb\t2\n",
)
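    # Break the named collection (wrong port), detach the database and insert more data on the MySQL side:
    # ATTACH must fail until the collection is fixed, and after restoring the port the new row is replicated.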
clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=9999")
clickhouse_node.query(f"DETACH DATABASE {db}")
mysql_node.query(f"INSERT INTO {db}.t1 VALUES (3, 'c', 3)")
assert "ConnectionFailed:" in clickhouse_node.query_and_get_error(
f"ATTACH DATABASE {db}"
)
clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=3306")
clickhouse_node.query(f"ATTACH DATABASE {db}")
check_query(
clickhouse_node,
f"/* expect: (1, 'a', 1), (2, 'b', 2), (3, 'c', 3) */ SELECT * FROM {db}.t1",
"1\ta\t1\n2\tb\t2\n3\tc\t3\n",
)
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")


@ -27,10 +27,7 @@ proto_dir = os.path.join(SCRIPT_DIR, "./protos")
gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
os.makedirs(gen_dir, exist_ok=True)
run_and_check(
"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} \
{proto_dir}/clickhouse_grpc.proto".format(
proto_dir=proto_dir, gen_dir=gen_dir
),
f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
shell=True,
)


@ -0,0 +1 @@
_gen


@ -0,0 +1,9 @@
<clickhouse>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
</clickhouse>


@ -0,0 +1,9 @@
<clickhouse>
<postgresql_port>5433</postgresql_port>
<mysql_port>9001</mysql_port>
<grpc_port>9100</grpc_port>
<grpc replace="replace">
<!-- Enable if you want very detailed logs -->
<verbose_logs>false</verbose_logs>
</grpc>
</clickhouse>


@ -0,0 +1,9 @@
<clickhouse>
<session_log>
<database>system</database>
<table>session_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</session_log>
</clickhouse>


@ -0,0 +1,23 @@
<clickhouse>
<profiles>
<default>
<function_sleep_max_microseconds_per_block>0</function_sleep_max_microseconds_per_block>
</default>
</profiles>
<users>
<default>
</default>
<mysql_user>
<password>pass</password>
</mysql_user>
<postgres_user>
<password>pass</password>
</postgres_user>
<grpc_user>
<password>pass</password>
</grpc_user>
<parallel_user>
<password>pass</password>
</parallel_user>
</users>
</clickhouse>


@ -0,0 +1 @@
../../../../src/Server/grpc_protos/clickhouse_grpc.proto


@ -0,0 +1,289 @@
import os
import grpc
import pymysql.connections
import psycopg2 as py_psql
import pytest
import random
import sys
import threading
from helpers.cluster import ClickHouseCluster, run_and_check
POSTGRES_SERVER_PORT = 5433
MYSQL_SERVER_PORT = 9001
GRPC_PORT = 9100
SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
DEFAULT_ENCODING = "utf-8"
# Use grpcio-tools to generate *pb2.py files from *.proto.
proto_dir = os.path.join(SCRIPT_DIR, "./protos")
gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
os.makedirs(gen_dir, exist_ok=True)
run_and_check(
f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
shell=True,
)
sys.path.append(gen_dir)
import clickhouse_grpc_pb2
import clickhouse_grpc_pb2_grpc
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance(
"node",
main_configs=[
"configs/ports.xml",
"configs/log.xml",
"configs/session_log.xml",
],
user_configs=["configs/users.xml"],
    # A bug in TSAN reproduces in this test: https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387
env_variables={
"TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="")
},
)
def grpc_get_url():
return f"{instance.ip_address}:{GRPC_PORT}"
def grpc_create_insecure_channel():
channel = grpc.insecure_channel(grpc_get_url())
grpc.channel_ready_future(channel).result(timeout=2)
return channel
session_id_counter = 0
def next_session_id():
global session_id_counter
session_id = session_id_counter
session_id_counter += 1
return str(session_id)
def grpc_query(query, user_, pass_, raise_exception):
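    # Run a query over gRPC in a fresh session; failures are only tolerated when raise_exception is True.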
try:
query_info = clickhouse_grpc_pb2.QueryInfo(
query=query,
session_id=next_session_id(),
user_name=user_,
password=pass_,
)
channel = grpc_create_insecure_channel()
stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
result = stub.ExecuteQuery(query_info)
if result and result.HasField("exception"):
raise Exception(result.exception.display_text)
return result.output.decode(DEFAULT_ENCODING)
except Exception:
assert raise_exception
def postgres_query(query, user_, pass_, raise_exception):
try:
client = py_psql.connect(
host=instance.ip_address,
port=POSTGRES_SERVER_PORT,
user=user_,
password=pass_,
database="default",
)
cursor = client.cursor()
cursor.execute(query)
cursor.fetchall()
except Exception:
assert raise_exception
def mysql_query(query, user_, pass_, raise_exception):
try:
client = pymysql.connections.Connection(
host=instance.ip_address,
user=user_,
password=pass_,
database="default",
port=MYSQL_SERVER_PORT,
)
cursor = client.cursor(pymysql.cursors.DictCursor)
if raise_exception:
with pytest.raises(Exception):
cursor.execute(query)
else:
cursor.execute(query)
cursor.fetchall()
except Exception:
assert raise_exception
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_grpc_session(started_cluster):
grpc_query("SELECT 1", "grpc_user", "pass", False)
grpc_query("SELECT 2", "grpc_user", "wrong_pass", True)
grpc_query("SELECT 3", "wrong_grpc_user", "pass", True)
instance.query("SYSTEM FLUSH LOGS")
login_success_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'"
)
assert login_success_records == "grpc_user\t1\t1\n"
logout_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'"
)
assert logout_records == "grpc_user\t1\t1\n"
login_failure_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'"
)
assert login_failure_records == "grpc_user\t1\t1\n"
logins_and_logouts = instance.query(
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')"
)
assert logins_and_logouts == "1\n"
def test_mysql_session(started_cluster):
mysql_query("SELECT 1", "mysql_user", "pass", False)
mysql_query("SELECT 2", "mysql_user", "wrong_pass", True)
mysql_query("SELECT 3", "wrong_mysql_user", "pass", True)
instance.query("SYSTEM FLUSH LOGS")
login_success_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'"
)
assert login_success_records == "mysql_user\t1\t1\n"
logout_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'"
)
assert logout_records == "mysql_user\t1\t1\n"
login_failure_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'"
)
assert login_failure_records == "mysql_user\t1\t1\n"
logins_and_logouts = instance.query(
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')"
)
assert logins_and_logouts == "1\n"
def test_postgres_session(started_cluster):
postgres_query("SELECT 1", "postgres_user", "pass", False)
postgres_query("SELECT 2", "postgres_user", "wrong_pass", True)
postgres_query("SELECT 3", "wrong_postgres_user", "pass", True)
instance.query("SYSTEM FLUSH LOGS")
login_success_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'"
)
assert login_success_records == "postgres_user\t1\t1\n"
logout_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'"
)
assert logout_records == "postgres_user\t1\t1\n"
login_failure_records = instance.query(
"SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'"
)
assert login_failure_records == "postgres_user\t1\t1\n"
logins_and_logouts = instance.query(
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')"
)
assert logins_and_logouts == "1\n"
def test_parallel_sessions(started_cluster):
thread_list = []
for _ in range(10):
        # The exact sleep time does not matter much here;
        # the test should pass even without sleeping.
for function in [postgres_query, grpc_query, mysql_query]:
thread = threading.Thread(
target=function,
args=(
f"SELECT sleep({random.uniform(0.03, 0.04)})",
"parallel_user",
"pass",
False,
),
)
thread.start()
thread_list.append(thread)
thread = threading.Thread(
target=function,
args=(
f"SELECT sleep({random.uniform(0.03, 0.04)})",
"parallel_user",
"wrong_pass",
True,
),
)
thread.start()
thread_list.append(thread)
thread = threading.Thread(
target=function,
args=(
f"SELECT sleep({random.uniform(0.03, 0.04)})",
"wrong_parallel_user",
"pass",
True,
),
)
thread.start()
thread_list.append(thread)
for thread in thread_list:
thread.join()
instance.query("SYSTEM FLUSH LOGS")
port_0_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'"
)
assert port_0_sessions == "90\n"
port_0_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0"
)
assert port_0_sessions == "0\n"
address_0_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')"
)
assert address_0_sessions == "0\n"
grpc_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'"
)
assert grpc_sessions == "30\n"
mysql_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'"
)
assert mysql_sessions == "30\n"
postgres_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'"
)
assert postgres_sessions == "30\n"
logins_and_logouts = instance.query(
f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')"
)
assert logins_and_logouts == "30\n"
logout_failure_sessions = instance.query(
f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'"
)
assert logout_failure_sessions == "30\n"


@ -0,0 +1,39 @@
<clickhouse>
<remote_servers>
<simple_cluster>
<shard>
<replica>
<host>node_0</host>
<port>9000</port>
</replica>
<replica>
<host>node_1</host>
<port>9000</port>
</replica>
<replica>
<host>node_2</host>
<port>9000</port>
</replica>
</shard>
</simple_cluster>
<cluster_non_existent_port>
<shard>
<replica>
<host>node_0</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>node_1</host>
<port>19000</port>
</replica>
</shard>
</cluster_non_existent_port>
</remote_servers>
<macros>
<default_cluster_macro>simple_cluster</default_cluster_macro>
</macros>
</clickhouse>


@ -657,3 +657,55 @@ def test_read_from_not_existing_container(cluster):
query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')"
expected_err_msg = "container does not exist"
assert expected_err_msg in node.query_and_get_error(query)
def test_function_signatures(cluster):
node = cluster.instances["node"]
connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;"
storage_account_url = "http://azurite1:10000/devstoreaccount1"
account_name = "devstoreaccount1"
account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)",
)
# " - connection_string, container_name, blobpath\n"
query_1 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv')"
assert azure_query(node, query_1) == "1\n2\n3\n"
# " - connection_string, container_name, blobpath, structure \n"
query_2 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'column1 UInt32')"
assert azure_query(node, query_2) == "1\n2\n3\n"
# " - connection_string, container_name, blobpath, format \n"
query_3 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV')"
assert azure_query(node, query_3) == "1\n2\n3\n"
# " - connection_string, container_name, blobpath, format, compression \n"
query_4 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto')"
assert azure_query(node, query_4) == "1\n2\n3\n"
# " - connection_string, container_name, blobpath, format, compression, structure \n"
query_5 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto', 'column1 UInt32')"
assert azure_query(node, query_5) == "1\n2\n3\n"
# " - storage_account_url, container_name, blobpath, account_name, account_key\n"
query_6 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}')"
assert azure_query(node, query_6) == "1\n2\n3\n"
# " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n"
query_7 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'column1 UInt32')"
assert azure_query(node, query_7) == "1\n2\n3\n"
# " - storage_account_url, container_name, blobpath, account_name, account_key, format\n"
query_8 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV')"
assert azure_query(node, query_8) == "1\n2\n3\n"
# " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n"
query_9 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto')"
assert azure_query(node, query_9) == "1\n2\n3\n"
# " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"
query_10 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32')"
assert azure_query(node, query_10) == "1\n2\n3\n"


@ -0,0 +1,288 @@
#!/usr/bin/env python3
import gzip
import json
import logging
import os
import io
import random
import threading
import time
from azure.storage.blob import BlobServiceClient
import helpers.client
import pytest
from helpers.cluster import ClickHouseCluster, ClickHouseInstance
from helpers.test_tools import TSV
from helpers.network import PartitionManager
from helpers.mock_servers import start_mock_servers
from helpers.test_tools import exec_query_with_retry
@pytest.fixture(scope="module")
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(
"node_0",
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
with_azurite=True,
)
cluster.add_instance(
"node_1",
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
with_azurite=True,
)
cluster.add_instance(
"node_2",
main_configs=["configs/named_collections.xml", "configs/cluster.xml"],
user_configs=["configs/disable_profilers.xml", "configs/users.xml"],
with_azurite=True,
)
cluster.start()
yield cluster
finally:
cluster.shutdown()
def azure_query(node, query, try_num=3, settings={}):
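    # Retry wrapper: re-run the query when Azurite closes the connection mid-response (up to try_num attempts).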
for i in range(try_num):
try:
return node.query(query, settings=settings)
except Exception as ex:
retriable_errors = [
"DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response"
]
retry = False
for error in retriable_errors:
if error in str(ex):
retry = True
print(f"Try num: {i}. Having retriable error: {ex}")
time.sleep(i)
break
if not retry or i == try_num - 1:
raise Exception(ex)
continue
def get_azure_file_content(filename):
container_name = "cont"
connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(filename)
download_stream = blob_client.download_blob()
return download_stream.readall().decode("utf-8")
def test_select_all(cluster):
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', "
"'auto', 'key UInt64, data String') VALUES (1, 'a'), (2, 'b')",
)
print(get_azure_file_content("test_cluster_select_all.csv"))
pure_azure = node.query(
"""
SELECT * from azureBlobStorage(
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
'auto')"""
)
print(pure_azure)
distributed_azure = node.query(
"""
SELECT * from azureBlobStorageCluster(
'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
'auto')"""
)
print(distributed_azure)
assert TSV(pure_azure) == TSV(distributed_azure)
def test_count(cluster):
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', "
"'auto', 'key UInt64') VALUES (1), (2)",
)
print(get_azure_file_content("test_cluster_count.csv"))
pure_azure = node.query(
"""
SELECT count(*) from azureBlobStorage(
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
'auto', 'key UInt64')"""
)
print(pure_azure)
distributed_azure = node.query(
"""
SELECT count(*) from azureBlobStorageCluster(
'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
'auto', 'key UInt64')"""
)
print(distributed_azure)
assert TSV(pure_azure) == TSV(distributed_azure)
def test_union_all(cluster):
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', "
"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')",
)
pure_azure = node.query(
"""
SELECT * FROM
(
SELECT * from azureBlobStorage(
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
'auto', 'a Int32, b String')
UNION ALL
SELECT * from azureBlobStorage(
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
'auto', 'a Int32, b String')
)
ORDER BY (a)
"""
)
azure_distributed = node.query(
"""
SELECT * FROM
(
SELECT * from azureBlobStorageCluster(
'simple_cluster',
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
'auto', 'a Int32, b String')
UNION ALL
SELECT * from azureBlobStorageCluster(
'simple_cluster',
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet',
'auto', 'a Int32, b String')
)
ORDER BY (a)
"""
)
assert TSV(pure_azure) == TSV(azure_distributed)
def test_skip_unavailable_shards(cluster):
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
"'auto', 'a UInt64') VALUES (1), (2)",
)
result = node.query(
"""
SELECT count(*) from azureBlobStorageCluster(
'cluster_non_existent_port',
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
SETTINGS skip_unavailable_shards = 1
"""
)
assert result == "2\n"
def test_unset_skip_unavailable_shards(cluster):
# Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards.
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
"'auto', 'a UInt64') VALUES (1), (2)",
)
result = node.query(
"""
SELECT count(*) from azureBlobStorageCluster(
'cluster_non_existent_port',
        'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
"""
)
assert result == "2\n"
def test_cluster_with_named_collection(cluster):
node = cluster.instances["node_0"]
azure_query(
node,
"INSERT INTO TABLE FUNCTION azureBlobStorage("
"'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', "
"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', "
"'auto', 'a UInt64') VALUES (1), (2)",
)
pure_azure = node.query(
"""
SELECT * from azureBlobStorage(
'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1',
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
"""
)
azure_cluster = node.query(
"""
SELECT * from azureBlobStorageCluster(
'simple_cluster', azure_conf2, container='cont', blob_path='test_cluster_with_named_collection.csv')
"""
)
assert TSV(pure_azure) == TSV(azure_cluster)
def test_partition_parallel_reading_with_cluster(cluster):
node = cluster.instances["node_0"]
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
partition_by = "column3"
values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)"
filename = "test_tf_{_partition_id}.csv"
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}",
)
assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv")
assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv")
assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv")
azure_cluster = node.query(
"""
SELECT count(*) from azureBlobStorageCluster(
'simple_cluster',
azure_conf2, container='cont', blob_path='test_tf_*.csv', format='CSV', compression='auto', structure='column1 UInt32, column2 UInt32, column3 UInt32')
"""
)
assert azure_cluster == "3\n"


@ -12,6 +12,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
function wait_query_by_id_started()
{
# As the query we are waiting for is running simultaneously, let's give it a little time to actually start. The
# queries are supposed to run for multiple seconds, so sleeping 0.5 seconds is not a big deal, especially when
# flushing the logs can take up to 3 to 5 seconds. Basically, by waiting a bit here we increase the chance that we
# don't have to spend precious time on flushing the logs.
sleep 0.5
local query_id=$1 && shift
# wait for query to be started
while [ "$($CLICKHOUSE_CLIENT "$@" -q "select count() from system.processes where query_id = '$query_id'")" -ne 1 ]; do


@ -0,0 +1,6 @@
2
SELECT countDistinctIf(number % 10, (number % 5) = 2)
FROM numbers(1000)
2
SELECT uniqExactIf(number % 10, (number % 5) = 2)
FROM numbers(1000)


@ -0,0 +1,8 @@
-- Tags: no-parallel
SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
-- The rewrite is disabled by default; enable it explicitly.
SET rewrite_count_distinct_if_with_count_distinct_implementation = 1;
SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);
EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers(1000);


@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
function read_archive_file() {
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
$CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')"
$CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2"
$CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
}
function run_archive_test() {
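    # $1 - archive extension (zip, tar, tar.gz, 7z, ...), $2 - archiver command used to create the archives; see the per-format callers of this library.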
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"
user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv
echo -e "5,6\n7,8" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv
echo -e "9,10\n11,12" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv > /dev/null"
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
echo "archive1 data1.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
echo "archive{1..2} data1.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv"
echo "archive{1,2} data{1,3}.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1,2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1,3}.csv"
echo "archive3 data*.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.csv"
echo "archive* *.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: *.csv"
echo "archive* {2..3}.csv"
read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive*.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{2..3}.csv"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..3}.$1
rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1..3}.csv
}


@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK


@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
run_archive_test "7z" "7z a"


@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK


@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
run_archive_test "tar" "tar -cvf"


@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK


@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
run_archive_test "tar.gz" "tar -cvzf"


@ -0,0 +1,116 @@
archive1 data1.csv
1 2
3 4
1 2
3 4
1 2
3 4
archive{1..2} data1.csv
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
OK


@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# shellcheck source=./02661_read_from_archive.lib
. "$CUR_DIR"/02661_read_from_archive.lib
run_archive_test "zip" "zip"


@ -0,0 +1,34 @@
sessions:
150
port_0_sessions:
0
address_0_sessions:
0
tcp_sessions
60
http_sessions
30
http_with_session_id_sessions
30
my_sql_sessions
30
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10
Corresponding LoginSuccess/Logout
10
LoginFailure
10


@ -0,0 +1,138 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
readonly PID=$$
# Each user uses a separate thread.
readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users
readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" )
readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" )
readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}")
readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" )
readonly TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
readonly ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
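# Each pipeline above wraps every user name in single quotes and joins them with commas,
# e.g. ( user_a user_b ) becomes 'user_a','user_b', ready for an SQL IN (...) list.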
readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
for user in "${ALL_USERS[@]}"; do
${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}";
done
# All <type>_session functions execute in separate threads.
# Each function tries to create a session with a successful login and logout,
# sleeps for a small, random amount of time to make the concurrency more intense,
# and also tries to log in with an invalid password.
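# With 5 users (2 TCP, 1 HTTP, 1 HTTP-with-session-id, 1 MySQL) and 10 iterations each, every user produces
# 10 x (LoginSuccess + Logout) + 10 x LoginFailure = 30 session_log rows, i.e. 150 rows in total.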
function tcp_session()
{
local user=$1
local i=0
while (( (i++) < 10 )); do
# login logout
${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass"
# login failure
${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid'
done
}
function http_session()
{
local user=$1
local i=0
while (( (i++) < 10 )); do
# login logout
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})"
# login failure
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4"
done
}
function http_with_session_id_session()
{
local user=$1
local i=0
while (( (i++) < 10 )); do
# login logout
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}"
# login failure
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6"
done
}
function mysql_session()
{
local user=$1
local i=0
while (( (i++) < 10 )); do
# login logout
${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')"
# login failure
${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)"
done
}
${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
export -f tcp_session;
export -f http_session;
export -f http_with_session_id_session;
export -f mysql_session;
for user in "${TCP_USERS[@]}"; do
timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 &
done
for user in "${HTTP_USERS[@]}"; do
timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 &
done
for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do
timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 &
done
for user in "${MYSQL_USERS[@]}"; do
timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 &
done
wait
${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
echo "sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
echo "port_0_sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0"
echo "address_0_sessions:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')"
echo "tcp_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'"
echo "http_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
echo "http_with_session_id_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
echo "my_sql_sessions"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'"
for user in "${ALL_USERS[@]}"; do
${CLICKHOUSE_CLIENT} -q "DROP USER ${user}"
echo "Corresponding LoginSuccess/Logout"
${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')"
echo "LoginFailure"
${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'"
done


@ -0,0 +1,13 @@
0
0
0
0
client_port 0 connections:
0
client_address '::' connections:
0
login failures:
0
TCP Login and logout count is equal
HTTP Login and logout count is equal
MySQL Login and logout count is equal


@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
readonly PID=$$
readonly TEST_USER=$"02834_USER_${PID}"
readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}"
${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}"
${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}"
${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"
${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
-d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')"
${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
-d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')"
${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
echo "client_port 0 connections:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0"
echo "client_address '::' connections:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')"
echo "login failures:"
${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'"
# The remote(...) function sometimes reuses old cached sessions for query execution.
# This makes the LoginSuccess/Logout entry counts unstable, but successes and logouts must always match.
for interface in 'TCP' 'HTTP' 'MySQL'
do
LOGIN_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'"`
CORRESPONDING_LOGOUT_RECORDS_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')"`
if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then
echo "${interface} Login and logout count is equal"
else
TOTAL_LOGOUT_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'"`
echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}"
fi
done
${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"

Some files were not shown because too many files have changed in this diff.