mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge branch 'master' into nullable-num-intdiv
This commit is contained in:
commit
d1a8a88ae0
4
.gitmodules
vendored
4
.gitmodules
vendored
@ -331,6 +331,10 @@
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing
|
||||
[submodule "contrib/libarchive"]
|
||||
path = contrib/libarchive
|
||||
url = https://github.com/libarchive/libarchive.git
|
||||
ignore = dirty
|
||||
[submodule "contrib/libfiu"]
|
||||
path = contrib/libfiu
|
||||
url = https://github.com/ClickHouse/libfiu.git
|
||||
|
@ -52,7 +52,6 @@
|
||||
* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
|
||||
@ -114,6 +113,7 @@
|
||||
* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal.
|
||||
* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Allow specifying a disk name for custom disks. Previously, custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release.
|
||||
* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
|
||||
|
@ -23,11 +23,8 @@ curl https://clickhouse.com/ | sh
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18
|
||||
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19
|
||||
* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20
|
||||
* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27
|
||||
* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8
|
||||
* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12
|
||||
|
||||
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf)
|
||||
add_contrib (openldap-cmake openldap)
|
||||
add_contrib (grpc-cmake grpc)
|
||||
add_contrib (msgpack-c-cmake msgpack-c)
|
||||
add_contrib (libarchive-cmake libarchive)
|
||||
|
||||
add_contrib (corrosion-cmake corrosion)
|
||||
|
||||
|
1
contrib/libarchive
vendored
Submodule
1
contrib/libarchive
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit ee45796171324519f0c0bfd012018dd099296336
|
172
contrib/libarchive-cmake/CMakeLists.txt
Normal file
172
contrib/libarchive-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,172 @@
|
||||
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive")
|
||||
|
||||
set(SRCS
|
||||
"${LIBRARY_DIR}/libarchive/archive_acl.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_check_magic.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_cmdline.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_cryptor.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_digest.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_sparse.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_stat.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_strmode.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_entry_xattr.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_getdate.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_hmac.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_match.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_options.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_pack_dev.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_pathmatch.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_ppmd7.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_ppmd8.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_random.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_rb.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_append_filter.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_extract2.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_extract.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_open_fd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_open_file.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_open_filename.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_open_memory.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_set_format.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_set_options.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_string.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_string_sprintf.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_util.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_version_details.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_virtual.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_windows.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_open_fd.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_open_file.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_open_filename.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_open_memory.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_options.c"
|
||||
"${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c"
|
||||
"${LIBRARY_DIR}/libarchive/filter_fork_posix.c"
|
||||
"${LIBRARY_DIR}/libarchive/filter_fork_windows.c"
|
||||
"${LIBRARY_DIR}/libarchive/xxhash.c"
|
||||
)
|
||||
|
||||
add_library(_libarchive ${SRCS})
|
||||
target_include_directories(_libarchive PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"${LIBRARY_DIR}/libarchive"
|
||||
)
|
||||
|
||||
target_compile_definitions(_libarchive PUBLIC
|
||||
HAVE_CONFIG_H
|
||||
)
|
||||
|
||||
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
|
||||
|
||||
if (TARGET ch_contrib::xz)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::zlib)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
|
||||
endif()
|
||||
|
||||
if (OS_LINUX)
|
||||
target_compile_definitions(
|
||||
_libarchive PUBLIC
|
||||
MAJOR_IN_SYSMACROS=1
|
||||
HAVE_LINUX_FS_H=1
|
||||
HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1
|
||||
HAVE_LINUX_TYPES_H=1
|
||||
HAVE_SYS_STATFS_H=1
|
||||
HAVE_FUTIMESAT=1
|
||||
HAVE_ICONV=1
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(ch_contrib::libarchive ALIAS _libarchive)
|
1391
contrib/libarchive-cmake/config.h
Normal file
1391
contrib/libarchive-cmake/config.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
esac
|
||||
|
||||
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
|
||||
ARG VERSION="23.7.1.2470"
|
||||
ARG VERSION="23.7.4.5"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.7.1.2470"
|
||||
ARG VERSION="23.7.4.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.7.1.2470"
|
||||
ARG VERSION="23.7.4.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -19,13 +19,13 @@ RUN apt-get update \
|
||||
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
|
||||
# TSAN will flush shadow memory when reaching this limit.
|
||||
# It may cause false-negatives, but it's better than OOM.
|
||||
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
|
||||
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment
|
||||
RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
|
||||
RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
|
||||
RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
|
||||
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
|
||||
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
|
||||
ENV UBSAN_OPTIONS='print_stacktrace=1'
|
||||
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
|
||||
|
||||
|
@ -130,7 +130,7 @@ COPY misc/ /misc/
|
||||
|
||||
# Same options as in test/base/Dockerfile
|
||||
# (in case you need to override them in tests)
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
|
||||
ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'
|
||||
ENV UBSAN_OPTIONS='print_stacktrace=1'
|
||||
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
|
||||
|
||||
|
@ -12,3 +12,5 @@ services:
|
||||
- type: ${HDFS_FS:-tmpfs}
|
||||
source: ${HDFS_LOGS:-}
|
||||
target: /usr/local/hadoop/logs
|
||||
sysctls:
|
||||
net.ipv4.ip_local_port_range: '55000 65535'
|
||||
|
@ -31,6 +31,8 @@ services:
|
||||
- kafka_zookeeper
|
||||
security_opt:
|
||||
- label:disable
|
||||
sysctls:
|
||||
net.ipv4.ip_local_port_range: '55000 65535'
|
||||
|
||||
schema-registry:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
|
@ -20,6 +20,8 @@ services:
|
||||
depends_on:
|
||||
- hdfskerberos
|
||||
entrypoint: /etc/bootstrap.sh -d
|
||||
sysctls:
|
||||
net.ipv4.ip_local_port_range: '55000 65535'
|
||||
|
||||
hdfskerberos:
|
||||
image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
|
||||
@ -29,3 +31,5 @@ services:
|
||||
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
|
||||
- /dev/urandom:/dev/random
|
||||
expose: [88, 749]
|
||||
sysctls:
|
||||
net.ipv4.ip_local_port_range: '55000 65535'
|
||||
|
@ -48,6 +48,8 @@ services:
|
||||
- kafka_kerberos
|
||||
security_opt:
|
||||
- label:disable
|
||||
sysctls:
|
||||
net.ipv4.ip_local_port_range: '55000 65535'
|
||||
|
||||
kafka_kerberos:
|
||||
image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
|
||||
|
@ -14,7 +14,7 @@ services:
|
||||
MINIO_ACCESS_KEY: minio
|
||||
MINIO_SECRET_KEY: minio123
|
||||
MINIO_PROMETHEUS_AUTH_TYPE: public
|
||||
command: server --address :9001 --certs-dir /certs /data1-1
|
||||
command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1
|
||||
depends_on:
|
||||
- proxy1
|
||||
- proxy2
|
||||
|
@ -3,7 +3,7 @@
|
||||
<default>
|
||||
<allow_introspection_functions>1</allow_introspection_functions>
|
||||
<log_queries>1</log_queries>
|
||||
<metrics_perf_events_enabled>1</metrics_perf_events_enabled>
|
||||
<metrics_perf_events_enabled>0</metrics_perf_events_enabled>
|
||||
<!--
|
||||
If a test takes too long by mistake, the entire test task can
|
||||
time out and the author won't get a proper message. Put some cap
|
||||
|
@ -369,6 +369,7 @@ for query_index in queries_to_run:
|
||||
"max_execution_time": args.prewarm_max_query_seconds,
|
||||
"query_profiler_real_time_period_ns": 10000000,
|
||||
"query_profiler_cpu_time_period_ns": 10000000,
|
||||
"metrics_perf_events_enabled": 1,
|
||||
"memory_profiler_step": "4Mi",
|
||||
},
|
||||
)
|
||||
@ -503,6 +504,7 @@ for query_index in queries_to_run:
|
||||
settings={
|
||||
"query_profiler_real_time_period_ns": 10000000,
|
||||
"query_profiler_cpu_time_period_ns": 10000000,
|
||||
"metrics_perf_events_enabled": 1,
|
||||
},
|
||||
)
|
||||
print(
|
||||
|
@ -96,5 +96,4 @@ rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
|
||||
zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
|
||||
|
||||
# Compressed (FIXME: remove once only github actions will be left)
|
||||
rm /var/log/clickhouse-server/clickhouse-server.log
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
|
||||
|
@ -41,6 +41,8 @@ RUN apt-get update -y \
|
||||
zstd \
|
||||
file \
|
||||
pv \
|
||||
zip \
|
||||
p7zip-full \
|
||||
&& apt-get clean
|
||||
|
||||
RUN pip3 install numpy scipy pandas Jinja2
|
||||
|
@ -200,8 +200,8 @@ Templates:
|
||||
- [Server Setting](_description_templates/template-server-setting.md)
|
||||
- [Database or Table engine](_description_templates/template-engine.md)
|
||||
- [System table](_description_templates/template-system-table.md)
|
||||
- [Data type](_description_templates/data-type.md)
|
||||
- [Statement](_description_templates/statement.md)
|
||||
- [Data type](_description_templates/template-data-type.md)
|
||||
- [Statement](_description_templates/template-statement.md)
|
||||
|
||||
|
||||
<a name="how-to-build-docs"/>
|
||||
|
31
docs/changelogs/v23.7.2.25-stable.md
Normal file
31
docs/changelogs/v23.7.2.25-stable.md
Normal file
@ -0,0 +1,31 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc)
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in the disk function arguments. In previous versions it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
23
docs/changelogs/v23.7.3.14-stable.md
Normal file
23
docs/changelogs/v23.7.3.14-stable.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.7.3.14-stable (bd9a510550c) FIXME as compared to v23.7.2.25-stable (8dd1107b032)
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#53025](https://github.com/ClickHouse/ClickHouse/issues/53025): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)).
|
||||
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
|
17
docs/changelogs/v23.7.4.5-stable.md
Normal file
17
docs/changelogs/v23.7.4.5-stable.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.7.4.5-stable (bd2fcd44553) FIXME as compared to v23.7.3.14-stable (bd9a510550c)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
@ -42,20 +42,20 @@ sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-rel
|
||||
|
||||
### Install and Use the Clang compiler
|
||||
|
||||
On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/).
|
||||
On Ubuntu/Debian, you can use LLVM's automatic installation script; see [here](https://apt.llvm.org/).
|
||||
|
||||
``` bash
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
```
|
||||
|
||||
Note: in case of troubles, you can also use this:
|
||||
Note: in case of trouble, you can also use this:
|
||||
|
||||
```bash
|
||||
sudo apt-get install software-properties-common
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
```
|
||||
|
||||
For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
|
||||
For other Linux distributions - check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).
|
||||
|
||||
As of April 2023, clang-16 or higher will work.
|
||||
GCC as a compiler is not supported.
|
||||
@ -92,8 +92,12 @@ cmake -S . -B build
|
||||
cmake --build build # or: `cd build; ninja`
|
||||
```
|
||||
|
||||
:::tip
|
||||
In case `cmake` isn't able to detect the number of available logical cores, the build will be done by one thread. To overcome this, you can tweak `cmake` to use a specific number of threads with `-j` flag, for example, `cmake --build build -j 16`. Alternatively, you can generate build files with a specific number of jobs in advance to avoid always setting the flag: `cmake -DPARALLEL_COMPILE_JOBS=16 -S . -B build`, where `16` is the desired number of threads.
|
||||
:::
|
||||
|
||||
To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`).
|
||||
This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments.
|
||||
This will create an executable `build/programs/clickhouse`, which can be used with `client` or `server` arguments.
|
||||
|
||||
## Building on Any Linux {#how-to-build-clickhouse-on-any-linux}
|
||||
|
||||
@ -107,7 +111,7 @@ The build requires the following components:
|
||||
- Yasm
|
||||
- Gawk
|
||||
|
||||
If all the components are installed, you may build in the same way as the steps above.
|
||||
If all the components are installed, you may build it in the same way as the steps above.
|
||||
|
||||
Example for OpenSUSE Tumbleweed:
|
||||
|
||||
@ -123,7 +127,7 @@ Example for Fedora Rawhide:
|
||||
|
||||
``` bash
|
||||
sudo yum update
|
||||
sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk
|
||||
sudo yum --nogpg install git cmake make clang python3 ccache lld nasm yasm gawk
|
||||
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
|
||||
mkdir build
|
||||
cmake -S . -B build
|
||||
|
@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s
|
||||
Runs [integration tests](tests.md#integration-tests).
|
||||
|
||||
|
||||
## Bugfix validate check
|
||||
Checks that there is either a new test (functional or integration) or some changed tests that fail with the binary built on the master branch. This check is triggered when the pull request has the "pr-bugfix" label.
|
||||
|
||||
|
||||
## Stress Test
|
||||
Runs stateless functional tests concurrently from several clients to detect
|
||||
concurrency-related errors. If it fails:
|
||||
|
@ -22,7 +22,7 @@ CREATE TABLE deltalake
|
||||
- `url` — Bucket url with path to the existing Delta Lake table.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
|
||||
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -22,7 +22,7 @@ CREATE TABLE hudi_table
|
||||
- `url` — Bucket url with the path to an existing Hudi table.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
|
||||
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -237,7 +237,7 @@ The following settings can be set before query execution or placed into configur
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
|
||||
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`.
|
||||
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
|
||||
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
|
||||
|
||||
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
|
||||
|
@ -193,6 +193,19 @@ index creation, `L2Distance` is used as default. Parameter `NumTrees` is the num
|
||||
specified: 100). Higher values of `NumTree` mean more accurate search results but slower index creation / query times (approximately
|
||||
linearly) as well as larger index sizes.
|
||||
|
||||
`L2Distance` is also called Euclidean distance. The Euclidean distance between two points in Euclidean space is the length of the line segment between the two points.
|
||||
For example, if we have points P(p1, p2) and Q(q1, q2), their distance is d(P, Q) = sqrt((q1 - p1)^2 + (q2 - p2)^2).
|
||||
![L2Distance](https://en.wikipedia.org/wiki/Euclidean_distance#/media/File:Euclidean_distance_2d.svg)
|
||||
|
||||
`cosineDistance` is based on cosine similarity, which is a measure of similarity between two non-zero vectors defined in an inner product space. Cosine similarity is the cosine of the angle between the vectors; that is, it is the dot product of the vectors divided by the product of their lengths.
|
||||
![cosineDistance](https://www.tyrrell4innovation.ca/wp-content/uploads/2021/06/rsz_jenny_du_miword.png)
|
||||
|
||||
The Euclidean distance corresponds to the L2-norm of a difference between vectors. The cosine similarity is proportional to the dot product of two vectors and inversely proportional to the product of their magnitudes.
|
||||
![compare](https://www.researchgate.net/publication/320914786/figure/fig2/AS:558221849841664@1510101868614/The-difference-between-Euclidean-distance-and-cosine-similarity.png)
|
||||
In one sentence: cosine similarity cares only about the angle between the vectors, not about the "distance" between them as we normally think of it.
|
||||
![L2 distance](https://www.baeldung.com/wp-content/uploads/sites/4/2020/06/4-1.png)
|
||||
![cosineDistance](https://www.baeldung.com/wp-content/uploads/sites/4/2020/06/5.png)
|
||||
|
||||
:::note
|
||||
Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have the same length. Use
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1
|
||||
|
@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i
|
||||
:::
|
||||
|
||||
``` sql
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
|
||||
```
|
||||
|
||||
### Engine parameters:
|
||||
|
@ -2131,7 +2131,6 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
|
||||
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
|
||||
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
|
||||
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
|
||||
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
|
||||
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
|
||||
@ -2336,7 +2335,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
|
||||
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
|
||||
@ -2402,7 +2400,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
|
||||
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
|
||||
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
|
||||
|
@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
|
||||
- `password` for the file on disk
|
||||
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
||||
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
|
||||
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
|
||||
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
|
||||
|
||||
### Usage examples
|
||||
|
@ -7,6 +7,10 @@ pagination_next: en/operations/settings/settings
|
||||
|
||||
# Settings Overview
|
||||
|
||||
:::note
|
||||
XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management).
|
||||
:::
|
||||
|
||||
There are two main groups of ClickHouse settings:
|
||||
|
||||
- Global server settings
|
||||
|
@ -298,7 +298,7 @@ Default value: `THROW`.
|
||||
- [JOIN clause](../../sql-reference/statements/select/join.md#select-join)
|
||||
- [Join table engine](../../engines/table-engines/special/join.md)
|
||||
|
||||
## max_partitions_per_insert_block {#max-partitions-per-insert-block}
|
||||
## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block}
|
||||
|
||||
Limits the maximum number of partitions in a single inserted block.
|
||||
|
||||
@ -309,9 +309,18 @@ Default value: 100.
|
||||
|
||||
**Details**
|
||||
|
||||
When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text:
|
||||
When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text:
|
||||
|
||||
> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
|
||||
## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block}
|
||||
|
||||
Allows you to control behaviour when `max_partitions_per_insert_block` is reached.
|
||||
|
||||
- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised.
|
||||
- `false` - Logs a warning when `max_partitions_per_insert_block` is reached.
|
||||
|
||||
Default value: `true`
|
||||
|
||||
## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user}
|
||||
|
||||
|
@ -1112,17 +1112,6 @@ Default value: 1.
|
||||
|
||||
## Arrow format settings {#arrow-format-settings}
|
||||
|
||||
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
|
||||
|
||||
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
|
||||
- 1 — Data can be inserted into `Nested` columns as an array of structs.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching}
|
||||
|
||||
Ignore case when matching Arrow column names with ClickHouse column names.
|
||||
@ -1172,17 +1161,6 @@ Default value: `lz4_frame`.
|
||||
|
||||
## ORC format settings {#orc-format-settings}
|
||||
|
||||
### input_format_orc_import_nested {#input_format_orc_import_nested}
|
||||
|
||||
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
|
||||
- 1 — Data can be inserted into `Nested` columns as an array of structs.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### input_format_orc_row_batch_size {#input_format_orc_row_batch_size}
|
||||
|
||||
Batch size when reading ORC stripes.
|
||||
@ -1221,17 +1199,6 @@ Default value: `none`.
|
||||
|
||||
## Parquet format settings {#parquet-format-settings}
|
||||
|
||||
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
|
||||
|
||||
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
|
||||
- 1 — Data can be inserted into `Nested` columns as an array of structs.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching}
|
||||
|
||||
Ignore case when matching Parquet column names with ClickHouse column names.
|
||||
|
@ -4578,6 +4578,17 @@ Type: Int64
|
||||
|
||||
Default: 0
|
||||
|
||||
## rewrite_count_distinct_if_with_count_distinct_implementation
|
||||
|
||||
Allows you to rewrite `countDistinctIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true — Allow.
|
||||
- false — Disallow.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## precise_float_parsing {#precise_float_parsing}
|
||||
|
||||
Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
|
||||
|
@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol.
|
||||
|
||||
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
|
||||
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
|
||||
- `-p N`, `--port=N` — Server port. Default value: 2181
|
||||
- `-p N`, `--port=N` — Server port. Default value: 9181
|
||||
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
|
||||
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
|
||||
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
|
||||
@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
|
||||
## Example {#clickhouse-keeper-client-example}
|
||||
|
||||
```bash
|
||||
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
|
||||
Connected to ZooKeeper at [::1]:2181 with session_id 137
|
||||
./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
|
||||
Connected to ZooKeeper at [::1]:9181 with session_id 137
|
||||
/ :) ls
|
||||
keeper foo bar
|
||||
/ :) cd keeper
|
||||
@ -51,7 +51,3 @@ keeper foo bar
|
||||
- `rmr <path>` -- Recursively deletes path. Confirmation required
|
||||
- `flwc <command>` -- Executes four-letter-word command
|
||||
- `help` -- Prints this message
|
||||
- `get_stat [path]` -- Returns the node's stat (default `.`)
|
||||
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
|
||||
- `delete_stable_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
|
||||
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
|
||||
|
@ -34,7 +34,13 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti
|
||||
|
||||
A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
|
||||
|
||||
If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
|
||||
If the file is sitting on the same machine as `clickhouse-local`, you can simply specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
|
||||
|
||||
```bash
|
||||
./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
|
||||
```
|
||||
|
||||
This command is a shortcut of:
|
||||
|
||||
```bash
|
||||
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
|
||||
|
@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control:
|
||||
|
||||
[ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not.
|
||||
|
||||
[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md).
|
||||
|
||||
## Mutations
|
||||
|
||||
`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that produce new “mutated” versions of parts.
|
||||
|
30
docs/en/sql-reference/statements/alter/named-collection.md
Normal file
30
docs/en/sql-reference/statements/alter/named-collection.md
Normal file
@ -0,0 +1,30 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/alter/named-collection
|
||||
sidebar_label: NAMED COLLECTION
|
||||
---
|
||||
|
||||
# ALTER NAMED COLLECTION
|
||||
|
||||
This query intends to modify already existing named collections.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
|
||||
[ SET
|
||||
key_name1 = 'some value',
|
||||
key_name2 = 'some value',
|
||||
key_name3 = 'some value',
|
||||
... ] |
|
||||
[ DELETE key_name4, key_name5, ... ]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
|
||||
|
||||
ALTER NAMED COLLECTION foobar SET a = '2', c = '3';
|
||||
|
||||
ALTER NAMED COLLECTION foobar DELETE b;
|
||||
```
|
@ -8,13 +8,14 @@ sidebar_label: CREATE
|
||||
|
||||
Create queries make a new entity of one of the following kinds:
|
||||
|
||||
- [DATABASE](../../../sql-reference/statements/create/database.md)
|
||||
- [TABLE](../../../sql-reference/statements/create/table.md)
|
||||
- [VIEW](../../../sql-reference/statements/create/view.md)
|
||||
- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md)
|
||||
- [FUNCTION](../../../sql-reference/statements/create/function.md)
|
||||
- [USER](../../../sql-reference/statements/create/user.md)
|
||||
- [ROLE](../../../sql-reference/statements/create/role.md)
|
||||
- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md)
|
||||
- [QUOTA](../../../sql-reference/statements/create/quota.md)
|
||||
- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md)
|
||||
- [DATABASE](/docs/en/sql-reference/statements/create/database.md)
|
||||
- [TABLE](/docs/en/sql-reference/statements/create/table.md)
|
||||
- [VIEW](/docs/en/sql-reference/statements/create/view.md)
|
||||
- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md)
|
||||
- [FUNCTION](/docs/en/sql-reference/statements/create/function.md)
|
||||
- [USER](/docs/en/sql-reference/statements/create/user.md)
|
||||
- [ROLE](/docs/en/sql-reference/statements/create/role.md)
|
||||
- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md)
|
||||
- [QUOTA](/docs/en/sql-reference/statements/create/quota.md)
|
||||
- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md)
|
||||
- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md)
|
||||
|
34
docs/en/sql-reference/statements/create/named-collection.md
Normal file
34
docs/en/sql-reference/statements/create/named-collection.md
Normal file
@ -0,0 +1,34 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/create/named-collection
|
||||
sidebar_label: NAMED COLLECTION
|
||||
---
|
||||
|
||||
# CREATE NAMED COLLECTION
|
||||
|
||||
Creates a new named collection.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS
|
||||
key_name1 = 'some value',
|
||||
key_name2 = 'some value',
|
||||
key_name3 = 'some value',
|
||||
...
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
|
||||
```
|
||||
|
||||
**Related statements**
|
||||
|
||||
- [ALTER NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection)
|
||||
- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-named-collection)
|
||||
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Named collections guide](/docs/en/operations/named-collections.md)
|
@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster]
|
||||
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
|
||||
DROP FUNCTION linear_equation;
|
||||
```
|
||||
|
||||
## DROP NAMED COLLECTION
|
||||
|
||||
Deletes a named collection.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster]
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
|
||||
DROP NAMED COLLECTION foobar;
|
||||
```
|
||||
|
@ -314,6 +314,22 @@ Provides possibility to start background fetch tasks from replication queues whi
|
||||
SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
|
||||
```
|
||||
|
||||
### STOP PULLING REPLICATION LOG
|
||||
|
||||
Stops loading new entries from replication log to replication queue in a `ReplicatedMergeTree` table.
|
||||
|
||||
``` sql
|
||||
SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
|
||||
```
|
||||
|
||||
### START PULLING REPLICATION LOG
|
||||
|
||||
Cancels `SYSTEM STOP PULLING REPLICATION LOG`.
|
||||
|
||||
``` sql
|
||||
SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
|
||||
```
|
||||
|
||||
### SYNC REPLICA
|
||||
|
||||
Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds.
|
||||
|
@ -0,0 +1,47 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/azureBlobStorageCluster
|
||||
sidebar_position: 55
|
||||
sidebar_label: azureBlobStorageCluster
|
||||
title: "azureBlobStorageCluster Table Function"
|
||||
---
|
||||
|
||||
Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On the initiator, it creates a connection to all nodes in the cluster, expands asterisks in the Azure Blob Storage file path, and dispatches each file dynamically. On a worker node, it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
|
||||
- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
|
||||
- `container_name` - Container name
|
||||
- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
|
||||
- `account_name` - if storage_account_url is used, then account name can be specified here
|
||||
- `account_key` - if storage_account_url is used, then account key can be specified here
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`).
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading or writing data in the specified file.
|
||||
|
||||
**Examples**
|
||||
|
||||
Select the count for the file `test_cluster_count.csv`, using all the nodes in the `cluster_simple` cluster:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) from azureBlobStorageCluster(
|
||||
'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1',
|
||||
'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV',
|
||||
'auto', 'key UInt64')
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md)
|
||||
- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md)
|
@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
cluster('cluster_name', db.table[, sharding_key])
|
||||
cluster('cluster_name', db, table[, sharding_key])
|
||||
clusterAllReplicas('cluster_name', db.table[, sharding_key])
|
||||
clusterAllReplicas('cluster_name', db, table[, sharding_key])
|
||||
cluster(['cluster_name', db.table, sharding_key])
|
||||
cluster(['cluster_name', db, table, sharding_key])
|
||||
clusterAllReplicas(['cluster_name', db.table, sharding_key])
|
||||
clusterAllReplicas(['cluster_name', db, table, sharding_key])
|
||||
```
|
||||
**Arguments**
|
||||
|
||||
- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
|
||||
- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. If not specified, `default` is used.
|
||||
- `db.table` or `db`, `table` - Name of a database and a table.
|
||||
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
|
||||
|
||||
|
@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
file(path [,format] [,structure] [,compression])
|
||||
file([path_to_archive ::] path [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}`, where `N`, `M` are numbers and `'abc'`, `'def'` are strings.
|
||||
- `path_to_archive` — The relative path to a zip/tar/7z archive. The path to the archive supports the same globs as `path`.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading or writing data in the specified file.
|
||||
@ -128,6 +130,11 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting data from `table.csv`, located in `archive1.zip` and/or `archive2.zip`:
|
||||
``` sql
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in Path
|
||||
|
||||
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).
|
||||
|
@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](../../operations/named-collections.md)
|
||||
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries.
|
||||
## Syntax
|
||||
|
||||
``` sql
|
||||
remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
|
||||
remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
|
||||
remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
|
||||
remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
```
|
||||
|
||||
## Parameters
|
||||
@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
|
||||
|
||||
The port is required for an IPv6 address.
|
||||
|
||||
If only this parameter is specified, `db` and `table` default to `system.one`.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
@ -9,7 +9,7 @@ sidebar_label: Buffer
|
||||
Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы.
|
||||
|
||||
``` sql
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
|
||||
```
|
||||
|
||||
Параметры движка:
|
||||
|
@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность
|
||||
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
|
||||
```
|
||||
|
||||
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested).
|
||||
|
||||
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида:
|
||||
|
||||
``` bash
|
||||
@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность
|
||||
$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
|
||||
```
|
||||
|
||||
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested).
|
||||
|
||||
### Вывод данных {#selecting-data-arrow}
|
||||
|
||||
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
|
||||
@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность
|
||||
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
|
||||
```
|
||||
|
||||
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested).
|
||||
|
||||
### Вывод данных {#selecting-data-2}
|
||||
|
||||
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:
|
||||
|
@ -0,0 +1 @@
|
||||
../../../en/operations/optimizing-performance/profile-guided-optimization.md
|
@ -311,9 +311,18 @@ FORMAT Null;
|
||||
|
||||
**Подробности**
|
||||
|
||||
При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом:
|
||||
При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. Если количество партиций превышает значение `max_partitions_per_insert_block`, ClickHouse либо логирует предупреждение, либо выбрасывает исключение в зависимости от значения `throw_on_max_partitions_per_insert_block`. Исключения имеют следующий текст:
|
||||
|
||||
> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).»
|
||||
> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
|
||||
## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block}
|
||||
|
||||
Позволяет контролировать поведение при достижении `max_partitions_per_insert_block`.
|
||||
|
||||
- `true` - Когда вставляемый блок достигает `max_partitions_per_insert_block`, возникает исключение.
|
||||
- `false` - Записывает предупреждение при достижении `max_partitions_per_insert_block`.
|
||||
|
||||
Значение по умолчанию: `true`
|
||||
|
||||
## max_sessions_for_user {#max-sessions-per-user}
|
||||
|
||||
|
@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко
|
||||
|
||||
В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение.
|
||||
|
||||
## input_format_parquet_import_nested {#input_format_parquet_import_nested}
|
||||
|
||||
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
- 1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
## input_format_arrow_import_nested {#input_format_arrow_import_nested}
|
||||
|
||||
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
- 1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
## input_format_orc_import_nested {#input_format_orc_import_nested}
|
||||
|
||||
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
- 1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}
|
||||
|
||||
Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md).
|
||||
|
@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer
|
||||
|
||||
缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。
|
||||
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
|
||||
Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
|
||||
|
||||
引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。
|
||||
|
||||
|
@ -0,0 +1 @@
|
||||
../../../en/operations/optimizing-performance/profile-guided-optimization.md
|
@ -1,6 +1,5 @@
|
||||
|
||||
#include "Commands.h"
|
||||
#include <queue>
|
||||
#include "KeeperClient.h"
|
||||
|
||||
|
||||
@ -25,18 +24,8 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
|
||||
else
|
||||
path = client->cwd;
|
||||
|
||||
auto children = client->zookeeper->getChildren(path);
|
||||
std::sort(children.begin(), children.end());
|
||||
|
||||
bool need_space = false;
|
||||
for (const auto & child : children)
|
||||
{
|
||||
if (std::exchange(need_space, true))
|
||||
std::cout << " ";
|
||||
|
||||
std::cout << child;
|
||||
}
|
||||
|
||||
for (const auto & child : client->zookeeper->getChildren(path))
|
||||
std::cout << child << " ";
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
@ -88,7 +77,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
|
||||
client->zookeeper->set(
|
||||
client->getAbsolutePath(query->args[0].safeGet<String>()),
|
||||
query->args[1].safeGet<String>(),
|
||||
static_cast<Int32>(query->args[2].safeGet<Int64>()));
|
||||
static_cast<Int32>(query->args[2].get<Int32>()));
|
||||
}
|
||||
|
||||
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
@ -141,173 +130,6 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
|
||||
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
|
||||
}
|
||||
|
||||
/// Parses the arguments of the `get_stat` command.
/// The path argument is optional: when one is present it is appended to `node->args`,
/// otherwise `node->args` is left untouched. Parsing reports success in both cases.
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path_arg;
    if (parseKeeperPath(pos, expected, path_arg))
        node->args.push_back(std::move(path_arg));

    /// A missing path is not an error.
    return true;
}
|
||||
|
||||
void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
String path;
|
||||
if (!query->args.empty())
|
||||
path = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
else
|
||||
path = client->cwd;
|
||||
|
||||
client->zookeeper->get(path, &stat);
|
||||
|
||||
std::cout << "cZxid = " << stat.czxid << "\n";
|
||||
std::cout << "mZxid = " << stat.mzxid << "\n";
|
||||
std::cout << "pZxid = " << stat.pzxid << "\n";
|
||||
std::cout << "ctime = " << stat.ctime << "\n";
|
||||
std::cout << "mtime = " << stat.mtime << "\n";
|
||||
std::cout << "version = " << stat.version << "\n";
|
||||
std::cout << "cversion = " << stat.cversion << "\n";
|
||||
std::cout << "aversion = " << stat.aversion << "\n";
|
||||
std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n";
|
||||
std::cout << "dataLength = " << stat.dataLength << "\n";
|
||||
std::cout << "numChildren = " << stat.numChildren << "\n";
|
||||
}
|
||||
|
||||
/// Parses `<threshold> [path]` for the `find_super_nodes` command.
/// The threshold (an unsigned integer literal) is mandatory; parsing fails without it.
/// The path is optional and falls back to "." when it cannot be parsed.
/// On success `node->args` receives the threshold followed by the path.
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    ASTPtr threshold_ast;
    const bool has_threshold = ParserUnsignedInteger{}.parse(pos, threshold_ast, expected);
    if (!has_threshold)
        return false;

    node->args.push_back(threshold_ast->as<ASTLiteral &>().value);

    String target;
    const bool has_path = parseKeeperPath(pos, expected, target);
    if (!has_path)
        target = ".";

    node->args.push_back(std::move(target));
    return true;
}
|
||||
|
||||
void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
auto threshold = query->args[0].safeGet<UInt64>();
|
||||
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
|
||||
|
||||
Coordination::Stat stat;
|
||||
client->zookeeper->get(path, &stat);
|
||||
|
||||
if (stat.numChildren >= static_cast<Int32>(threshold))
|
||||
{
|
||||
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
auto children = client->zookeeper->getChildren(path);
|
||||
std::sort(children.begin(), children.end());
|
||||
for (const auto & child : children)
|
||||
{
|
||||
auto next_query = *query;
|
||||
next_query.args[1] = DB::Field(path / child);
|
||||
execute(&next_query, client);
|
||||
}
|
||||
}
|
||||
|
||||
/// `delete_stable_backups` takes no arguments, so there is nothing to parse:
/// the parser state is left untouched and success is always reported.
bool DeleteStableBackups::parse(IParser::Pos &, std::shared_ptr<ASTKeeperQuery> &, Expected &) const
{
    return true;
}
|
||||
|
||||
void DeleteStableBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const
|
||||
{
|
||||
client->askConfirmation(
|
||||
"You are going to delete all inactive backups in /clickhouse/backups.",
|
||||
[client]
|
||||
{
|
||||
fs::path backup_root = "/clickhouse/backups";
|
||||
auto backups = client->zookeeper->getChildren(backup_root);
|
||||
std::sort(backups.begin(), backups.end());
|
||||
|
||||
for (const auto & child : backups)
|
||||
{
|
||||
auto backup_path = backup_root / child;
|
||||
std::cout << "Found backup " << backup_path << ", checking if it's active\n";
|
||||
|
||||
String stage_path = backup_path / "stage";
|
||||
auto stages = client->zookeeper->getChildren(stage_path);
|
||||
|
||||
bool is_active = false;
|
||||
for (const auto & stage : stages)
|
||||
{
|
||||
if (startsWith(stage, "alive"))
|
||||
{
|
||||
is_active = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_active)
|
||||
{
|
||||
std::cout << "Backup " << backup_path << " is active, not going to delete\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
std::cout << "Backup " << backup_path << " is not active, deleting it\n";
|
||||
client->zookeeper->removeRecursive(backup_path);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Parses `[path] [n]` for the `find_big_family` command.
/// Both arguments are optional: the path falls back to "." and the count to 10.
/// `node->args` receives the path followed by the count; parsing always succeeds.
bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String root;
    const bool has_path = parseKeeperPath(pos, expected, root);
    if (!has_path)
        root = ".";
    node->args.push_back(std::move(root));

    ASTPtr count_ast;
    const bool has_count = ParserUnsignedInteger{}.parse(pos, count_ast, expected);
    if (!has_count)
        node->args.push_back(UInt64(10));
    else
        node->args.push_back(count_ast->as<ASTLiteral &>().value);

    return true;
}
|
||||
|
||||
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
auto n = query->args[1].safeGet<UInt64>();
|
||||
|
||||
std::vector<std::tuple<Int32, String>> result;
|
||||
|
||||
std::queue<fs::path> queue;
|
||||
queue.push(path);
|
||||
while (!queue.empty())
|
||||
{
|
||||
auto next_path = queue.front();
|
||||
queue.pop();
|
||||
|
||||
auto children = client->zookeeper->getChildren(next_path);
|
||||
std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; });
|
||||
|
||||
auto response = client->zookeeper->get(children);
|
||||
|
||||
for (size_t i = 0; i < response.size(); ++i)
|
||||
{
|
||||
result.emplace_back(response[i].stat.numChildren, children[i]);
|
||||
queue.push(children[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(result.begin(), result.end(), std::greater());
|
||||
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
|
||||
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
|
||||
}
|
||||
|
||||
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
{
|
||||
String arg;
|
||||
@ -348,7 +170,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery
|
||||
void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
|
||||
{
|
||||
for (const auto & pair : KeeperClient::commands)
|
||||
std::cout << pair.second->generateHelpString() << "\n";
|
||||
std::cout << pair.second->getHelpMessage() << "\n";
|
||||
}
|
||||
|
||||
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
|
@ -21,12 +21,6 @@ public:
|
||||
virtual String getName() const = 0;
|
||||
|
||||
virtual ~IKeeperClientCommand() = default;
|
||||
|
||||
String generateHelpString() const
|
||||
{
|
||||
return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName()));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
using Command = std::shared_ptr<IKeeperClientCommand>;
|
||||
@ -40,7 +34,7 @@ class LSCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; }
|
||||
String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }
|
||||
};
|
||||
|
||||
class CDCommand : public IKeeperClientCommand
|
||||
@ -51,7 +45,7 @@ class CDCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
|
||||
String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }
|
||||
};
|
||||
|
||||
class SetCommand : public IKeeperClientCommand
|
||||
@ -64,7 +58,7 @@ class SetCommand : public IKeeperClientCommand
|
||||
|
||||
String getHelpMessage() const override
|
||||
{
|
||||
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
|
||||
return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
|
||||
}
|
||||
};
|
||||
|
||||
@ -76,7 +70,7 @@ class CreateCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <path> <value> -- Creates new node"; }
|
||||
String getHelpMessage() const override { return "create <path> <value> -- Creates new node"; }
|
||||
};
|
||||
|
||||
class GetCommand : public IKeeperClientCommand
|
||||
@ -87,63 +81,9 @@ class GetCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
|
||||
String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }
|
||||
};
|
||||
|
||||
class GetStatCommand : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "get_stat"; }
|
||||
|
||||
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; }
|
||||
};
|
||||
|
||||
class FindSuperNodes : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "find_super_nodes"; }
|
||||
|
||||
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override
|
||||
{
|
||||
return "{} <threshold> [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)";
|
||||
}
|
||||
};
|
||||
|
||||
class DeleteStableBackups : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "delete_stable_backups"; }
|
||||
|
||||
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override
|
||||
{
|
||||
return "{} -- Deletes ClickHouse nodes used for backups that are now inactive";
|
||||
}
|
||||
};
|
||||
|
||||
class FindBigFamily : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "find_big_family"; }
|
||||
|
||||
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override
|
||||
{
|
||||
return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class RMCommand : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "rm"; }
|
||||
@ -152,7 +92,7 @@ class RMCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
|
||||
String getHelpMessage() const override { return "remove <path> -- Remove the node"; }
|
||||
};
|
||||
|
||||
class RMRCommand : public IKeeperClientCommand
|
||||
@ -163,7 +103,7 @@ class RMRCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
|
||||
String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }
|
||||
};
|
||||
|
||||
class HelpCommand : public IKeeperClientCommand
|
||||
@ -174,7 +114,7 @@ class HelpCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} -- Prints this message"; }
|
||||
String getHelpMessage() const override { return "help -- Prints this message"; }
|
||||
};
|
||||
|
||||
class FourLetterWordCommand : public IKeeperClientCommand
|
||||
@ -185,7 +125,7 @@ class FourLetterWordCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <command> -- Executes four-letter-word command"; }
|
||||
String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -131,7 +131,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
|
||||
.binding("host"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("port", "p", "server port. default `2181`")
|
||||
Poco::Util::Option("port", "p", "server port. default `9181`")
|
||||
.argument("<port>")
|
||||
.binding("port"));
|
||||
|
||||
@ -177,10 +177,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
|
||||
std::make_shared<SetCommand>(),
|
||||
std::make_shared<CreateCommand>(),
|
||||
std::make_shared<GetCommand>(),
|
||||
std::make_shared<GetStatCommand>(),
|
||||
std::make_shared<FindSuperNodes>(),
|
||||
std::make_shared<DeleteStableBackups>(),
|
||||
std::make_shared<FindBigFamily>(),
|
||||
std::make_shared<RMCommand>(),
|
||||
std::make_shared<RMRCommand>(),
|
||||
std::make_shared<HelpCommand>(),
|
||||
@ -270,8 +266,16 @@ void KeeperClient::runInteractive()
|
||||
|
||||
LineReader::Patterns query_extenders = {"\\"};
|
||||
LineReader::Patterns query_delimiters = {};
|
||||
char word_break_characters[] = " \t\v\f\a\b\r\n/";
|
||||
|
||||
ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {});
|
||||
ReplxxLineReader lr(
|
||||
suggest,
|
||||
history_file,
|
||||
/* multiline= */ false,
|
||||
query_extenders,
|
||||
query_delimiters,
|
||||
word_break_characters,
|
||||
/* highlighter_= */ {});
|
||||
lr.enableBracketedPaste();
|
||||
|
||||
while (true)
|
||||
@ -303,7 +307,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
|
||||
}
|
||||
|
||||
auto host = config().getString("host", "localhost");
|
||||
auto port = config().getString("port", "2181");
|
||||
auto port = config().getString("port", "9181");
|
||||
zk_args.hosts = {host + ":" + port};
|
||||
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
|
||||
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
|
||||
|
@ -58,7 +58,6 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
return false;
|
||||
|
||||
String command_name(pos->begin, pos->end);
|
||||
std::transform(command_name.begin(), command_name.end(), command_name.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||
Command command;
|
||||
|
||||
auto iter = KeeperClient::commands.find(command_name);
|
||||
|
@ -288,13 +288,27 @@ try
|
||||
std::string path;
|
||||
|
||||
if (config().has("keeper_server.storage_path"))
|
||||
{
|
||||
path = config().getString("keeper_server.storage_path");
|
||||
}
|
||||
else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
|
||||
{
|
||||
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
|
||||
"By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly",
|
||||
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
|
||||
}
|
||||
else if (config().has("keeper_server.log_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path();
|
||||
}
|
||||
else if (config().has("keeper_server.snapshot_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path();
|
||||
}
|
||||
else
|
||||
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
|
||||
{
|
||||
path = KEEPER_DEFAULT_PATH;
|
||||
}
|
||||
|
||||
std::filesystem::create_directories(path);
|
||||
|
||||
@ -330,6 +344,7 @@ try
|
||||
auto global_context = Context::createGlobal(shared_context.get());
|
||||
|
||||
global_context->makeGlobalContext();
|
||||
global_context->setApplicationType(Context::ApplicationType::KEEPER);
|
||||
global_context->setPath(path);
|
||||
global_context->setRemoteHostFilter(config());
|
||||
|
||||
@ -365,7 +380,7 @@ try
|
||||
}
|
||||
|
||||
/// Initialize keeper RAFT. Do nothing if no keeper_server in config.
|
||||
global_context->initializeKeeperDispatcher(/* start_async = */ true);
|
||||
global_context->initializeKeeperDispatcher(/* start_async = */ false);
|
||||
FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher());
|
||||
|
||||
auto config_getter = [&] () -> const Poco::Util::AbstractConfiguration &
|
||||
|
@ -466,6 +466,11 @@ int main(int argc_, char ** argv_)
|
||||
checkHarmfulEnvironmentVariables(argv_);
|
||||
#endif
|
||||
|
||||
/// This is used for testing. For example,
|
||||
/// clickhouse-local should be able to run a simple query without throw/catch.
|
||||
if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
|
||||
DB::terminate_on_any_exception = true;
|
||||
|
||||
/// Reset new handler to default (that throws std::bad_alloc)
|
||||
/// It is needed because LLVM library clobbers it.
|
||||
std::set_new_handler(nullptr);
|
||||
|
@ -1650,6 +1650,7 @@ try
|
||||
database_catalog.initializeAndLoadTemporaryDatabase();
|
||||
loadMetadataSystem(global_context);
|
||||
maybeConvertSystemDatabase(global_context);
|
||||
startupSystemTables();
|
||||
/// After attaching system databases we can initialize system log.
|
||||
global_context->initializeSystemLogs();
|
||||
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
|
||||
@ -1668,7 +1669,6 @@ try
|
||||
/// Then, load remaining databases
|
||||
loadMetadata(global_context, default_database);
|
||||
convertDatabasesEnginesIfNeed(global_context);
|
||||
startupSystemTables();
|
||||
database_catalog.startupBackgroundCleanup();
|
||||
/// After loading validate that default database exists
|
||||
database_catalog.assertDatabaseExists(default_database);
|
||||
|
1
programs/server/config.d/clusters.xml
Symbolic link
1
programs/server/config.d/clusters.xml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tests/config/config.d/clusters.xml
|
231
rust/Cargo.lock
generated
231
rust/Cargo.lock
generated
@ -78,6 +78,55 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is-terminal",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.72"
|
||||
@ -89,9 +138,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ariadne"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
|
||||
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
"yansi",
|
||||
@ -142,6 +191,12 @@ version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.4.1"
|
||||
@ -204,7 +259,7 @@ version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
|
||||
dependencies = [
|
||||
"hashbrown 0.12.3",
|
||||
"hashbrown",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
@ -218,6 +273,12 @@ dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.3.0"
|
||||
@ -488,21 +549,36 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.5.1"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
|
||||
checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
name = "errno"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
|
||||
dependencies = [
|
||||
"errno-dragonfly",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno-dragonfly"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
@ -555,12 +631,6 @@ dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
@ -603,13 +673,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
name = "is-terminal"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
"hermit-abi",
|
||||
"rustix",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -621,6 +692,15 @@ dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.9"
|
||||
@ -657,6 +737,12 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.19"
|
||||
@ -708,7 +794,7 @@ version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
]
|
||||
@ -720,7 +806,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"memoffset 0.6.5",
|
||||
@ -787,31 +873,55 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.8.1"
|
||||
name = "prql-ast"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
|
||||
checksum = "71194e75f14dbe7debdf2b5eca0812c978021a1bd23d6fe1da98b58e407e035a"
|
||||
dependencies = [
|
||||
"enum-as-inner",
|
||||
"semver",
|
||||
"serde",
|
||||
"strum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ff28e838b1be4227cc567a75c11caa3be25c5015f0e5fd21279c06e944ba44f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anyhow",
|
||||
"ariadne",
|
||||
"chumsky",
|
||||
"csv",
|
||||
"enum-as-inner",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"itertools 0.11.0",
|
||||
"log",
|
||||
"once_cell",
|
||||
"prql-ast",
|
||||
"prql-parser",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sqlformat",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-parser"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3182e2ef0465a960eb02519b18768e39123d3c3a0037a2d2934055a3ef901870"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"itertools 0.11.0",
|
||||
"prql-ast",
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
@ -858,7 +968,7 @@ version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -907,6 +1017,19 @@ version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.14"
|
||||
@ -971,19 +1094,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "skim"
|
||||
version = "0.10.4"
|
||||
@ -991,7 +1101,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
|
||||
dependencies = [
|
||||
"beef",
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"chrono",
|
||||
"crossbeam",
|
||||
"defer-drop",
|
||||
@ -1015,16 +1125,16 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"itertools 0.10.5",
|
||||
"nom",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.33.0"
|
||||
version = "0.36.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
|
||||
checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
@ -1051,24 +1161,24 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
version = "0.25.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1191,7 +1301,7 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e19c6ab038babee3d50c8c12ff8b910bdb2196f62278776422f50390d8e53d8"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"nix 0.24.3",
|
||||
@ -1223,12 +1333,6 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe-libyaml"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.1"
|
||||
@ -1368,6 +1472,15 @@ dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.1"
|
||||
|
@ -1,12 +1,12 @@
|
||||
[package]
|
||||
edition = "2021"
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
prql-compiler = "0.8.1"
|
||||
prql-compiler = "0.9.3"
|
||||
serde_json = "1.0"
|
||||
|
||||
[lib]
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Access/DiskAccessStorage.h>
|
||||
#include <Access/LDAPAccessStorage.h>
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Access/EnabledSettings.h>
|
||||
#include <Access/EnabledRolesInfo.h>
|
||||
#include <Access/RoleCache.h>
|
||||
#include <Access/RowPolicyCache.h>
|
||||
@ -729,6 +730,14 @@ std::shared_ptr<const EnabledRoles> AccessControl::getEnabledRoles(
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<const EnabledRolesInfo> AccessControl::getEnabledRolesInfo(
|
||||
const std::vector<UUID> & current_roles,
|
||||
const std::vector<UUID> & current_roles_with_admin_option) const
|
||||
{
|
||||
return getEnabledRoles(current_roles, current_roles_with_admin_option)->getRolesInfo();
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<const EnabledRowPolicies> AccessControl::getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set<UUID> & enabled_roles) const
|
||||
{
|
||||
return row_policy_cache->getEnabledRowPolicies(user_id, enabled_roles);
|
||||
@ -772,6 +781,15 @@ std::shared_ptr<const EnabledSettings> AccessControl::getEnabledSettings(
|
||||
return settings_profiles_cache->getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles);
|
||||
}
|
||||
|
||||
std::shared_ptr<const SettingsProfilesInfo> AccessControl::getEnabledSettingsInfo(
|
||||
const UUID & user_id,
|
||||
const SettingsProfileElements & settings_from_user,
|
||||
const boost::container::flat_set<UUID> & enabled_roles,
|
||||
const SettingsProfileElements & settings_from_enabled_roles) const
|
||||
{
|
||||
return getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles)->getInfo();
|
||||
}
|
||||
|
||||
std::shared_ptr<const SettingsProfilesInfo> AccessControl::getSettingsProfileInfo(const UUID & profile_id)
|
||||
{
|
||||
return settings_profiles_cache->getSettingsProfileInfo(profile_id);
|
||||
|
@ -29,6 +29,7 @@ class ContextAccessParams;
|
||||
struct User;
|
||||
using UserPtr = std::shared_ptr<const User>;
|
||||
class EnabledRoles;
|
||||
struct EnabledRolesInfo;
|
||||
class RoleCache;
|
||||
class EnabledRowPolicies;
|
||||
class RowPolicyCache;
|
||||
@ -187,6 +188,10 @@ public:
|
||||
const std::vector<UUID> & current_roles,
|
||||
const std::vector<UUID> & current_roles_with_admin_option) const;
|
||||
|
||||
std::shared_ptr<const EnabledRolesInfo> getEnabledRolesInfo(
|
||||
const std::vector<UUID> & current_roles,
|
||||
const std::vector<UUID> & current_roles_with_admin_option) const;
|
||||
|
||||
std::shared_ptr<const EnabledRowPolicies> getEnabledRowPolicies(
|
||||
const UUID & user_id,
|
||||
const boost::container::flat_set<UUID> & enabled_roles) const;
|
||||
@ -209,6 +214,12 @@ public:
|
||||
const boost::container::flat_set<UUID> & enabled_roles,
|
||||
const SettingsProfileElements & settings_from_enabled_roles) const;
|
||||
|
||||
std::shared_ptr<const SettingsProfilesInfo> getEnabledSettingsInfo(
|
||||
const UUID & user_id,
|
||||
const SettingsProfileElements & settings_from_user,
|
||||
const boost::container::flat_set<UUID> & enabled_roles,
|
||||
const SettingsProfileElements & settings_from_enabled_roles) const;
|
||||
|
||||
std::shared_ptr<const SettingsProfilesInfo> getSettingsProfileInfo(const UUID & profile_id);
|
||||
|
||||
const ExternalAuthenticators & getExternalAuthenticators() const;
|
||||
|
@ -168,6 +168,7 @@ enum class AccessType
|
||||
M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \
|
||||
M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \
|
||||
M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \
|
||||
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
|
||||
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
|
||||
|
@ -18,7 +18,8 @@
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
|
||||
template <typename T>
|
||||
requires std::is_fundamental_v<std::decay_t<T>>
|
||||
void updateHash(SipHash & hash, const T & value)
|
||||
{
|
||||
hash.update(value);
|
||||
|
@ -51,7 +51,7 @@ TEST(AccessRights, Union)
|
||||
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
|
||||
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
|
||||
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
|
||||
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
|
||||
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
|
||||
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
|
||||
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
|
||||
}
|
||||
|
646
src/AggregateFunctions/AggregateFunctionFlameGraph.cpp
Normal file
646
src/AggregateFunctions/AggregateFunctionFlameGraph.cpp
Normal file
@ -0,0 +1,646 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <filesystem>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FUNCTION_NOT_ALLOWED;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
struct AggregateFunctionFlameGraphTree
|
||||
{
|
||||
struct ListNode;
|
||||
|
||||
struct TreeNode
|
||||
{
|
||||
TreeNode * parent = nullptr;
|
||||
ListNode * children = nullptr;
|
||||
UInt64 ptr = 0;
|
||||
size_t allocated = 0;
|
||||
};
|
||||
|
||||
struct ListNode
|
||||
{
|
||||
ListNode * next = nullptr;
|
||||
TreeNode * child = nullptr;
|
||||
};
|
||||
|
||||
TreeNode root;
|
||||
|
||||
static ListNode * createChild(TreeNode * parent, UInt64 ptr, Arena * arena)
|
||||
{
|
||||
|
||||
ListNode * list_node = reinterpret_cast<ListNode *>(arena->alloc(sizeof(ListNode)));
|
||||
TreeNode * tree_node = reinterpret_cast<TreeNode *>(arena->alloc(sizeof(TreeNode)));
|
||||
|
||||
list_node->child = tree_node;
|
||||
list_node->next = nullptr;
|
||||
|
||||
tree_node->parent =parent;
|
||||
tree_node->children = nullptr;
|
||||
tree_node->ptr = ptr;
|
||||
tree_node->allocated = 0;
|
||||
|
||||
return list_node;
|
||||
}
|
||||
|
||||
TreeNode * find(const UInt64 * stack, size_t stack_size, Arena * arena)
|
||||
{
|
||||
TreeNode * node = &root;
|
||||
for (size_t i = 0; i < stack_size; ++i)
|
||||
{
|
||||
UInt64 ptr = stack[i];
|
||||
if (ptr == 0)
|
||||
break;
|
||||
|
||||
if (!node->children)
|
||||
{
|
||||
node->children = createChild(node, ptr, arena);
|
||||
node = node->children->child;
|
||||
}
|
||||
else
|
||||
{
|
||||
ListNode * list = node->children;
|
||||
while (list->child->ptr != ptr && list->next)
|
||||
list = list->next;
|
||||
|
||||
if (list->child->ptr != ptr)
|
||||
{
|
||||
list->next = createChild(node, ptr, arena);
|
||||
list = list->next;
|
||||
}
|
||||
|
||||
node = list->child;
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static void append(DB::PaddedPODArray<UInt64> & values, DB::PaddedPODArray<UInt64> & offsets, std::vector<UInt64> & frame)
|
||||
{
|
||||
UInt64 prev = offsets.empty() ? 0 : offsets.back();
|
||||
offsets.push_back(prev + frame.size());
|
||||
for (UInt64 val : frame)
|
||||
values.push_back(val);
|
||||
}
|
||||
|
||||
struct Trace
|
||||
{
|
||||
using Frames = std::vector<UInt64>;
|
||||
|
||||
Frames frames;
|
||||
|
||||
/// The total number of bytes allocated for traces with the same prefix.
|
||||
size_t allocated_total = 0;
|
||||
/// This counter is relevant in case we want to filter some traces with small amount of bytes.
|
||||
/// It shows the total number of bytes for *filtered* traces with the same prefix.
|
||||
/// This is the value which is used in flamegraph.
|
||||
size_t allocated_self = 0;
|
||||
};
|
||||
|
||||
using Traces = std::vector<Trace>;
|
||||
|
||||
Traces dump(size_t max_depth, size_t min_bytes) const
|
||||
{
|
||||
Traces traces;
|
||||
Trace::Frames frames;
|
||||
std::vector<size_t> allocated_total;
|
||||
std::vector<size_t> allocated_self;
|
||||
std::vector<ListNode *> nodes;
|
||||
|
||||
nodes.push_back(root.children);
|
||||
allocated_total.push_back(root.allocated);
|
||||
allocated_self.push_back(root.allocated);
|
||||
|
||||
while (!nodes.empty())
|
||||
{
|
||||
if (nodes.back() == nullptr)
|
||||
{
|
||||
traces.push_back({frames, allocated_total.back(), allocated_self.back()});
|
||||
|
||||
nodes.pop_back();
|
||||
allocated_total.pop_back();
|
||||
allocated_self.pop_back();
|
||||
|
||||
/// We don't have root's frame so framers are empty in the end.
|
||||
if (!frames.empty())
|
||||
frames.pop_back();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
TreeNode * current = nodes.back()->child;
|
||||
nodes.back() = nodes.back()->next;
|
||||
|
||||
bool enough_bytes = current->allocated >= min_bytes;
|
||||
bool enough_depth = max_depth == 0 || nodes.size() < max_depth;
|
||||
|
||||
if (enough_bytes)
|
||||
{
|
||||
frames.push_back(current->ptr);
|
||||
allocated_self.back() -= current->allocated;
|
||||
|
||||
if (enough_depth)
|
||||
{
|
||||
allocated_total.push_back(current->allocated);
|
||||
allocated_self.push_back(current->allocated);
|
||||
nodes.push_back(current->children);
|
||||
}
|
||||
else
|
||||
{
|
||||
traces.push_back({frames, current->allocated, current->allocated});
|
||||
frames.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return traces;
|
||||
}
|
||||
};
|
||||
|
||||
/// Appends `length` bytes starting at `pos` to `chars` as one more row,
/// followed by a terminating zero byte, and records the new end position in `offsets`.
static void insertData(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const char * pos, size_t length)
{
    const size_t row_begin = chars.size();
    /// Position of the terminating zero byte of the new row.
    const size_t terminator_pos = row_begin + length;
    const size_t row_end = terminator_pos + 1;

    chars.resize(row_end);

    /// Nothing to copy for an empty row.
    if (length != 0)
        memcpy(chars.data() + row_begin, pos, length);

    chars[terminator_pos] = 0;
    offsets.push_back(row_end);
}
|
||||
|
||||
/// Split str by line feed and write as separate row to ColumnString.
|
||||
static void fillColumn(DB::PaddedPODArray<UInt8> & chars, DB::PaddedPODArray<UInt64> & offsets, const std::string & str)
|
||||
{
|
||||
size_t start = 0;
|
||||
size_t end = 0;
|
||||
size_t size = str.size();
|
||||
|
||||
while (end < size)
|
||||
{
|
||||
if (str[end] == '\n')
|
||||
{
|
||||
insertData(chars, offsets, str.data() + start, end - start);
|
||||
start = end + 1;
|
||||
}
|
||||
|
||||
++end;
|
||||
}
|
||||
|
||||
if (start < end)
|
||||
insertData(chars, offsets, str.data() + start, end - start);
|
||||
}
|
||||
|
||||
void dumpFlameGraph(
|
||||
const AggregateFunctionFlameGraphTree::Traces & traces,
|
||||
DB::PaddedPODArray<UInt8> & chars,
|
||||
DB::PaddedPODArray<UInt64> & offsets)
|
||||
{
|
||||
DB::WriteBufferFromOwnString out;
|
||||
|
||||
std::unordered_map<uintptr_t, size_t> mapping;
|
||||
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
|
||||
#endif
|
||||
|
||||
for (const auto & trace : traces)
|
||||
{
|
||||
if (trace.allocated_self == 0)
|
||||
continue;
|
||||
|
||||
for (size_t i = 0; i < trace.frames.size(); ++i)
|
||||
{
|
||||
if (i)
|
||||
out << ";";
|
||||
|
||||
const void * ptr = reinterpret_cast<const void *>(trace.frames[i]);
|
||||
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
if (const auto * symbol = symbol_index.findSymbol(ptr))
|
||||
writeString(demangle(symbol->name), out);
|
||||
else
|
||||
DB::writePointerHex(ptr, out);
|
||||
#else
|
||||
DB::writePointerHex(ptr, out);
|
||||
#endif
|
||||
}
|
||||
|
||||
out << ' ' << trace.allocated_self << "\n";
|
||||
}
|
||||
|
||||
fillColumn(chars, offsets, out.str());
|
||||
}
|
||||
|
||||
struct AggregateFunctionFlameGraphData
|
||||
{
|
||||
struct Entry
|
||||
{
|
||||
AggregateFunctionFlameGraphTree::TreeNode * trace;
|
||||
UInt64 size;
|
||||
Entry * next = nullptr;
|
||||
};
|
||||
|
||||
struct Pair
|
||||
{
|
||||
Entry * allocation = nullptr;
|
||||
Entry * deallocation = nullptr;
|
||||
};
|
||||
|
||||
using Entries = HashMap<UInt64, Pair>;
|
||||
|
||||
AggregateFunctionFlameGraphTree tree;
|
||||
Entries entries;
|
||||
Entry * free_list = nullptr;
|
||||
|
||||
Entry * alloc(Arena * arena)
|
||||
{
|
||||
if (free_list)
|
||||
{
|
||||
auto * res = free_list;
|
||||
free_list = free_list->next;
|
||||
return res;
|
||||
}
|
||||
|
||||
return reinterpret_cast<Entry *>(arena->alloc(sizeof(Entry)));
|
||||
}
|
||||
|
||||
void release(Entry * entry)
|
||||
{
|
||||
entry->next = free_list;
|
||||
free_list = entry;
|
||||
}
|
||||
|
||||
static void track(Entry * allocation)
|
||||
{
|
||||
auto * node = allocation->trace;
|
||||
while (node)
|
||||
{
|
||||
node->allocated += allocation->size;
|
||||
node = node->parent;
|
||||
}
|
||||
}
|
||||
|
||||
static void untrack(Entry * allocation)
|
||||
{
|
||||
auto * node = allocation->trace;
|
||||
while (node)
|
||||
{
|
||||
node->allocated -= allocation->size;
|
||||
node = node->parent;
|
||||
}
|
||||
}
|
||||
|
||||
static Entry * tryFindMatchAndRemove(Entry *& list, UInt64 size)
|
||||
{
|
||||
if (!list)
|
||||
return nullptr;
|
||||
|
||||
if (list->size == size)
|
||||
{
|
||||
Entry * entry = list;
|
||||
list = list->next;
|
||||
return entry;
|
||||
}
|
||||
else
|
||||
{
|
||||
Entry * parent = list;
|
||||
while (parent->next && parent->next->size != size)
|
||||
parent = parent->next;
|
||||
|
||||
if (parent->next && parent->next->size == size)
|
||||
{
|
||||
Entry * entry = parent->next;
|
||||
parent->next = entry->next;
|
||||
return entry;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void add(UInt64 ptr, Int64 size, const UInt64 * stack, size_t stack_size, Arena * arena)
|
||||
{
|
||||
/// In case if argument is nullptr, only track allocations.
|
||||
if (ptr == 0)
|
||||
{
|
||||
if (size > 0)
|
||||
{
|
||||
auto * node = tree.find(stack, stack_size, arena);
|
||||
Entry entry{.trace = node, .size = UInt64(size)};
|
||||
track(&entry);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto & place = entries[ptr];
|
||||
if (size > 0)
|
||||
{
|
||||
if (auto * deallocation = tryFindMatchAndRemove(place.deallocation, size))
|
||||
{
|
||||
release(deallocation);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto * node = tree.find(stack, stack_size, arena);
|
||||
|
||||
auto * allocation = alloc(arena);
|
||||
allocation->size = UInt64(size);
|
||||
allocation->trace = node;
|
||||
|
||||
track(allocation);
|
||||
|
||||
allocation->next = place.allocation;
|
||||
place.allocation = allocation;
|
||||
}
|
||||
}
|
||||
else if (size < 0)
|
||||
{
|
||||
UInt64 abs_size = -size;
|
||||
if (auto * allocation = tryFindMatchAndRemove(place.allocation, abs_size))
|
||||
{
|
||||
untrack(allocation);
|
||||
release(allocation);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto * deallocation = alloc(arena);
|
||||
deallocation->size = abs_size;
|
||||
|
||||
deallocation->next = place.deallocation;
|
||||
place.deallocation = deallocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionFlameGraphTree & other_tree, Arena * arena)
|
||||
{
|
||||
AggregateFunctionFlameGraphTree::Trace::Frames frames;
|
||||
std::vector<AggregateFunctionFlameGraphTree::ListNode *> nodes;
|
||||
|
||||
nodes.push_back(other_tree.root.children);
|
||||
|
||||
while (!nodes.empty())
|
||||
{
|
||||
if (nodes.back() == nullptr)
|
||||
{
|
||||
nodes.pop_back();
|
||||
|
||||
/// We don't have root's frame so framers are empty in the end.
|
||||
if (!frames.empty())
|
||||
frames.pop_back();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
AggregateFunctionFlameGraphTree::TreeNode * current = nodes.back()->child;
|
||||
nodes.back() = nodes.back()->next;
|
||||
|
||||
frames.push_back(current->ptr);
|
||||
|
||||
if (current->children)
|
||||
nodes.push_back(current->children);
|
||||
else
|
||||
{
|
||||
if (current->allocated)
|
||||
add(0, current->allocated, frames.data(), frames.size(), arena);
|
||||
|
||||
frames.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionFlameGraphData & other, Arena * arena)
|
||||
{
|
||||
AggregateFunctionFlameGraphTree::Trace::Frames frames;
|
||||
for (const auto & entry : other.entries)
|
||||
{
|
||||
for (auto * allocation = entry.value.second.allocation; allocation; allocation = allocation->next)
|
||||
{
|
||||
frames.clear();
|
||||
const auto * node = allocation->trace;
|
||||
while (node->ptr)
|
||||
{
|
||||
frames.push_back(node->ptr);
|
||||
node = node->parent;
|
||||
}
|
||||
|
||||
std::reverse(frames.begin(), frames.end());
|
||||
add(entry.value.first, allocation->size, frames.data(), frames.size(), arena);
|
||||
untrack(allocation);
|
||||
}
|
||||
|
||||
for (auto * deallocation = entry.value.second.deallocation; deallocation; deallocation = deallocation->next)
|
||||
{
|
||||
add(entry.value.first, -Int64(deallocation->size), nullptr, 0, arena);
|
||||
}
|
||||
}
|
||||
|
||||
merge(other.tree, arena);
|
||||
}
|
||||
|
||||
/// Writes the flame graph of this state into a string column given as raw
/// `chars` / `offsets` arrays.
/// `max_depth` and `min_bytes` are forwarded unchanged to the tree dump.
void dumpFlameGraph(
    DB::PaddedPODArray<UInt8> & chars,
    DB::PaddedPODArray<UInt64> & offsets,
    size_t max_depth,
    size_t min_bytes) const
{
    DB::dumpFlameGraph(
        tree.dump(max_depth, min_bytes),
        chars,
        offsets);
}
|
||||
};
|
||||
|
||||
/// Aggregate function which builds a flamegraph using the list of stacktraces.
|
||||
/// The output is an array of strings which can be used by flamegraph.pl util.
|
||||
/// See https://github.com/brendangregg/FlameGraph
|
||||
///
|
||||
/// Syntax: flameGraph(trace, [size = 1], [ptr = 0])
|
||||
/// - trace : Array(UInt64), a stacktrace
|
||||
/// - size : Int64, an allocation size (for memory profiling)
|
||||
/// - ptr : UInt64, an allocation address
|
||||
/// If ptr != 0, flameGraph matches allocations (size > 0) with deallocations (size < 0) that have the same size and ptr.
|
||||
/// Only allocations which were not freed are shown. Unmatched deallocations are ignored.
|
||||
///
|
||||
/// Usage:
|
||||
///
|
||||
/// * Build a flamegraph based on CPU query profiler
|
||||
/// set query_profiler_cpu_time_period_ns=10000000;
|
||||
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
/// clickhouse client --allow_introspection_functions=1
|
||||
/// -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'"
|
||||
/// | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
|
||||
///
|
||||
/// * Build a flamegraph based on memory query profiler, showing all allocations
|
||||
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
|
||||
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
/// clickhouse client --allow_introspection_functions=1
|
||||
/// -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
|
||||
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
|
||||
///
|
||||
/// * Build a flamegraph based on memory query profiler, showing allocations which were not deallocated in query context
|
||||
/// set memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
|
||||
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
/// clickhouse client --allow_introspection_functions=1
|
||||
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'"
|
||||
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
|
||||
///
|
||||
/// * Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time
|
||||
/// set memory_profiler_sample_probability=1, max_untracked_memory=1;
|
||||
/// SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
/// 1. Memory usage per second
|
||||
/// select event_time, m, formatReadableSize(max(s) as m) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample') group by event_time order by event_time;
|
||||
/// 2. Find a time point with maximal memory usage
|
||||
/// select argMax(event_time, s), max(s) from (select event_time, sum(size) over (order by event_time) as s from system.trace_log where query_id = 'xxx' and trace_type = 'MemorySample');
|
||||
/// 3. Fix active allocations at fixed point of time
|
||||
/// clickhouse client --allow_introspection_functions=1
|
||||
/// -q "select arrayJoin(flameGraph(trace, size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time <= 'yyy' order by event_time)"
|
||||
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
|
||||
/// 4. Find deallocations at fixed point of time
|
||||
/// clickhouse client --allow_introspection_functions=1
|
||||
/// -q "select arrayJoin(flameGraph(trace, -size, ptr)) from (select * from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx' and event_time > 'yyy' order by event_time desc)"
|
||||
/// | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
|
||||
class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>
{
public:
    /// The function takes no parameters; the result type is always Array(String).
    explicit AggregateFunctionFlameGraph(const DataTypes & argument_types_)
        : IAggregateFunctionDataHelper<AggregateFunctionFlameGraphData, AggregateFunctionFlameGraph>(argument_types_, {}, createResultType())
    {
    }

    String getName() const override
    {
        return "flameGraph";
    }

    /// Array(String): one string per line of the flame graph dump.
    static DataTypePtr createResultType()
    {
        DataTypePtr line_type = std::make_shared<DataTypeString>();
        return std::make_shared<DataTypeArray>(line_type);
    }

    bool allocatesMemoryInArena() const override
    {
        return true;
    }

    /// Adds one row to the state.
    /// columns[0] is the trace (Array(UInt64)); columns[1] (optional) is the size,
    /// defaulting to 1; columns[2] (optional) is the pointer, defaulting to 0.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        const auto & trace_column = assert_cast<const ColumnArray &>(*columns[0]);
        const auto & offsets = trace_column.getOffsets();
        const auto & frames = assert_cast<const ColumnUInt64 &>(trace_column.getData()).getData();

        /// The array of row N occupies [offsets[N - 1], offsets[N]) in the flat data; row 0 starts at 0.
        const UInt64 begin = row_num ? offsets[row_num - 1] : 0;
        const UInt64 frame_count = offsets[row_num] - begin;

        const size_t num_arguments = argument_types.size();

        Int64 allocated = 1;
        if (num_arguments >= 2)
            allocated = assert_cast<const ColumnInt64 &>(*columns[1]).getData()[row_num];

        UInt64 ptr = 0;
        if (num_arguments >= 3)
            ptr = assert_cast<const ColumnUInt64 &>(*columns[2]).getData()[row_num];

        this->data(place).add(ptr, allocated, frames.data() + begin, frame_count, arena);
    }

    /// Rows with default values are skipped: nothing is added to the state.
    void addManyDefaults(
        AggregateDataPtr __restrict /*place*/,
        const IColumn ** /*columns*/,
        size_t /*length*/,
        Arena * /*arena*/) const override
    {
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        const auto & source = this->data(rhs);
        this->data(place).merge(source, arena);
    }

    /// States of this function cannot be serialized; always throws NOT_IMPLEMENTED.
    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t> /* version */) const override
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization for function flameGraph is not implemented.");
    }

    /// States of this function cannot be deserialized; always throws NOT_IMPLEMENTED.
    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t> /* version */, Arena *) const override
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Deserialization for function flameGraph is not implemented.");
    }

    /// Appends one Array(String) value to `to`: the flame graph dump of the state,
    /// produced with max_depth = 0 and min_bytes = 0.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto & result_array = assert_cast<ColumnArray &>(to);
        auto & lines = assert_cast<ColumnString &>(result_array.getData());

        this->data(place).dumpFlameGraph(lines.getChars(), lines.getOffsets(), 0, 0);

        /// Close the new array: its end offset is the total number of strings written so far.
        result_array.getOffsets().push_back(lines.size());
    }
};
|
||||
|
||||
static void check(const std::string & name, const DataTypes & argument_types, const Array & params)
|
||||
{
|
||||
assertNoParameters(name, params);
|
||||
|
||||
if (argument_types.empty() || argument_types.size() > 3)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Aggregate function {} requires 1 to 3 arguments : trace, [size = 1], [ptr = 0]",
|
||||
name);
|
||||
|
||||
auto ptr_type = std::make_shared<DataTypeUInt64>();
|
||||
auto trace_type = std::make_shared<DataTypeArray>(ptr_type);
|
||||
auto size_type = std::make_shared<DataTypeInt64>();
|
||||
|
||||
if (!argument_types[0]->equals(*trace_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"First argument (trace) for function {} must be Array(UInt64), but it has type {}",
|
||||
name, argument_types[0]->getName());
|
||||
|
||||
if (argument_types.size() >= 2 && !argument_types[1]->equals(*size_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Second argument (size) for function {} must be Int64, but it has type {}",
|
||||
name, argument_types[1]->getName());
|
||||
|
||||
if (argument_types.size() >= 3 && !argument_types[2]->equals(*ptr_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Third argument (ptr) for function {} must be UInt64, but it has type {}",
|
||||
name, argument_types[2]->getName());
|
||||
}
|
||||
|
||||
/// Factory callback for flameGraph.
///
/// Refuses to create the function unless the `allow_introspection_functions`
/// setting is enabled, then validates parameters and argument types with check().
///
/// Throws FUNCTION_NOT_ALLOWED when introspection functions are disabled;
/// check() throws on a wrong argument count or type.
AggregateFunctionPtr createAggregateFunctionFlameGraph(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings * settings)
{
    /// `settings` arrives as a raw pointer and used to be dereferenced unconditionally.
    /// NOTE(review): whether callers can pass nullptr is not visible from here; a missing
    /// settings object is treated as "introspection not allowed" rather than dereferenced.
    const bool introspection_allowed = settings && settings->allow_introspection_functions;
    if (!introspection_allowed)
        throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED,
            "Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0");

    check(name, argument_types, params);
    return std::make_shared<AggregateFunctionFlameGraph>(argument_types);
}
|
||||
|
||||
/// Registers flameGraph in the aggregate function factory.
/// The function is registered as order dependent and as returning the default
/// value when all its inputs are NULL.
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties flame_graph_properties{};
    flame_graph_properties.returns_default_when_only_null = true;
    flame_graph_properties.is_order_dependent = true;

    factory.registerFunction("flameGraph", { createAggregateFunctionFlameGraph, flame_graph_properties });
}
|
||||
|
||||
}
|
@ -80,6 +80,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
|
||||
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
|
||||
|
||||
class AggregateFunctionCombinatorFactory;
|
||||
@ -173,6 +174,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionExponentialMovingAverage(factory);
|
||||
registerAggregateFunctionSparkbar(factory);
|
||||
registerAggregateFunctionAnalysisOfVariance(factory);
|
||||
registerAggregateFunctionFlameGraph(factory);
|
||||
registerAggregateFunctionKolmogorovSmirnovTest(factory);
|
||||
|
||||
registerWindowFunctions(factory);
|
||||
|
@ -6887,13 +6887,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
std::erase_if(with_nodes, [](const QueryTreeNodePtr & node)
|
||||
{
|
||||
auto * subquery_node = node->as<QueryNode>();
|
||||
auto * union_node = node->as<UnionNode>();
|
||||
|
||||
return (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
|
||||
});
|
||||
/** WITH section can be safely removed, because WITH section only can provide aliases to query expressions
|
||||
* and CTE for other sections to use.
|
||||
*
|
||||
* Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table);
|
||||
*/
|
||||
query_node_typed.getWith().getNodes().clear();
|
||||
|
||||
for (auto & window_node : query_node_typed.getWindow().getNodes())
|
||||
{
|
||||
@ -6952,9 +6951,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
if (query_node_typed.hasWith())
|
||||
resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
|
||||
if (query_node_typed.getPrewhere())
|
||||
resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
|
||||
|
||||
@ -7123,13 +7119,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
/** WITH section can be safely removed, because WITH section only can provide aliases to query expressions
|
||||
* and CTE for other sections to use.
|
||||
*
|
||||
* Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table);
|
||||
*/
|
||||
query_node_typed.getWith().getNodes().clear();
|
||||
|
||||
/** WINDOW section can be safely removed, because WINDOW section can only provide window definition to window functions.
|
||||
*
|
||||
* Example: SELECT count(*) OVER w FROM test_table WINDOW w AS (PARTITION BY id);
|
||||
|
@ -70,10 +70,13 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes &
|
||||
{
|
||||
if (columns_size == 1)
|
||||
{
|
||||
auto field = convertFieldToType(value, *block_types[0]);
|
||||
auto field = convertFieldToTypeStrict(value, *block_types[0]);
|
||||
if (!field)
|
||||
continue;
|
||||
|
||||
bool need_insert_null = transform_null_in && block_types[0]->isNullable();
|
||||
if (!field.isNull() || need_insert_null)
|
||||
columns[0]->insert(std::move(field));
|
||||
if (!field->isNull() || need_insert_null)
|
||||
columns[0]->insert(*field);
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -98,7 +101,11 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes &
|
||||
size_t i = 0;
|
||||
for (; i < tuple_size; ++i)
|
||||
{
|
||||
tuple_values[i] = convertFieldToType(tuple[i], *block_types[i]);
|
||||
auto converted_field = convertFieldToTypeStrict(tuple[i], *block_types[i]);
|
||||
if (!converted_field)
|
||||
break;
|
||||
tuple_values[i] = std::move(*converted_field);
|
||||
|
||||
bool need_insert_null = transform_null_in && block_types[i]->isNullable();
|
||||
if (tuple_values[i].isNull() && !need_insert_null)
|
||||
break;
|
||||
|
@ -77,10 +77,12 @@ BackupEntriesCollector::BackupEntriesCollector(
|
||||
const ASTBackupQuery::Elements & backup_query_elements_,
|
||||
const BackupSettings & backup_settings_,
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination_,
|
||||
const ReadSettings & read_settings_,
|
||||
const ContextPtr & context_)
|
||||
: backup_query_elements(backup_query_elements_)
|
||||
, backup_settings(backup_settings_)
|
||||
, backup_coordination(backup_coordination_)
|
||||
, read_settings(read_settings_)
|
||||
, context(context_)
|
||||
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
|
||||
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
|
||||
|
@ -30,6 +30,7 @@ public:
|
||||
BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
|
||||
const BackupSettings & backup_settings_,
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination_,
|
||||
const ReadSettings & read_settings_,
|
||||
const ContextPtr & context_);
|
||||
~BackupEntriesCollector();
|
||||
|
||||
@ -40,6 +41,7 @@ public:
|
||||
|
||||
const BackupSettings & getBackupSettings() const { return backup_settings; }
|
||||
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
|
||||
const ReadSettings & getReadSettings() const { return read_settings; }
|
||||
ContextPtr getContext() const { return context; }
|
||||
|
||||
/// Adds a backup entry which will be later returned by run().
|
||||
@ -93,6 +95,7 @@ private:
|
||||
const ASTBackupQuery::Elements backup_query_elements;
|
||||
const BackupSettings backup_settings;
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination;
|
||||
const ReadSettings read_settings;
|
||||
ContextPtr context;
|
||||
std::chrono::milliseconds on_cluster_first_sync_timeout;
|
||||
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
|
||||
|
@ -57,7 +57,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const
|
||||
return *file_size;
|
||||
}
|
||||
|
||||
UInt128 BackupEntryFromImmutableFile::getChecksum() const
|
||||
UInt128 BackupEntryFromImmutableFile::getChecksum(const ReadSettings & read_settings) const
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{size_and_checksum_mutex};
|
||||
@ -73,7 +73,7 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
|
||||
}
|
||||
}
|
||||
|
||||
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
|
||||
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum(read_settings);
|
||||
|
||||
{
|
||||
std::lock_guard lock{size_and_checksum_mutex};
|
||||
@ -86,13 +86,13 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const
|
||||
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
|
||||
{
|
||||
if (prefix_length == 0)
|
||||
return 0;
|
||||
|
||||
if (prefix_length >= getSize())
|
||||
return getChecksum();
|
||||
return getChecksum(read_settings);
|
||||
|
||||
/// For immutable files we don't use partial checksums.
|
||||
return std::nullopt;
|
||||
|
@ -27,8 +27,8 @@ public:
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
|
||||
|
||||
UInt64 getSize() const override;
|
||||
UInt128 getChecksum() const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
|
||||
UInt128 getChecksum(const ReadSettings & read_settings) const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;
|
||||
|
||||
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
|
||||
bool isEncryptedByDisk() const override { return copy_encrypted; }
|
||||
|
@ -11,17 +11,17 @@ namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
String readFile(const String & file_path)
|
||||
String readFile(const String & file_path, const ReadSettings & read_settings)
|
||||
{
|
||||
auto buf = createReadBufferFromFileBase(file_path, /* settings= */ {});
|
||||
auto buf = createReadBufferFromFileBase(file_path, read_settings);
|
||||
String s;
|
||||
readStringUntilEOF(s, *buf);
|
||||
return s;
|
||||
}
|
||||
|
||||
String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted)
|
||||
String readFile(const DiskPtr & disk, const String & file_path, const ReadSettings & read_settings, bool copy_encrypted)
|
||||
{
|
||||
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path);
|
||||
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, read_settings) : disk->readFile(file_path, read_settings);
|
||||
String s;
|
||||
readStringUntilEOF(s, *buf);
|
||||
return s;
|
||||
@ -29,19 +29,19 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_)
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_)
|
||||
: file_path(file_path_)
|
||||
, data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_))
|
||||
, data(readFile(file_path_))
|
||||
, data(readFile(file_path_, read_settings_))
|
||||
{
|
||||
}
|
||||
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_)
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_)
|
||||
: disk(disk_)
|
||||
, file_path(file_path_)
|
||||
, data_source_description(disk_->getDataSourceDescription())
|
||||
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
|
||||
, data(readFile(disk_, file_path, copy_encrypted))
|
||||
, data(readFile(disk_, file_path, read_settings_, copy_encrypted))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -13,8 +13,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
|
||||
{
|
||||
public:
|
||||
explicit BackupEntryFromSmallFile(const String & file_path_);
|
||||
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false);
|
||||
explicit BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_);
|
||||
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_ = false);
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
|
||||
UInt64 getSize() const override { return data.size(); }
|
||||
|
@ -6,7 +6,7 @@ namespace DB
|
||||
{
|
||||
|
||||
template <typename Base>
|
||||
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
|
||||
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum(const ReadSettings & read_settings) const
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{checksum_calculation_mutex};
|
||||
@ -26,7 +26,7 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
|
||||
}
|
||||
else
|
||||
{
|
||||
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(size));
|
||||
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(size));
|
||||
HashingReadBuffer hashing_read_buffer(*read_buffer);
|
||||
hashing_read_buffer.ignoreAll();
|
||||
calculated_checksum = hashing_read_buffer.getHash();
|
||||
@ -37,23 +37,20 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
|
||||
}
|
||||
|
||||
template <typename Base>
|
||||
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length) const
|
||||
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
|
||||
{
|
||||
if (prefix_length == 0)
|
||||
return 0;
|
||||
|
||||
size_t size = this->getSize();
|
||||
if (prefix_length >= size)
|
||||
return this->getChecksum();
|
||||
return this->getChecksum(read_settings);
|
||||
|
||||
std::lock_guard lock{checksum_calculation_mutex};
|
||||
|
||||
ReadSettings read_settings;
|
||||
if (calculated_checksum)
|
||||
read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size);
|
||||
|
||||
auto read_buffer = this->getReadBuffer(read_settings);
|
||||
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size));
|
||||
HashingReadBuffer hashing_read_buffer(*read_buffer);
|
||||
|
||||
hashing_read_buffer.ignore(prefix_length);
|
||||
auto partial_checksum = hashing_read_buffer.getHash();
|
||||
|
||||
|
@ -11,8 +11,8 @@ template <typename Base>
|
||||
class BackupEntryWithChecksumCalculation : public Base
|
||||
{
|
||||
public:
|
||||
UInt128 getChecksum() const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
|
||||
UInt128 getChecksum(const ReadSettings & read_settings) const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;
|
||||
|
||||
private:
|
||||
mutable std::optional<UInt128> calculated_checksum;
|
||||
|
@ -17,8 +17,8 @@ public:
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); }
|
||||
UInt64 getSize() const override { return entry->getSize(); }
|
||||
UInt128 getChecksum() const override { return entry->getChecksum(); }
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); }
|
||||
UInt128 getChecksum(const ReadSettings & read_settings) const override { return entry->getChecksum(read_settings); }
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return entry->getPartialChecksum(prefix_length, read_settings); }
|
||||
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
|
||||
bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); }
|
||||
bool isFromFile() const override { return entry->isFromFile(); }
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include <Backups/IBackup.h>
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <Core/Types.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <memory>
|
||||
@ -37,6 +39,8 @@ public:
|
||||
std::optional<UUID> backup_uuid;
|
||||
bool deduplicate_files = true;
|
||||
bool allow_s3_native_copy = true;
|
||||
ReadSettings read_settings;
|
||||
WriteSettings write_settings;
|
||||
};
|
||||
|
||||
static BackupFactory & instance();
|
||||
|
@ -57,12 +57,12 @@ namespace
|
||||
|
||||
/// Calculate checksum for backup entry if it's empty.
|
||||
/// Also able to calculate additional checksum of some prefix.
|
||||
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size)
|
||||
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size, const ReadSettings & read_settings)
|
||||
{
|
||||
ChecksumsForNewEntry res;
|
||||
/// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation.
|
||||
res.prefix_checksum = entry->getPartialChecksum(prefix_size);
|
||||
res.full_checksum = entry->getChecksum();
|
||||
res.prefix_checksum = entry->getPartialChecksum(prefix_size, read_settings);
|
||||
res.full_checksum = entry->getChecksum(read_settings);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -93,7 +93,12 @@ String BackupFileInfo::describe() const
|
||||
}
|
||||
|
||||
|
||||
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log)
|
||||
BackupFileInfo buildFileInfoForBackupEntry(
|
||||
const String & file_name,
|
||||
const BackupEntryPtr & backup_entry,
|
||||
const BackupPtr & base_backup,
|
||||
const ReadSettings & read_settings,
|
||||
Poco::Logger * log)
|
||||
{
|
||||
auto adjusted_path = removeLeadingSlash(file_name);
|
||||
|
||||
@ -126,7 +131,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
|
||||
/// File with the same name but smaller size exist in previous backup
|
||||
if (check_base == CheckBackupResult::HasPrefix)
|
||||
{
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first);
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first, read_settings);
|
||||
info.checksum = checksums.full_checksum;
|
||||
|
||||
/// We have prefix of this file in backup with the same checksum.
|
||||
@ -146,7 +151,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
|
||||
{
|
||||
/// We have full file or have nothing, first of all let's get checksum
|
||||
/// of current file
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
|
||||
info.checksum = checksums.full_checksum;
|
||||
|
||||
if (info.checksum == base_backup_file_info->second)
|
||||
@ -169,7 +174,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
|
||||
}
|
||||
else
|
||||
{
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
|
||||
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
|
||||
info.checksum = checksums.full_checksum;
|
||||
}
|
||||
|
||||
@ -188,7 +193,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
|
||||
return info;
|
||||
}
|
||||
|
||||
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool)
|
||||
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool)
|
||||
{
|
||||
BackupFileInfos infos;
|
||||
infos.resize(backup_entries.size());
|
||||
@ -210,7 +215,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
|
||||
++num_active_jobs;
|
||||
}
|
||||
|
||||
auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &base_backup, &thread_group, i, log](bool async)
|
||||
auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, i, log](bool async)
|
||||
{
|
||||
SCOPE_EXIT_SAFE({
|
||||
std::lock_guard lock{mutex};
|
||||
@ -237,7 +242,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
|
||||
return;
|
||||
}
|
||||
|
||||
infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, log);
|
||||
infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, read_settings, log);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -13,6 +13,7 @@ class IBackupEntry;
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
|
||||
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
|
||||
struct ReadSettings;
|
||||
|
||||
|
||||
/// Information about a file stored in a backup.
|
||||
@ -66,9 +67,9 @@ struct BackupFileInfo
|
||||
using BackupFileInfos = std::vector<BackupFileInfo>;
|
||||
|
||||
/// Builds a BackupFileInfo for a specified backup entry.
|
||||
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log);
|
||||
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, const ReadSettings & read_settings, Poco::Logger * log);
|
||||
|
||||
/// Builds a vector of BackupFileInfos for specified backup entries.
|
||||
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool);
|
||||
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool);
|
||||
|
||||
}
|
||||
|
@ -4,17 +4,16 @@
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_)
|
||||
BackupReaderDefault::BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
|
||||
: log(log_)
|
||||
, read_settings(context_->getBackupReadSettings())
|
||||
, write_settings(context_->getWriteSettings())
|
||||
, read_settings(read_settings_)
|
||||
, write_settings(write_settings_)
|
||||
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
|
||||
{
|
||||
}
|
||||
@ -37,10 +36,10 @@ void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t f
|
||||
write_buffer->finalize();
|
||||
}
|
||||
|
||||
BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_)
|
||||
BackupWriterDefault::BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
|
||||
: log(log_)
|
||||
, read_settings(context_->getBackupReadSettings())
|
||||
, write_settings(context_->getWriteSettings())
|
||||
, read_settings(read_settings_)
|
||||
, write_settings(write_settings_)
|
||||
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
|
||||
{
|
||||
}
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <Backups/BackupIO.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -19,7 +18,7 @@ enum class WriteMode;
|
||||
class BackupReaderDefault : public IBackupReader
|
||||
{
|
||||
public:
|
||||
BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_);
|
||||
BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
|
||||
~BackupReaderDefault() override = default;
|
||||
|
||||
/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
|
||||
@ -46,7 +45,7 @@ protected:
|
||||
class BackupWriterDefault : public IBackupWriter
|
||||
{
|
||||
public:
|
||||
BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_);
|
||||
BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
|
||||
~BackupWriterDefault() override = default;
|
||||
|
||||
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
|
||||
|
@ -8,8 +8,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
|
||||
: BackupReaderDefault(&Poco::Logger::get("BackupReaderDisk"), context_)
|
||||
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
|
||||
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderDisk"))
|
||||
, disk(disk_)
|
||||
, root_path(root_path_)
|
||||
, data_source_description(disk->getDataSourceDescription())
|
||||
@ -56,8 +56,8 @@ void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file
|
||||
}
|
||||
|
||||
|
||||
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
|
||||
: BackupWriterDefault(&Poco::Logger::get("BackupWriterDisk"), context_)
|
||||
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
|
||||
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterDisk"))
|
||||
, disk(disk_)
|
||||
, root_path(root_path_)
|
||||
, data_source_description(disk->getDataSourceDescription())
|
||||
|
@ -13,7 +13,7 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
class BackupReaderDisk : public BackupReaderDefault
|
||||
{
|
||||
public:
|
||||
BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
|
||||
BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
|
||||
~BackupReaderDisk() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
@ -33,7 +33,7 @@ private:
|
||||
class BackupWriterDisk : public BackupWriterDefault
|
||||
{
|
||||
public:
|
||||
BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
|
||||
BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
|
||||
~BackupWriterDisk() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
|
@ -16,8 +16,8 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
BackupReaderFile::BackupReaderFile(const String & root_path_, const ContextPtr & context_)
|
||||
: BackupReaderDefault(&Poco::Logger::get("BackupReaderFile"), context_)
|
||||
BackupReaderFile::BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
|
||||
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderFile"))
|
||||
, root_path(root_path_)
|
||||
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
|
||||
{
|
||||
@ -74,8 +74,8 @@ void BackupReaderFile::copyFileToDisk(const String & path_in_backup, size_t file
|
||||
}
|
||||
|
||||
|
||||
BackupWriterFile::BackupWriterFile(const String & root_path_, const ContextPtr & context_)
|
||||
: BackupWriterDefault(&Poco::Logger::get("BackupWriterFile"), context_)
|
||||
BackupWriterFile::BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
|
||||
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterFile"))
|
||||
, root_path(root_path_)
|
||||
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
|
||||
{
|
||||
|
@ -11,7 +11,7 @@ namespace DB
|
||||
class BackupReaderFile : public BackupReaderDefault
|
||||
{
|
||||
public:
|
||||
explicit BackupReaderFile(const String & root_path_, const ContextPtr & context_);
|
||||
explicit BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
UInt64 getFileSize(const String & file_name) override;
|
||||
@ -29,7 +29,7 @@ private:
|
||||
class BackupWriterFile : public BackupWriterDefault
|
||||
{
|
||||
public:
|
||||
BackupWriterFile(const String & root_path_, const ContextPtr & context_);
|
||||
BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
UInt64 getFileSize(const String & file_name) override;
|
||||
|
@ -50,7 +50,7 @@ namespace
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
|
||||
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {});
|
||||
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler);
|
||||
|
||||
client_configuration.endpointOverride = s3_uri.endpoint;
|
||||
client_configuration.maxConnections = static_cast<unsigned>(context->getSettingsRef().s3_max_connections);
|
||||
@ -101,8 +101,14 @@ namespace
|
||||
|
||||
|
||||
BackupReaderS3::BackupReaderS3(
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
|
||||
: BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_)
|
||||
const S3::URI & s3_uri_,
|
||||
const String & access_key_id_,
|
||||
const String & secret_access_key_,
|
||||
bool allow_s3_native_copy,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
const ContextPtr & context_)
|
||||
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
|
||||
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
|
||||
@ -178,8 +184,15 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
|
||||
|
||||
|
||||
BackupWriterS3::BackupWriterS3(
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_)
|
||||
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
|
||||
const S3::URI & s3_uri_,
|
||||
const String & access_key_id_,
|
||||
const String & secret_access_key_,
|
||||
bool allow_s3_native_copy,
|
||||
const String & storage_class_name,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
const ContextPtr & context_)
|
||||
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
|
||||
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
|
||||
|
@ -17,7 +17,7 @@ namespace DB
|
||||
class BackupReaderS3 : public BackupReaderDefault
|
||||
{
|
||||
public:
|
||||
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
|
||||
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
|
||||
~BackupReaderS3() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
@ -38,7 +38,7 @@ private:
|
||||
class BackupWriterS3 : public BackupWriterDefault
|
||||
{
|
||||
public:
|
||||
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_);
|
||||
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
|
||||
~BackupWriterS3() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
|
@ -27,6 +27,7 @@ namespace ErrorCodes
|
||||
M(Bool, decrypt_files_from_encrypted_disks) \
|
||||
M(Bool, deduplicate_files) \
|
||||
M(Bool, allow_s3_native_copy) \
|
||||
M(Bool, read_from_filesystem_cache) \
|
||||
M(UInt64, shard_num) \
|
||||
M(UInt64, replica_num) \
|
||||
M(Bool, internal) \
|
||||
|
@ -44,6 +44,10 @@ struct BackupSettings
|
||||
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
|
||||
bool allow_s3_native_copy = true;
|
||||
|
||||
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
|
||||
/// but don't put more entries into the cache.
|
||||
bool read_from_filesystem_cache = true;
|
||||
|
||||
/// 1-based shard index to store in the backup. 0 means all shards.
|
||||
/// Can only be used with BACKUP ON CLUSTER.
|
||||
size_t shard_num = 0;
|
||||
|
@ -178,6 +178,42 @@ namespace
|
||||
{
|
||||
return status == BackupStatus::RESTORING;
|
||||
}
|
||||
|
||||
/// We use slightly different read and write settings for backup/restore
|
||||
/// with a separate throttler and limited usage of filesystem cache.
|
||||
ReadSettings getReadSettingsForBackup(const ContextPtr & context, const BackupSettings & backup_settings)
|
||||
{
|
||||
auto read_settings = context->getReadSettings();
|
||||
read_settings.remote_throttler = context->getBackupsThrottler();
|
||||
read_settings.local_throttler = context->getBackupsThrottler();
|
||||
read_settings.enable_filesystem_cache = backup_settings.read_from_filesystem_cache;
|
||||
read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache;
|
||||
return read_settings;
|
||||
}
|
||||
|
||||
WriteSettings getWriteSettingsForBackup(const ContextPtr & context)
|
||||
{
|
||||
auto write_settings = context->getWriteSettings();
|
||||
write_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
return write_settings;
|
||||
}
|
||||
|
||||
ReadSettings getReadSettingsForRestore(const ContextPtr & context)
|
||||
{
|
||||
auto read_settings = context->getReadSettings();
|
||||
read_settings.remote_throttler = context->getBackupsThrottler();
|
||||
read_settings.local_throttler = context->getBackupsThrottler();
|
||||
read_settings.enable_filesystem_cache = false;
|
||||
read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
|
||||
return read_settings;
|
||||
}
|
||||
|
||||
WriteSettings getWriteSettingsForRestore(const ContextPtr & context)
|
||||
{
|
||||
auto write_settings = context->getWriteSettings();
|
||||
write_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
return write_settings;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -350,6 +386,8 @@ void BackupsWorker::doBackup(
|
||||
backup_create_params.backup_uuid = backup_settings.backup_uuid;
|
||||
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
|
||||
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
|
||||
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
|
||||
backup_create_params.write_settings = getWriteSettingsForBackup(context);
|
||||
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
|
||||
|
||||
/// Write the backup.
|
||||
@ -378,12 +416,12 @@ void BackupsWorker::doBackup(
|
||||
/// Prepare backup entries.
|
||||
BackupEntries backup_entries;
|
||||
{
|
||||
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context};
|
||||
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, backup_create_params.read_settings, context};
|
||||
backup_entries = backup_entries_collector.run();
|
||||
}
|
||||
|
||||
/// Write the backup entries to the backup.
|
||||
buildFileInfosForBackupEntries(backup, backup_entries, backup_coordination);
|
||||
buildFileInfosForBackupEntries(backup, backup_entries, backup_create_params.read_settings, backup_coordination);
|
||||
writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal);
|
||||
|
||||
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
|
||||
@ -433,12 +471,12 @@ void BackupsWorker::doBackup(
|
||||
}
|
||||
|
||||
|
||||
void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, std::shared_ptr<IBackupCoordination> backup_coordination)
|
||||
void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr<IBackupCoordination> backup_coordination)
|
||||
{
|
||||
LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS);
|
||||
backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, "");
|
||||
backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS);
|
||||
backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), *backups_thread_pool));
|
||||
backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, *backups_thread_pool));
|
||||
}
|
||||
|
||||
|
||||
@ -650,6 +688,8 @@ void BackupsWorker::doRestore(
|
||||
backup_open_params.base_backup_info = restore_settings.base_backup_info;
|
||||
backup_open_params.password = restore_settings.password;
|
||||
backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy;
|
||||
backup_open_params.read_settings = getReadSettingsForRestore(context);
|
||||
backup_open_params.write_settings = getWriteSettingsForRestore(context);
|
||||
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
|
||||
|
||||
String current_database = context->getCurrentDatabase();
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user