Merge branch 'master' into isnull-operator-priority

This commit is contained in:
Alexey Milovidov 2023-06-01 03:20:22 +03:00 committed by GitHub
commit 379c573db0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
597 changed files with 13836 additions and 4946 deletions

2
.gitmodules vendored
View File

@ -267,7 +267,7 @@
url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/ClickHouse/vectorscan.git
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares

View File

@ -22,12 +22,10 @@ curl https://clickhouse.com/ | sh
## Upcoming Events
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - Jun 8 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Bangalore**](https://www.meetup.com/clickhouse-bangalore-user-group/events/293740066/) - Jun 7
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -8,6 +8,9 @@ option (SANITIZE "Enable one of the code sanitizers" "")
set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
@ -29,7 +32,7 @@ if (SANITIZE)
# Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries.
# Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay.
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/msan_suppressions.txt")
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e
Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3

2
contrib/aws-c-auth vendored

@ -1 +1 @@
Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb
Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12

@ -1 +1 @@
Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5
Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff

@ -1 +1 @@
Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66
Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d

2
contrib/aws-c-http vendored

@ -1 +1 @@
Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc
Subproject commit dd34461987947672444d0bc872c5a733dfdb9711

2
contrib/aws-c-io vendored

@ -1 +1 @@
Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3
Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89

2
contrib/aws-c-mqtt vendored

@ -1 +1 @@
Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3
Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194

2
contrib/aws-c-s3 vendored

@ -1 +1 @@
Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b
Subproject commit d7bfe602d6925948f1fff95784e3613cca6a3900

@ -1 +1 @@
Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30
Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972

@ -1 +1 @@
Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d
Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0

View File

@ -52,8 +52,8 @@ endif()
# Directories.
SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws")
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3")
SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core")
SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3")
SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth")
SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal")
@ -118,7 +118,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10")
list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36")
list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
list(APPEND AWS_PUBLIC_INCLUDES

2
contrib/aws-crt-cpp vendored

@ -1 +1 @@
Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4
Subproject commit 8a301b7e842f1daed478090c869207300972379f

2
contrib/aws-s2n-tls vendored

@ -1 +1 @@
Subproject commit 0f1ba9e5c4a67cb3898de0c0b4f911d4194dc8de
Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 8fe7b3326ef482ee6ecdf5a4f698f2b8c2780f98
Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit a4e834839270a8c1f7ff1db351ba85afced3f0e2
Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d

View File

@ -25,6 +25,9 @@ message(STATUS "Intel QPL version: ${QPL_VERSION}")
# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking with 8 library targets.
# The qpl submodule comes with its own version of isal. It contains code which does not exist in upstream isal. It would be nice to link
# only upstream isal (ch_contrib::isal) but at this point we can't.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
# check nasm compiler
@ -308,7 +311,7 @@ target_include_directories(middle_layer_lib
target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB)
# [SUBDIR]c_api
file(GLOB_RECURSE QPL_C_API_SRC
file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp)

View File

@ -5,8 +5,8 @@ echo "Using sparse checkout for aws"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit 1f4d448314e581473103187765e4c949d01b4259
Subproject commit 38431d111781843741a781a57a6381a527d900a4

View File

@ -132,6 +132,9 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--report-logs-stats')
clickhouse-test "00001_select_1" > /dev/null ||:
clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||:
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \

View File

@ -65,6 +65,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
start
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
@ -94,6 +97,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"

View File

@ -1,6 +0,0 @@
# ARM (AArch64) build works on Amazon Graviton, Oracle Cloud, Huawei Cloud ARM machines.
# The support for AArch64 is pre-production ready.
wget 'https://builds.clickhouse.com/master/aarch64/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install

View File

@ -1,3 +0,0 @@
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
chmod a+x ./clickhouse
su -m root -c './clickhouse install'

View File

@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -1,3 +0,0 @@
wget 'https://builds.clickhouse.com/master/macos/clickhouse'
chmod a+x ./clickhouse
./clickhouse

View File

@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
### Install Prerequisites {#install-prerequisites}
``` bash
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
```
### Install and Use the Clang compiler
@ -43,9 +43,14 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
As of April 2023, any version of Clang >= 15 will work.
GCC as a compiler is not supported
GCC as a compiler is not supported.
To build with a specific Clang version:
:::tip
This is optional; if you are following along and have just installed Clang, check
which version you have installed before setting this environment variable.
:::
``` bash
export CC=clang-16
export CXX=clang++-16
@ -109,18 +114,3 @@ mkdir build
cmake -S . -B build
cmake --build build
```
## You Dont Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
The CI checks build the binaries on each commit to [ClickHouse](https://github.com/clickhouse/clickhouse/). To download them:
1. Open the [commits list](https://github.com/ClickHouse/ClickHouse/commits/master)
1. Choose a **Merge pull request** commit that includes the new feature, or was added after the new feature
1. Click the status symbol (yellow dot, red x, green check) to open the CI check list
1. Scroll through the list until you find **ClickHouse build check x/x artifact groups are OK**
1. Click **Details**
1. Find the type of package for your operating system that you need and download the files.
![build artifact check](images/find-build-artifact.png)

View File

@ -131,14 +131,17 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
The following settings can be set before query execution or placed into configuration file.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `32Mb`.
- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `16Mb`.
- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`.
- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`.
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of PUT requests that can run concurrently for one object. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer of size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts, and a larger one when the file is big enough, see `upload_part_size_multiply_factor`. With default settings, one uploaded file consumes at most `320Mb` for a file smaller than `8G`; the consumption is greater for larger files.
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; alternatively, `remote_host_filter` must be specified in the server configuration.
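For illustration, these settings can be applied per session before writing to an S3-backed table. This is a hedged sketch: the table name `s3_table` and the concrete values are assumptions, not part of this change.
```sql
-- Illustrative values only; `s3_table` is a hypothetical table using the S3 engine.
SET s3_min_upload_part_size = 16777216;      -- 16 MiB parts for multipart uploads
SET s3_max_inflight_parts_for_one_file = 4;  -- cap concurrent part uploads per object
SET s3_max_redirects = 0;                    -- recommended when URLs may be user-supplied

INSERT INTO s3_table
SELECT number AS id, toString(number) AS payload
FROM numbers(1000000);
```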

View File

@ -1219,11 +1219,12 @@ Authentication parameters (the disk will try all available methods **and** Manag
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of PUT requests that can run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.

View File

@ -258,4 +258,4 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](.
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
- [shardNum()](../../../sql-reference/functions/other-functions.md#shardnum) and [shardCount()](../../../sql-reference/functions/other-functions.md#shardcount) functions

View File

@ -36,8 +36,8 @@ The data is in CSV files but uses a semi-colon for the delimiter. The rows look
│ 7389 │ BMP180 │ 3735 │ 50.136 │ 11.062 │ 2019-06-01T00:00:06 │ 98905 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 12.1 │
│ 13199 │ BMP180 │ 6664 │ 52.514 │ 13.44 │ 2019-06-01T00:00:07 │ 101855.54 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 19.74 │
│ 12753 │ BMP180 │ 6440 │ 44.616 │ 2.032 │ 2019-06-01T00:00:07 │ 99475 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17 │
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
│ 16956 │ BMP180 │ 8594 │ 52.052 │ 8.354 │ 2019-06-01T00:00:08 │ 101322 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 17.2 │
└───────────┴─────────────┴──────────┴────────┴───────┴─────────────────────┴──────────┴──────────┴───────────────────┴─────────────┘
```
2. We will use the following `MergeTree` table to store the data in ClickHouse:

View File

@ -28,23 +28,25 @@ The quickest and easiest way to get up and running with ClickHouse is to create
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
On Linux and macOS:
On Linux, macOS and FreeBSD:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
ClickHouse Keeper, and other tools:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the
following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server,
clickhouse-client, clickhouse-local, ClickHouse Keeper, and other tools:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the following command to start the ClickHouse server:
```bash
./clickhouse server
```
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
1. Open a new terminal and use the **./clickhouse client** to connect to your service:
```bash
./clickhouse client
@ -330,7 +332,9 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside.
### From Sources {#from-sources}
## Non-Production Deployments (Advanced)
### Compile From Source {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [macOS](/docs/en/development/build-osx.md).
@ -346,8 +350,33 @@ Youll need to create data and metadata folders manually and `chown` them for
On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sources.
### From CI checks pre-built binaries
ClickHouse binaries are built for each [commit](/docs/en/development/build.md#you-dont-have-to-build-clickhouse).
### Install a CI-generated Binary
ClickHouse's continuous integration (CI) infrastructure produces specialized builds for each commit in the [ClickHouse
repository](https://github.com/clickhouse/clickhouse/), e.g. [sanitized](https://github.com/google/sanitizers) builds, unoptimized (Debug)
builds, cross-compiled builds etc. While such builds are normally only useful during development, they can in certain situations also be
interesting for users.
:::note
Since ClickHouse's CI is evolving over time, the exact steps to download CI-generated builds may vary.
Also, CI may delete build artifacts that are too old, making them unavailable for download.
:::
For example, to download an aarch64 binary for ClickHouse v23.4, follow these steps:
- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install.
- Click the green check / yellow dot / red cross to open the list of CI checks.
- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
- Download the artifacts for these builds.
To download binaries for very old x86-64 systems without [SSE3](https://en.wikipedia.org/wiki/SSE3) support or old ARM systems without
[ARMv8.1-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.1-A) support, open a [pull
request](https://github.com/ClickHouse/ClickHouse/commits/master) and find the CI check "BuilderBinAmd64Compat" or
"BuilderBinAarch64V80Compat", respectively. Then click "Details", open the "Build" fold, scroll to the end, and find the message "Notice: Build URLs
https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the
build.
## Launch {#launch}

View File

@ -177,11 +177,11 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--ask-password` - Prompt the user to enter a password.
- `--query, -q` The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
- `--queries-file` file path with queries to execute. You must specify either `query` or `queries-file` option.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--query, -q` The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` file path with queries to execute. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` If specified, allow processing multiple queries separated by semicolons.
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).
- `--format, -f` Use the specified default format to output the result.
- `--vertical, -E` If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
- `--time, -t` If specified, print the query execution time to stderr in non-interactive mode.

View File

@ -167,9 +167,9 @@ user = 'myuser',
password = 'mypass',
host = '127.0.0.1',
port = 3306,
database = 'test'
connection_pool_size = 8
on_duplicate_clause = 1
database = 'test',
connection_pool_size = 8,
on_duplicate_clause = 1,
replace_query = 1
```

File diff suppressed because it is too large

View File

@ -577,7 +577,7 @@ Default value: 20
**Usage**
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_pool_size) * [background_pool_size](/docs/en/operations/server-configuration-parameters/settings#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
The value of the `number_of_free_entries_in_pool_to_execute_mutation` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
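A hedged way to sanity-check this constraint on a running server, assuming the values are exposed through `system.merge_tree_settings` and `system.server_settings`:
```sql
-- MergeTree-level setting.
SELECT name, value
FROM system.merge_tree_settings
WHERE name = 'number_of_free_entries_in_pool_to_execute_mutation';

-- Server-level values it must stay below (their product is the limit).
SELECT name, value
FROM system.server_settings
WHERE name IN ('background_pool_size', 'background_merges_mutations_concurrency_ratio');
```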
## max_part_loading_threads {#max-part-loading-threads}
@ -840,4 +840,4 @@ Possible values:
- `Always` or `Never`.
Default value: `Never`
Default value: `Never`

View File

@ -452,6 +452,8 @@ Possible values:
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the bucket assigned to each row is recalculated. Any rows which don't belong to the current bucket are flushed and reassigned.
Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
- hash
[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
@ -1185,6 +1187,36 @@ Disable limit on kafka_num_consumers that depends on the number of available CPU
Default value: false.
## postgresql_connection_pool_size {#postgresql-connection-pool-size}
Connection pool size for PostgreSQL table engine and database engine.
Default value: 16
## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout}
Connection pool push/pop timeout on an empty pool for the PostgreSQL table engine and database engine. By default it will block on an empty pool.
Default value: 5000
## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection}
Close connection before returning connection to the pool.
Default value: true.
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size}
Connection pool size for each connection settings string in ODBC bridge.
Default value: 16
## odbc_bridge_use_connection_pooling {#odbc-bridge-use-connection-pooling}
Use connection pooling in ODBC bridge. If set to false, a new connection is created every time.
Default value: true
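A minimal, hedged sketch of adjusting these pool settings for a session; the values are illustrative, and the wait-timeout setting name follows the corrected heading above:
```sql
-- Enlarge the PostgreSQL connection pool and fail fast instead of blocking on an empty pool.
SET postgresql_connection_pool_size = 32;
SET postgresql_connection_pool_wait_timeout = 2000;  -- milliseconds

-- Keep ODBC bridge connection pooling enabled.
SET odbc_bridge_use_connection_pooling = 1;
```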
## use_uncompressed_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
@ -3561,7 +3593,7 @@ SETTINGS index_granularity = 8192 │
## external_table_functions_use_nulls {#external-table-functions-use-nulls}
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md)] table functions use Nullable columns.
Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
Possible values:
@ -4218,3 +4250,12 @@ Possible values:
- false — Disallow.
Default value: `false`.
## zstd_window_log_max
Allows you to select the maximum window log of ZSTD (it will not be used for the MergeTree family).
Type: Int64
Default: 0
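A hedged sketch of where this matters, e.g. reading an externally produced zstd file compressed with a large window (the file name is hypothetical):
```sql
-- Allow zstd frames with a window log of up to 31 to be decompressed.
SET zstd_window_log_max = 31;

-- Read a zstd-compressed TSV placed in the server's user_files directory.
SELECT count() FROM file('big_window.tsv.zst', 'TabSeparated', 'c1 String');
```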

View File

@ -0,0 +1,27 @@
---
slug: /en/operations/system-tables/build_options
---
# build_options
Contains information about the ClickHouse server's build options.
Columns:
- `name` (String) — Name of the build option, e.g. `USE_ODBC`
- `value` (String) — Value of the build option, e.g. `1`
**Example**
``` sql
SELECT * FROM system.build_options LIMIT 5
```
``` text
┌─name─────────────┬─value─┐
│ USE_BROTLI │ 1 │
│ USE_BZIP2 │ 1 │
│ USE_CAPNP │ 1 │
│ USE_CASSANDRA │ 1 │
│ USE_DATASKETCHES │ 1 │
└──────────────────┴───────┘
```

View File

@ -5,16 +5,18 @@ This table contains profiling on processors level (that you can find in [`EXPLAI
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened.
- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor
- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs
- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result.
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query
- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed.
- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor).
- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full.
- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result.
- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by processor.
- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by processor.
- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by processor.
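For example, a hedged query over this table using only the columns documented above (the filter and limit are arbitrary):
```sql
-- Slowest processors of today's queries, with their wait times and row volumes.
SELECT
    name,
    elapsed_us,
    input_wait_elapsed_us,
    output_wait_elapsed_us,
    input_rows,
    output_rows
FROM system.processors_profile_log
WHERE event_date = today()
ORDER BY elapsed_us DESC
LIMIT 10;
```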

View File

@ -59,9 +59,10 @@ Columns:
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
- `partitions` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the partitions present in the query.
- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution.
- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
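For example, a hedged sketch of inspecting recently failed queries through the columns listed above (the filter and limit are arbitrary):
```sql
-- Most recent failed queries and the database objects they touched.
SELECT
    query_kind,
    databases,
    tables,
    views,
    exception_code,
    exception
FROM system.query_log
WHERE exception_code != 0
ORDER BY event_time DESC
LIMIT 10;
```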

View File

@ -183,8 +183,9 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `--format`, `--output-format` — output format, `TSV` by default.
- `-d`, `--database` — default database, `_local` by default.

View File

@ -29,7 +29,7 @@ select first_value(b) from test_data
### example2
The NULL value is ignored.
```sql
select first_value(b) ignore nulls sfrom test_data
select first_value(b) ignore nulls from test_data
```
```text

View File

@ -1,48 +0,0 @@
---
slug: /en/sql-reference/aggregate-functions/reference/greatest
title: greatest
---
Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)),
greatest(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 3 │
└─────────────────────────────────────────────────────┴────────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└──---──────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison.
:::
Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md).

View File

@ -3,7 +3,7 @@ slug: /en/sql-reference/aggregate-functions/reference/last_value
sidebar_position: 8
---
# first_value
# last_value
Selects the last encountered value, similar to `anyLast`, but it can accept NULL.
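A minimal sketch, reusing the `test_data` table from the `first_value` page above:
```sql
-- Returns the last encountered value of column b; like anyLast, but NULL is a possible result.
SELECT last_value(b) FROM test_data;
```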

View File

@ -1,48 +0,0 @@
---
slug: /en/sql-reference/aggregate-functions/reference/least
title: least
---
Aggregate function that returns the least across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
least(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 1 │
└──────────────────────────────────────────────────┴─────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison.
:::
Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).

View File

@ -865,16 +865,34 @@ LIFETIME(3600);
The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
The syntax is:
``` sql
dictGetT('dict_name', 'attr_name', tuple(ip))
dictGetT('dict_name', 'attr_name', ip)
```
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
``` sql
select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
SELECT dictGet('my_ip_trie_dictionary', 'cca2', toIPv4('202.79.32.10')) AS result;
┌─result─┐
│ NP │
└────────┘
SELECT dictGet('my_ip_trie_dictionary', 'asn', IPv6StringToNum('2001:db8::1')) AS result;
┌─result─┐
│ 65536 │
└────────┘
SELECT dictGet('my_ip_trie_dictionary', ('asn', 'cca2'), IPv6StringToNum('2001:db8::1')) AS result;
┌─result───────┐
│ (65536,'ZZ') │
└──────────────┘
```
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
@ -2216,7 +2234,7 @@ Result:
## Regular Expression Tree Dictionary {#regexp-tree-dictionary}
Regular expression tree dictionaries are a special type of dictionary which represent the mapping from key to attributes using a tree of regular expressions. There are some use cases, e.g. parsing of (user agent)[https://en.wikipedia.org/wiki/User_agent] strings, which can be expressed elegantly with regexp tree dictionaries.
Regular expression tree dictionaries are a special type of dictionary which represent the mapping from key to attributes using a tree of regular expressions. There are some use cases, e.g. parsing of [user agent](https://en.wikipedia.org/wiki/User_agent) strings, which can be expressed elegantly with regexp tree dictionaries.
### Use Regular Expression Tree Dictionary in ClickHouse Open-Source
@ -2262,7 +2280,7 @@ This config consists of a list of regular expression tree nodes. Each node has t
- The value of an attribute may contain **back references**, referring to capture groups of the matched regular expression. In the example, the value of attribute `version` in the first node consists of a back-reference `\1` to capture group `(\d+[\.\d]*)` in the regular expression. Back-reference numbers range from 1 to 9 and are written as `$1` or `\1` (for number 1). The back reference is replaced by the matched capture group during query execution.
- **child nodes**: a list of children of a regexp tree node, each of which has its own attributes and (potentially) children nodes. String matching proceeds in a depth-first fashion. If a string matches a regexp node, the dictionary checks if it also matches the nodes' child nodes. If that is the case, the attributes of the deepest matching node are assigned. Attributes of a child node overwrite equally named attributes of parent nodes. The name of child nodes in YAML files can be arbitrary, e.g. `versions` in above example.
Regexp tree dictionaries only allow access using functions `dictGet`, `dictGetOrDefault` and `dictGetOrNull`.
Regexp tree dictionaries only allow access using the functions `dictGet` and `dictGetOrDefault`.
Example:
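A hedged sketch, assuming a regexp tree dictionary named `regexp_dict` that defines attributes `name` and `version` as in the configuration discussed above:
```sql
-- Look up both attributes for a user-agent string; unmatched attributes fall back to their defaults.
SELECT dictGet('regexp_dict', ('name', 'version'), 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4)') AS attrs;

-- dictGetOrDefault lets you supply an explicit fallback for non-matching input.
SELECT dictGetOrDefault('regexp_dict', 'name', 'not a user agent', 'unknown') AS name;
```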

View File

@ -20,7 +20,7 @@ Strings are compared byte-by-byte. Note that this may lead to unexpected results
A string S1 which has another string S2 as prefix is considered longer than S2.
## equals
## equals, `=`, `==` operators
**Syntax**
@ -32,7 +32,7 @@ Alias:
- `a = b` (operator)
- `a == b` (operator)
## notEquals
## notEquals, `!=`, `<>` operators
**Syntax**
@ -44,7 +44,7 @@ Alias:
- `a != b` (operator)
- `a <> b` (operator)
## less
## less, `<` operator
**Syntax**
@ -55,7 +55,7 @@ less(a, b)
Alias:
- `a < b` (operator)
## greater
## greater, `>` operator
**Syntax**
@ -66,7 +66,7 @@ greater(a, b)
Alias:
- `a > b` (operator)
## lessOrEquals
## lessOrEquals, `<=` operator
**Syntax**

View File

@ -152,3 +152,85 @@ FROM LEFT_RIGHT
│ 4 │ ᴺᵁᴸᴸ │ Both equal │
└──────┴───────┴──────────────────┘
```
## greatest
Returns the greatest across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT greatest(1, 2, toUInt8(3), 3.) result, toTypeName(result) type;
```
```response
┌─result─┬─type────┐
│ 3 │ Float64 │
└────────┴─────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└───────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
## least
Returns the least across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT least(1, 2, toUInt8(3), 3.) result, toTypeName(result) type;
```
```response
┌─result─┬─type────┐
│ 1 │ Float64 │
└────────┴─────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::

View File

@ -357,14 +357,14 @@ Alias: `SECOND`.
## toUnixTimestamp
For DateTime arguments: converts the value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.
For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
If the function is called with a string, it accepts an optional timezone argument.
**Syntax**
``` sql
toUnixTimestamp(datetime)
toUnixTimestamp(date)
toUnixTimestamp(str, [timezone])
```
@ -377,15 +377,29 @@ Type: `UInt32`.
**Example**
``` sql
SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp
SELECT
'2017-11-05 08:07:47' AS dt_str,
toUnixTimestamp(dt_str) AS from_str,
toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo,
toUnixTimestamp(toDateTime(dt_str)) AS from_datetime,
toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64,
toUnixTimestamp(toDate(dt_str)) AS from_date,
toUnixTimestamp(toDate32(dt_str)) AS from_date32
FORMAT Vertical;
```
Result:
``` text
┌─unix_timestamp─┐
│ 1509836867 │
└────────────────┘
Row 1:
──────
dt_str: 2017-11-05 08:07:47
from_str: 1509869267
from_str_tokyo: 1509836867
from_datetime: 1509869267
from_datetime64: 1509869267
from_date: 1509840000
from_date32: 1509840000
```
:::note

File diff suppressed because one or more lines are too long

View File

@ -560,6 +560,77 @@ Result:
└───────────────────────────┘
```
## Entropy-learned hashing (experimental)
Entropy-learned hashing is disabled by default; to enable it, run `SET allow_experimental_hash_functions=1`.
Entropy-learned hashing is not a standalone hash function like `metroHash64`, `cityHash64`, `sipHash64` etc. Instead, it aims to preprocess
the data to be hashed in a way that a standalone hash function can be computed more efficiently while not compromising the hash quality,
i.e. the randomness of the hashes. For that, entropy-based hashing chooses a subset of the bytes in a training data set of Strings which has
the same randomness (entropy) as the original Strings. For example, if the Strings are on average 100 bytes long, and we pick a subset of 5
bytes, then a hash function will be 95% less expensive to evaluate. For details of the method, refer to [Entropy-Learned Hashing: Constant
Time Hashing with Controllable Uniformity](https://doi.org/10.1145/3514221.3517894).
Entropy-learned hashing has two phases:
1. A training phase on a representative but typically small set of Strings to be hashed. Training consists of two steps:
- Function `prepareTrainEntropyLearnedHash(data, id)` caches the training data in a global state under a given `id`. It returns dummy
value `0` on every row.
- Function `trainEntropyLearnedHash(id)` computes a minimal partial sub-key of the training data stored under `id` in the global
state. The cached training data in the global state is replaced by the partial key. Dummy value `0` is returned on every row.
2. An evaluation phase where hashes are computed using the previously calculated partial sub-keys. Function `entropyLearnedHash(data, id)`
hashes `data` using the partial subkey stored as `id`. CityHash64 is used as hash function.
The reason that the training phase comprises two steps is that ClickHouse processes data at chunk granularity but entropy-learned hashing
needs to process the entire training set at once.
Since functions `prepareTrainEntropyLearnedHash()` and `trainEntropyLearnedHash()` access global state, they should not be called in
parallel with the same `id`.
**Syntax**
``` sql
prepareTrainEntropyLearnedHash(data, id);
trainEntropyLearnedHash(id);
entropyLearnedHash(data, id);
```
**Example**
```sql
SET allow_experimental_hash_functions=1;
CREATE TABLE tab (col String) ENGINE=Memory;
INSERT INTO tab VALUES ('aa'), ('ba'), ('ca');
SELECT prepareTrainEntropyLearnedHash(col, 'id1') AS prepared FROM tab;
SELECT trainEntropyLearnedHash('id1') AS trained FROM tab;
SELECT entropyLearnedHash(col, 'id1') as hashes FROM tab;
```
Result:
``` response
┌─prepared─┐
│ 0 │
│ 0 │
│ 0 │
└──────────┘
┌─trained─┐
│ 0 │
│ 0 │
│ 0 │
└─────────┘
┌───────────────hashes─┐
│ 2603192927274642682 │
│ 4947675599669400333 │
│ 10783339242466472992 │
└──────────────────────┘
```
## metroHash64
Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value.

View File

@ -12,18 +12,18 @@ This is an experimental feature that is currently in development and is not read
Performs stemming on a given word.
**Syntax**
### Syntax
``` sql
stem('language', word)
```
**Arguments**
### Arguments
- `language` — Language which rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
- `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).
- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
### Examples
Query:
@ -38,23 +38,58 @@ Result:
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```
### Supported languages for stem()
:::note
The stem() function uses the [Snowball stemming](https://snowballstem.org/) library; see the Snowball website for the up-to-date list of supported languages.
:::
- Arabic
- Armenian
- Basque
- Catalan
- Danish
- Dutch
- English
- Finnish
- French
- German
- Greek
- Hindi
- Hungarian
- Indonesian
- Irish
- Italian
- Lithuanian
- Nepali
- Norwegian
- Porter
- Portuguese
- Romanian
- Russian
- Serbian
- Spanish
- Swedish
- Tamil
- Turkish
- Yiddish
## lemmatize
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
**Syntax**
### Syntax
``` sql
lemmatize('language', word)
```
**Arguments**
### Arguments
- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
### Examples
Query:
@ -70,12 +105,18 @@ Result:
└─────────────────────┘
```
Configuration:
### Configuration
This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from
[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
``` xml
<lemmatizers>
<lemmatizer>
<!-- highlight-start -->
<lang>en</lang>
<path>en.bin</path>
<!-- highlight-end -->
</lemmatizer>
</lemmatizers>
```
@ -88,18 +129,18 @@ With the `plain` extension type we need to provide a path to a simple text file,
With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.
**Syntax**
### Syntax
``` sql
synonyms('extension_name', word)
```
**Arguments**
### Arguments
- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string).
**Examples**
### Examples
Query:
@ -115,7 +156,7 @@ Result:
└──────────────────────────────────────────┘
```
Configuration:
### Configuration
``` xml
<synonyms_extensions>
<extension>
@ -137,17 +178,17 @@ Detects the language of the UTF8-encoded input string. The function uses the [CL
The `detectLanguage` function works best when providing over 200 characters in the input string.
**Syntax**
### Syntax
``` sql
detectLanguage('text_to_be_analyzed')
```
**Arguments**
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
### Returned value
- The 2-letter ISO code of the detected language
@ -156,7 +197,7 @@ Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
**Examples**
### Examples
Query:
@ -175,22 +216,22 @@ fr
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.
**Syntax**
### Syntax
``` sql
detectLanguageMixed('text_to_be_analyzed')
```
**Arguments**
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
### Returned value
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
**Examples**
### Examples
Query:
@ -211,17 +252,17 @@ Result:
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
**Syntax**
### Syntax
``` sql
detectLanguageUnknown('text_to_be_analyzed')
```
**Arguments**
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
### Returned value
- The 2-letter ISO code of the detected language
@ -230,7 +271,7 @@ Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
**Examples**
### Examples
Query:
@ -251,21 +292,21 @@ Result:
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
**Syntax**
### Syntax
``` sql
detectCharset('text_to_be_analyzed')
```
**Arguments**
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
### Returned value
- A `String` containing the code of the detected character set
**Examples**
### Examples
Query:

View File

@ -323,11 +323,11 @@ Alias: `REPEAT`
**Arguments**
- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md).
- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md).
- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md).
**Returned value**
The single string containing string `s` repeated `n` times. If `n` \< 1, the function returns empty string.
A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string.
Type: `String`.
@ -345,6 +345,44 @@ Result:
└────────────────────────────────┘
```
## space
Concatenates the space character (` `) with itself as many times as specified.
**Syntax**
``` sql
space(n)
```
Alias: `SPACE`.
**Arguments**
- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md).
**Returned value**
A string containing the space character ` ` repeated `n` times. If `n` <= 0, the function returns the empty string.
Type: `String`.
**Example**
Query:
``` sql
SELECT space(3);
```
Result:
``` text
┌─space(3) ────┐
│ │
└──────────────┘
```
## reverse
Reverses the sequence of bytes in a string.

View File

@ -2,11 +2,10 @@
slug: /en/sql-reference/statements/create/function
sidebar_position: 38
sidebar_label: FUNCTION
title: "CREATE FUNCTION -user defined function (UDF)"
---
# CREATE FUNCTION - user defined function (UDF)
Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.
Creates a user defined function (UDF) from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.
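For orientation, a minimal sketch of defining and calling such a function (the function name is illustrative):
```sql
-- A UDF built from a lambda over its parameters.
CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b;

-- Use it like any built-in function.
SELECT linear_equation(number, 2, 1) FROM numbers(3);

-- Remove it when no longer needed.
DROP FUNCTION linear_equation;
```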
**Syntax**

View File

@ -544,6 +544,54 @@ Result:
└─────┴──────────┴───────┘
```
## Filling grouped by sorting prefix
It can be useful to fill rows that have the same values in particular columns independently; a good example is filling missing values in time series.
Assume there is the following time series table:
``` sql
CREATE TABLE timeseries
(
`sensor_id` UInt64,
`timestamp` DateTime64(3, 'UTC'),
`value` Float64
)
ENGINE = Memory;
SELECT * FROM timeseries;
┌─sensor_id─┬───────────────timestamp─┬─value─┐
│ 234 │ 2021-12-01 00:00:03.000 │ 3 │
│ 432 │ 2021-12-01 00:00:01.000 │ 1 │
│ 234 │ 2021-12-01 00:00:07.000 │ 7 │
│ 432 │ 2021-12-01 00:00:05.000 │ 5 │
└───────────┴─────────────────────────┴───────┘
```
We'd like to fill missing values for each sensor independently, with a 1-second interval.
The way to achieve this is to use the `sensor_id` column as a sorting prefix for the filled column `timestamp`:
```
SELECT *
FROM timeseries
ORDER BY
sensor_id,
timestamp WITH FILL
INTERPOLATE ( value AS 9999 )
┌─sensor_id─┬───────────────timestamp─┬─value─┐
│ 234 │ 2021-12-01 00:00:03.000 │ 3 │
│ 234 │ 2021-12-01 00:00:04.000 │ 9999 │
│ 234 │ 2021-12-01 00:00:05.000 │ 9999 │
│ 234 │ 2021-12-01 00:00:06.000 │ 9999 │
│ 234 │ 2021-12-01 00:00:07.000 │ 7 │
│ 432 │ 2021-12-01 00:00:01.000 │ 1 │
│ 432 │ 2021-12-01 00:00:02.000 │ 9999 │
│ 432 │ 2021-12-01 00:00:03.000 │ 9999 │
│ 432 │ 2021-12-01 00:00:04.000 │ 9999 │
│ 432 │ 2021-12-01 00:00:05.000 │ 5 │
└───────────┴─────────────────────────┴───────┘
```
Here, the `value` column was interpolated with `9999` just to make filled rows more noticeable.
This behavior is controlled by the setting `use_with_fill_by_sorting_prefix` (enabled by default).
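A minimal sketch of disabling it; with the setting turned off, `WITH FILL` is assumed to apply across the whole result set rather than per sorting prefix:

``` sql
SET use_with_fill_by_sorting_prefix = 0;

SELECT *
FROM timeseries
ORDER BY
    sensor_id,
    timestamp WITH FILL
INTERPOLATE ( value AS 9999 );
```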
## Related content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -34,7 +34,7 @@ For the `SAMPLE` clause the following syntax is supported:
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) |
## SAMPLE K
## SAMPLE K {#select-sample-k}
Here `k` is a number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`.
@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000
In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10.
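A minimal sketch of such a query with the manual `count()` correction (the table and column names are assumptions):

``` sql
-- Hypothetical table and columns; count() is multiplied by 10 to compensate for SAMPLE 0.1
SELECT Title, count() * 10 AS PageViews
FROM hits_distributed
SAMPLE 0.1
GROUP BY Title
ORDER BY PageViews DESC
LIMIT 1000
```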
## SAMPLE N
## SAMPLE N {#select-sample-n}
Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
@ -90,7 +90,7 @@ FROM visits
SAMPLE 10000000
```
## SAMPLE K OFFSET M
## SAMPLE K OFFSET M {#select-sample-offset}
Here `k` and `m` are numbers from 0 to 1. Examples are shown below.

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax**
``` sql
url(URL [,format] [,structure])
url(URL [,format] [,structure] [,headers])
```
**Parameters**
@ -21,6 +21,7 @@ url(URL [,format] [,structure])
- `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md).
- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `headers` — Headers in `'headers('key1'='value1', 'key2'='value2')'` format. Allows you to set headers for the HTTP call.
**Returned value**
@ -31,7 +32,7 @@ A table with the specified format and structure and with data from the defined `
Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from an HTTP server that responds in [CSV](../../interfaces/formats.md#csv) format.
``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3;
```
Inserting data from a `URL` into a table:
@ -46,3 +47,12 @@ SELECT * FROM test_table;
Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function.
The `|` character inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by the [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
## Virtual Columns
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.
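A minimal sketch of selecting these virtual columns, reusing the hypothetical local endpoint from the example above:

``` sql
SELECT _path, _file, column1, column2
FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32')
LIMIT 3;
```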
**See Also**
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -0,0 +1,62 @@
---
slug: /en/sql-reference/table-functions/urlCluster
sidebar_position: 55
sidebar_label: urlCluster
---
# urlCluster Table Function
Allows processing files from a URL in parallel from many nodes in a specified cluster. On the initiator, it creates a connection to all nodes in the cluster, expands asterisks in the URL file path, and dispatches each file dynamically. On a worker node, it asks the initiator for the next task to process and processes it. This is repeated until all tasks are finished.
**Syntax**
``` sql
urlCluster(cluster_name, URL, format, structure)
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `URL` — HTTP or HTTPS server address, which can accept `GET` requests. Type: [String](../../sql-reference/data-types/string.md).
- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
A table with the specified format and structure and with data from the defined `URL`.
**Examples**
Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from an HTTP server that responds in [CSV](../../interfaces/formats.md#csv) format.
1. Create a basic HTTP server using standard Python 3 tools and start it:
```python
from http.server import BaseHTTPRequestHandler, HTTPServer
class CSVHTTPServer(BaseHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.send_header('Content-type', 'text/csv')
self.end_headers()
self.wfile.write(bytes('Hello,1\nWorld,2\n', "utf-8"))
if __name__ == "__main__":
server_address = ('127.0.0.1', 12345)
HTTPServer(server_address, CSVHTTPServer).serve_forever()
```
``` sql
SELECT * FROM urlCluster('cluster_simple','http://127.0.0.1:12345', CSV, 'column1 String, column2 UInt32')
```
## Globs in URL
Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function.
The `|` character inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by the [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
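As an illustration only (the second address and the file names are assumptions), a query combining a numeric range with `|` failover might look like:

``` sql
SELECT *
FROM urlCluster(
    'cluster_simple',
    'http://{127.0.0.1|127.0.0.2}:12345/data_{1..3}.csv',
    CSV,
    'column1 String, column2 UInt32'
);
```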
**See Also**
- [URL engine](../../engines/table-engines/special/url.md)
- [URL table function](../../sql-reference/table-functions/url.md)

View File

@ -132,7 +132,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--queries-file` — path to a file with queries to execute. Only one of the `query` or `queries-file` options may be specified.
- `--database, -d` — select the current database. If not specified, the value is taken from the server settings (the `default` database by default).
- `--multiline, -m` — if specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` — if specified, allow running multiple queries separated by semicolons.
- `--multiquery, -n` — if specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `--format, -f` — use the specified format by default to output the result.
- `--vertical, -E` — if specified, use the [Vertical](../interfaces/formats.md#vertical) format by default to output the result. This is the same as `format=Vertical`. In this format, each value is printed on a separate line, which is convenient when displaying wide tables.
- `--time, -t` — if specified, print the query execution time to stderr in non-interactive mode.

View File

@ -235,13 +235,13 @@ SELECT toDateTime('2021-04-21 10:20:30', 'Europe/Moscow') AS Time, toTypeName(Ti
## toUnixTimestamp {#to-unix-timestamp}
Converts a date-with-time to a `UInt32` number -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
For a String argument, the string is converted to a date and time according to the timezone (an optional second argument; the server timezone is used by default).
Converts a string, a date, or a date-with-time to a [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) of type `UInt32`.
The string can be accompanied by a second (optional) argument specifying the timezone.
**Syntax**
``` sql
toUnixTimestamp(datetime)
toUnixTimestamp(date)
toUnixTimestamp(str, [timezone])
```
@ -256,15 +256,29 @@ toUnixTimestamp(str, [timezone])
Query:
``` sql
SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
SELECT
'2017-11-05 08:07:47' AS dt_str,
toUnixTimestamp(dt_str) AS from_str,
toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo,
toUnixTimestamp(toDateTime(dt_str)) AS from_datetime,
toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64,
toUnixTimestamp(toDate(dt_str)) AS from_date,
toUnixTimestamp(toDate32(dt_str)) AS from_date32
FORMAT Vertical;
```
Result:
``` text
┌─unix_timestamp─┐
│ 1509836867 │
└────────────────┘
Row 1:
──────
dt_str: 2017-11-05 08:07:47
from_str: 1509869267
from_str_tokyo: 1509836867
from_datetime: 1509869267
from_datetime64: 1509869267
from_date: 1509840000
from_date32: 1509840000
```
:::note

View File

@ -21,6 +21,7 @@ url(URL [,format] [,structure])
- `URL` — HTTP or HTTPS server address that can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries, respectively). Type: [String](../../sql-reference/data-types/string.md).
- `format` — the [format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — table structure in the `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `headers` — HTTP headers in the `'headers('key1'='value1', 'key2'='value2')'` format. Defines the headers for the HTTP call.
**Returned value**
@ -31,7 +32,7 @@ url(URL [,format] [,structure])
Getting the first 3 rows of a table with data in [CSV](../../interfaces/formats.md#csv) format, containing columns of [String](../../sql-reference/data-types/string.md) and [UInt32](../../sql-reference/data-types/int-uint.md) type, from an HTTP server.
``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3;
```
Inserting data into a table:
@ -46,3 +47,12 @@ SELECT * FROM test_table;
Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function.
The `|` character inside patterns is used to specify failover addresses in case the previous ones are unavailable. These addresses are iterated in the same order as listed in the pattern. The number of addresses that can be generated is limited by the [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
## Virtual Columns
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)

View File

@ -46,7 +46,7 @@ $ cd ..
To do this, create the following file:
/资源库/LaunchDaemons/limit.maxfiles.plist:
/Library/LaunchDaemons/limit.maxfiles.plist:
``` xml
<?xml version="1.0" encoding="UTF-8"?>

View File

@ -41,3 +41,11 @@ CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;
INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42);
SELECT * FROM test_table;
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the `URL`.
- `_file` — Resource name.
**See Also**
- [Virtual columns](https://clickhouse.com/docs/en/operations/table_engines/#table_engines-virtual_columns)

View File

@ -1181,7 +1181,7 @@ void Client::processOptions(const OptionsDescription & options_description,
void Client::processConfig()
{
/// Batch mode is enabled if one of the following is true:
/// - -e (--query) command line option is present.
/// - -q (--query) command line option is present.
/// The value of the option is used as the text of query (or of multiple queries).
/// If stdin is not a terminal, INSERT data for the first query is read from it.
/// - stdin is not a terminal. In this case queries are read from it.
@ -1381,6 +1381,13 @@ void Client::readArguments(
allow_repeated_settings = true;
else if (arg == "--allow_merge_tree_settings")
allow_merge_tree_settings = true;
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else
common_arguments.emplace_back(arg);
}

View File

@ -33,6 +33,7 @@ require (
github.com/cenkalti/backoff/v4 v4.2.0 // indirect
github.com/containerd/containerd v1.6.17 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/distribution/distribution v2.8.2+incompatible // indirect
github.com/docker/distribution v2.8.1+incompatible // indirect
github.com/docker/docker v23.0.0+incompatible // indirect
github.com/docker/go-units v0.5.0 // indirect

View File

@ -126,6 +126,8 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/distribution v2.8.2+incompatible h1:k9+4DKdOG+quPFZXT/mUsiQrGu9vYCp+dXpuPkuqhk8=
github.com/distribution/distribution v2.8.2+incompatible/go.mod h1:EgLm2NgWtdKgzF9NpMzUKgzmR7AMmb0VQi2B+ZzDRjc=
github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68=
github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v23.0.0+incompatible h1:L6c28tNyqZ4/ub9AZC9d5QUuunoHHfEH4/Ue+h/E5nE=

View File

@ -69,6 +69,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp

View File

@ -11,6 +11,9 @@
#include <Core/ServerUUID.h>
#include <Common/logger_useful.h>
#include <Common/ErrorHandlers.h>
#include <Common/assertProcessUserMatchesDataOwner.h>
#include <Common/makeSocketAddress.h>
#include <Server/waitServersToFinish.h>
#include <base/scope_guard.h>
#include <base/safeExit.h>
#include <Poco/Net/NetException.h>
@ -75,92 +78,9 @@ namespace ErrorCodes
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int SUPPORT_IS_DISABLED;
extern const int NETWORK_ERROR;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int FAILED_TO_GETPWUID;
extern const int LOGICAL_ERROR;
}
namespace
{
size_t waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
{
const size_t sleep_max_ms = 1000 * seconds_to_wait;
const size_t sleep_one_ms = 100;
size_t sleep_current_ms = 0;
size_t current_connections = 0;
for (;;)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
if (sleep_current_ms < sleep_max_ms)
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
else
break;
}
return current_connections;
}
Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log)
{
Poco::Net::SocketAddress socket_address;
try
{
socket_address = Poco::Net::SocketAddress(host, port);
}
catch (const Poco::Net::DNSException & e)
{
const auto code = e.code();
if (code == EAI_FAMILY
#if defined(EAI_ADDRFAMILY)
|| code == EAI_ADDRFAMILY
#endif
)
{
LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
"If it is an IPv6 address and your host has disabled IPv6, then consider to "
"specify IPv4 address to listen in <listen_host> element of configuration "
"file. Example: <listen_host>0.0.0.0</listen_host>",
host, e.code(), e.message());
}
throw;
}
return socket_address;
}
std::string getUserName(uid_t user_id)
{
/// Try to convert user id into user name.
auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
if (buffer_size <= 0)
buffer_size = 1024;
std::string buffer;
buffer.reserve(buffer_size);
struct passwd passwd_entry;
struct passwd * result = nullptr;
const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result);
if (error)
throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
else if (result)
return result->pw_name;
return toString(user_id);
}
}
Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const
{
auto address = makeSocketAddress(host, port, &logger());
@ -364,24 +284,7 @@ try
std::filesystem::create_directories(path);
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();
struct stat statbuf;
if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
{
const auto effective_user = getUserName(effective_user_id);
const auto data_owner = getUserName(statbuf.st_uid);
std::string message = "Effective user of the process (" + effective_user +
") does not match the owner of the data (" + data_owner + ").";
if (effective_user_id == 0)
{
message += " Run under 'sudo -u " + data_owner + "'.";
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
}
else
{
LOG_WARNING(log, fmt::runtime(message));
}
}
assertProcessUserMatchesDataOwner(path, [&](const std::string & message){ LOG_WARNING(log, fmt::runtime(message)); });
DB::ServerUUID::load(path + "/uuid", log);

View File

@ -818,8 +818,16 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
{
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
const char * arg = argv[arg_num];
common_arguments.emplace_back(arg);
std::string_view arg = argv[arg_num];
if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else
common_arguments.emplace_back(arg);
}
}

View File

@ -39,6 +39,9 @@
#include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h>
#include <Common/Config/AbstractConfigurationComparison.h>
#include <Common/assertProcessUserMatchesDataOwner.h>
#include <Common/makeSocketAddress.h>
#include <Server/waitServersToFinish.h>
#include <Core/ServerUUID.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
@ -200,40 +203,6 @@ int mainEntryClickHouseServer(int argc, char ** argv)
}
}
namespace
{
size_t waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
{
const size_t sleep_max_ms = 1000 * seconds_to_wait;
const size_t sleep_one_ms = 100;
size_t sleep_current_ms = 0;
size_t current_connections = 0;
for (;;)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
if (sleep_current_ms < sleep_max_ms)
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
else
break;
}
return current_connections;
}
}
namespace DB
{
@ -244,8 +213,6 @@ namespace ErrorCodes
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INVALID_CONFIG_PARAMETER;
extern const int FAILED_TO_GETPWUID;
extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
extern const int NETWORK_ERROR;
extern const int CORRUPTED_DATA;
}
@ -261,54 +228,6 @@ static std::string getCanonicalPath(std::string && path)
return std::move(path);
}
static std::string getUserName(uid_t user_id)
{
/// Try to convert user id into user name.
auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
if (buffer_size <= 0)
buffer_size = 1024;
std::string buffer;
buffer.reserve(buffer_size);
struct passwd passwd_entry;
struct passwd * result = nullptr;
const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result);
if (error)
throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
else if (result)
return result->pw_name;
return toString(user_id);
}
Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log)
{
Poco::Net::SocketAddress socket_address;
try
{
socket_address = Poco::Net::SocketAddress(host, port);
}
catch (const Poco::Net::DNSException & e)
{
const auto code = e.code();
if (code == EAI_FAMILY
#if defined(EAI_ADDRFAMILY)
|| code == EAI_ADDRFAMILY
#endif
)
{
LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
"If it is an IPv6 address and your host has disabled IPv6, then consider to "
"specify IPv4 address to listen in <listen_host> element of configuration "
"file. Example: <listen_host>0.0.0.0</listen_host>",
host, e.code(), e.message());
}
throw;
}
return socket_address;
}
Poco::Net::SocketAddress Server::socketBindListen(
const Poco::Util::AbstractConfiguration & config,
Poco::Net::ServerSocket & socket,
@ -959,24 +878,7 @@ try
std::string default_database = server_settings.default_database.toString();
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();
struct stat statbuf;
if (stat(path_str.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
{
const auto effective_user = getUserName(effective_user_id);
const auto data_owner = getUserName(statbuf.st_uid);
std::string message = "Effective user of the process (" + effective_user +
") does not match the owner of the data (" + data_owner + ").";
if (effective_user_id == 0)
{
message += " Run under 'sudo -u " + data_owner + "'.";
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
}
else
{
global_context->addWarningMessage(message);
}
}
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
global_context->setPath(path_str);

View File

@ -909,6 +909,11 @@
<host>127.0.0.10</host>
<port>9000</port>
</replica>
<!-- Unavailable replica -->
<replica>
<host>127.0.0.11</host>
<port>1234</port>
</replica>
</shard>
</parallel_replicas>
<test_cluster_two_shards_localhost>
@ -1137,6 +1142,16 @@
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
-->
<!--
ORDER BY expr: https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#order_by
Example:
event_date, event_time
event_date, type, query_id
event_date, event_time, initial_query_id
<order_by>event_date, event_time, initial_query_id</order_by>
-->
<!-- Instead of partition_by, you can provide full engine expression (starting with ENGINE = ) with parameters,
Example: <engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->

View File

@ -152,6 +152,13 @@ public:
nested_func->merge(place, rhs, arena);
}
bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); }
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override
{
nested_func->merge(place, rhs, thread_pool, arena);
}
void mergeBatch(
size_t row_begin,
size_t row_end,

View File

@ -1205,6 +1205,56 @@ private:
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{
if (resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
return {};
if (resolved_identifier->getNodeType() == QueryTreeNodeType::FUNCTION)
{
const auto & resolved_function = resolved_identifier->as<FunctionNode &>();
const auto & argument_nodes = resolved_function.getArguments().getNodes();
std::optional<JoinTableSide> result;
for (const auto & argument_node : argument_nodes)
{
auto table_side = getColumnSideFromJoinTree(argument_node, join_node);
if (table_side && result && *table_side != *result)
{
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
"Ambiguous identifier {}. In scope {}",
resolved_identifier->formatASTForErrorMessage(),
join_node.formatASTForErrorMessage());
}
if (table_side)
result = *table_side;
}
return result;
}
const auto * column_src = resolved_identifier->as<ColumnNode &>().getColumnSource().get();
if (join_node.getLeftTableExpression().get() == column_src)
return JoinTableSide::Left;
if (join_node.getRightTableExpression().get() == column_src)
return JoinTableSide::Right;
return {};
}
static void convertJoinedColumnTypeToNullIfNeeded(QueryTreeNodePtr & resolved_identifier, const JoinKind & join_kind, std::optional<JoinTableSide> resolved_side)
{
if (resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
JoinCommon::canBecomeNullable(resolved_identifier->getResultType()) &&
(isFull(join_kind) ||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
{
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
}
}
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
@ -2982,6 +3032,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
QueryTreeNodePtr resolved_identifier;
JoinKind join_kind = from_join_node.getKind();
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
if (left_resolved_identifier && right_resolved_identifier)
{
@ -3027,19 +3078,31 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
*
* Otherwise we prefer column from left table.
*/
if (identifier_path_part == right_column_source_alias)
return right_resolved_identifier;
else if (!left_column_source_alias.empty() &&
right_column_source_alias.empty() &&
identifier_path_part != left_column_source_alias)
return right_resolved_identifier;
bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias;
bool column_resolved_without_using_left_alias = !left_column_source_alias.empty()
&& right_column_source_alias.empty()
&& identifier_path_part != left_column_source_alias;
if (column_resolved_using_right_alias || column_resolved_without_using_left_alias)
{
resolved_side = JoinTableSide::Right;
resolved_identifier = right_resolved_identifier;
}
else
{
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
}
else
{
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
return left_resolved_identifier;
}
else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table)
{
return left_resolved_identifier;
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
else
{
@ -3092,17 +3155,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (join_node_in_resolve_process || !resolved_identifier)
return resolved_identifier;
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
if (join_use_nulls &&
resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
(isFull(join_kind) ||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
if (join_use_nulls)
{
resolved_identifier = resolved_identifier->clone();
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
}
return resolved_identifier;
@ -4001,6 +4057,27 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
else
matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope);
if (scope.context->getSettingsRef().join_use_nulls)
{
/** If we are resolving a matcher that came from the result of a JOIN and `join_use_nulls` is set,
  * we need to convert the joined column type to Nullable.
  * We take the nearest JoinNode to check which table the column belongs to,
  * because for LEFT/RIGHT joins we convert only the corresponding side.
  */
const auto * nearest_query_scope = scope.getNearestQueryScope();
const QueryNode * nearest_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
const QueryTreeNodePtr & nearest_scope_join_tree = nearest_scope_query_node ? nearest_scope_query_node->getJoinTree() : nullptr;
const JoinNode * nearest_scope_join_node = nearest_scope_join_tree ? nearest_scope_join_tree->as<JoinNode>() : nullptr;
if (nearest_scope_join_node)
{
for (auto & [node, node_name] : matched_expression_nodes_with_names)
{
auto join_identifier_side = getColumnSideFromJoinTree(node, *nearest_scope_join_node);
convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side);
}
}
}
std::unordered_map<const IColumnTransformerNode *, std::unordered_set<std::string>> strict_transformer_to_used_column_names;
for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes())
{
@ -6355,7 +6432,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
auto table_function_ast = table_function_node_typed.toAST();
table_function_ptr->parseArguments(table_function_ast, scope_context);
auto table_function_storage = table_function_ptr->execute(table_function_ast, scope_context, table_function_ptr->getName());
auto table_function_storage = scope_context->getQueryContext()->executeTableFunction(table_function_ast, table_function_ptr);
table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context);
}

View File

@ -1,6 +1,7 @@
#include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <Disks/IDisk.h>
#include <IO/LimitSeekableReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
namespace DB

View File

@ -1,5 +1,7 @@
#include <Backups/BackupEntryFromImmutableFile.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Disks/IDisk.h>
#include <city.h>
namespace DB
@ -57,16 +59,31 @@ UInt64 BackupEntryFromImmutableFile::getSize() const
UInt128 BackupEntryFromImmutableFile::getChecksum() const
{
std::lock_guard lock{size_and_checksum_mutex};
if (!checksum_adjusted)
{
if (!checksum)
checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
else if (copy_encrypted)
checksum = combineChecksums(*checksum, disk->getEncryptedFileIV(file_path));
checksum_adjusted = true;
std::lock_guard lock{size_and_checksum_mutex};
if (checksum_adjusted)
return *checksum;
if (checksum)
{
if (copy_encrypted)
checksum = combineChecksums(*checksum, disk->getEncryptedFileIV(file_path));
checksum_adjusted = true;
return *checksum;
}
}
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
{
std::lock_guard lock{size_and_checksum_mutex};
if (!checksum_adjusted)
{
checksum = calculated_checksum;
checksum_adjusted = true;
}
return *checksum;
}
return *checksum;
}
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const

View File

@ -44,7 +44,7 @@ private:
const DataSourceDescription data_source_description;
const bool copy_encrypted;
mutable std::optional<UInt64> file_size;
mutable std::optional<UInt64> checksum;
mutable std::optional<UInt128> checksum;
mutable bool file_size_adjusted = false;
mutable bool checksum_adjusted = false;
mutable std::mutex size_and_checksum_mutex;

View File

@ -8,15 +8,32 @@ namespace DB
template <typename Base>
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
{
std::lock_guard lock{checksum_calculation_mutex};
if (!calculated_checksum)
{
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(this->getSize()));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
std::lock_guard lock{checksum_calculation_mutex};
if (calculated_checksum)
return *calculated_checksum;
}
size_t size = this->getSize();
{
std::lock_guard lock{checksum_calculation_mutex};
if (!calculated_checksum)
{
if (size == 0)
{
calculated_checksum = 0;
}
else
{
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(size));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
}
}
return *calculated_checksum;
}
return *calculated_checksum;
}
template <typename Base>

View File

@ -3,7 +3,7 @@
#include <Disks/IDisk.h>
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>

View File

@ -255,6 +255,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
client,
s3_uri.bucket,
fs::path(s3_uri.key) / file_name,
DBMS_DEFAULT_BUFFER_SIZE,
request_settings,
std::nullopt,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"),

View File

@ -15,7 +15,7 @@
#include <IO/Archives/createArchiveWriter.h>
#include <IO/ConcatSeekableReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/SeekableReadBuffer.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>

View File

@ -0,0 +1,355 @@
#include <gtest/gtest.h>
#include <Backups/BackupEntryFromAppendOnlyFile.h>
#include <Backups/BackupEntryFromImmutableFile.h>
#include <Backups/BackupEntryFromSmallFile.h>
#include <Disks/IDisk.h>
#include <Disks/DiskLocal.h>
#include <Disks/DiskEncrypted.h>
#include <IO/FileEncryptionCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Poco/TemporaryFile.h>
using namespace DB;
class BackupEntriesTest : public ::testing::Test
{
protected:
void SetUp() override
{
/// Make local disk.
temp_dir = std::make_unique<Poco::TemporaryFile>();
temp_dir->createDirectories();
local_disk = std::make_shared<DiskLocal>("local_disk", temp_dir->path() + "/", 0);
/// Make encrypted disk.
auto settings = std::make_unique<DiskEncryptedSettings>();
settings->wrapped_disk = local_disk;
settings->disk_path = "encrypted/";
settings->current_algorithm = FileEncryption::Algorithm::AES_128_CTR;
String key = "1234567890123456";
UInt128 fingerprint = FileEncryption::calculateKeyFingerprint(key);
settings->all_keys[fingerprint] = key;
settings->current_key = key;
settings->current_key_fingerprint = fingerprint;
encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
}
void TearDown() override
{
encrypted_disk.reset();
local_disk.reset();
}
static void writeFile(DiskPtr disk, const String & filepath)
{
auto buf = disk->writeFile(filepath, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
writeString(std::string_view{"Some text"}, *buf);
buf->finalize();
}
static void writeEmptyFile(DiskPtr disk, const String & filepath)
{
auto buf = disk->writeFile(filepath, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
buf->finalize();
}
static void appendFile(DiskPtr disk, const String & filepath)
{
auto buf = disk->writeFile(filepath, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
writeString(std::string_view{"Appended"}, *buf);
buf->finalize();
}
static String getChecksum(const BackupEntryPtr & backup_entry)
{
return getHexUIntUppercase(backup_entry->getChecksum());
}
static const constexpr std::string_view NO_CHECKSUM = "no checksum";
static String getPartialChecksum(const BackupEntryPtr & backup_entry, size_t prefix_length)
{
auto partial_checksum = backup_entry->getPartialChecksum(prefix_length);
if (!partial_checksum)
return String{NO_CHECKSUM};
return getHexUIntUppercase(*partial_checksum);
}
static String readAll(const BackupEntryPtr & backup_entry)
{
auto in = backup_entry->getReadBuffer({});
String str;
readStringUntilEOF(str, *in);
return str;
}
std::unique_ptr<Poco::TemporaryFile> temp_dir;
std::shared_ptr<DiskLocal> local_disk;
std::shared_ptr<DiskEncrypted> encrypted_disk;
};
static const constexpr std::string_view ZERO_CHECKSUM = "00000000000000000000000000000000";
static const constexpr std::string_view SOME_TEXT_CHECKSUM = "28B5529750AC210952FFD366774363ED";
static const constexpr std::string_view S_CHECKSUM = "C27395C39AFB5557BFE47661CC9EB86C";
static const constexpr std::string_view SOME_TEX_CHECKSUM = "D00D9BE8D87919A165F14EDD31088A0E";
static const constexpr std::string_view SOME_TEXT_APPENDED_CHECKSUM = "5A1F10F638DC7A226231F3FD927D1726";
static const constexpr std::string_view PRECALCULATED_CHECKSUM = "1122334455667788AABBCCDDAABBCCDD";
static const constexpr UInt128 PRECALCULATED_CHECKSUM_UINT128 = (UInt128(0x1122334455667788) << 64) | 0xAABBCCDDAABBCCDD;
static const size_t PRECALCULATED_SIZE = 123;
TEST_F(BackupEntriesTest, BackupEntryFromImmutableFile)
{
writeFile(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromImmutableFile>(local_disk, "a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 8), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1000), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
writeEmptyFile(local_disk, "empty.txt");
auto empty_entry = std::make_shared<BackupEntryFromImmutableFile>(local_disk, "empty.txt");
EXPECT_EQ(empty_entry->getSize(), 0);
EXPECT_EQ(getChecksum(empty_entry), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1000), ZERO_CHECKSUM);
EXPECT_EQ(readAll(empty_entry), "");
auto precalculated_entry = std::make_shared<BackupEntryFromImmutableFile>(local_disk, "a.txt", false, PRECALCULATED_SIZE, PRECALCULATED_CHECKSUM_UINT128);
EXPECT_EQ(precalculated_entry->getSize(), PRECALCULATED_SIZE);
EXPECT_EQ(getChecksum(precalculated_entry), PRECALCULATED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, PRECALCULATED_SIZE - 1), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, PRECALCULATED_SIZE), PRECALCULATED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1000), PRECALCULATED_CHECKSUM);
EXPECT_EQ(readAll(precalculated_entry), "Some text");
}
TEST_F(BackupEntriesTest, BackupEntryFromAppendOnlyFile)
{
writeFile(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromAppendOnlyFile>(local_disk, "a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), S_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 8), SOME_TEX_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1000), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
appendFile(local_disk, "a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), S_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 8), SOME_TEX_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1000), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
auto appended_entry = std::make_shared<BackupEntryFromAppendOnlyFile>(local_disk, "a.txt");
EXPECT_EQ(appended_entry->getSize(), 17);
EXPECT_EQ(getChecksum(appended_entry), SOME_TEXT_APPENDED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 1), S_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 8), SOME_TEX_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 22), SOME_TEXT_APPENDED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(appended_entry, 1000), SOME_TEXT_APPENDED_CHECKSUM);
EXPECT_EQ(readAll(appended_entry), "Some textAppended");
writeEmptyFile(local_disk, "empty_appended.txt");
auto empty_entry = std::make_shared<BackupEntryFromAppendOnlyFile>(local_disk, "empty_appended.txt");
EXPECT_EQ(empty_entry->getSize(), 0);
EXPECT_EQ(getChecksum(empty_entry), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1000), ZERO_CHECKSUM);
EXPECT_EQ(readAll(empty_entry), "");
appendFile(local_disk, "empty_appended.txt");
EXPECT_EQ(empty_entry->getSize(), 0);
EXPECT_EQ(getChecksum(empty_entry), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(empty_entry, 1000), ZERO_CHECKSUM);
EXPECT_EQ(readAll(empty_entry), "");
}
TEST_F(BackupEntriesTest, PartialChecksumBeforeFullChecksum)
{
writeFile(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromAppendOnlyFile>(local_disk, "a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
entry = std::make_shared<BackupEntryFromAppendOnlyFile>(local_disk, "a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getPartialChecksum(entry, 1), S_CHECKSUM);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
}
TEST_F(BackupEntriesTest, BackupEntryFromSmallFile)
{
writeFile(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromSmallFile>(local_disk, "a.txt");
local_disk->removeFile("a.txt");
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), S_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 8), SOME_TEX_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1000), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
}
TEST_F(BackupEntriesTest, DecryptedEntriesFromEncryptedDisk)
{
{
writeFile(encrypted_disk, "a.txt");
std::pair<BackupEntryPtr, bool /* partial_checksum_allowed */> test_cases[]
= {{std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt"), false},
{std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "a.txt"), true},
{std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt"), true}};
for (const auto & [entry, partial_checksum_allowed] : test_cases)
{
EXPECT_EQ(entry->getSize(), 9);
EXPECT_EQ(getChecksum(entry), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), partial_checksum_allowed ? S_CHECKSUM : NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 8), partial_checksum_allowed ? SOME_TEX_CHECKSUM : NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 9), SOME_TEXT_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1000), SOME_TEXT_CHECKSUM);
EXPECT_EQ(readAll(entry), "Some text");
}
}
{
writeEmptyFile(encrypted_disk, "empty.txt");
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "empty.txt"),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "empty.txt"),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt")};
for (const auto & entry : entries)
{
EXPECT_EQ(entry->getSize(), 0);
EXPECT_EQ(getChecksum(entry), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), ZERO_CHECKSUM);
EXPECT_EQ(readAll(entry), "");
}
}
{
auto precalculated_entry = std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt", false, PRECALCULATED_SIZE, PRECALCULATED_CHECKSUM_UINT128);
EXPECT_EQ(precalculated_entry->getSize(), PRECALCULATED_SIZE);
EXPECT_EQ(getChecksum(precalculated_entry), PRECALCULATED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, PRECALCULATED_SIZE), PRECALCULATED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1000), PRECALCULATED_CHECKSUM);
EXPECT_EQ(readAll(precalculated_entry), "Some text");
}
}
TEST_F(BackupEntriesTest, EncryptedEntriesFromEncryptedDisk)
{
{
writeFile(encrypted_disk, "a.txt");
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true)};
auto encrypted_checksum = getChecksum(entries[0]);
EXPECT_NE(encrypted_checksum, NO_CHECKSUM);
EXPECT_NE(encrypted_checksum, ZERO_CHECKSUM);
EXPECT_NE(encrypted_checksum, SOME_TEXT_CHECKSUM);
auto partial_checksum = getPartialChecksum(entries[1], 9);
EXPECT_NE(partial_checksum, NO_CHECKSUM);
EXPECT_NE(partial_checksum, ZERO_CHECKSUM);
EXPECT_NE(partial_checksum, SOME_TEXT_CHECKSUM);
EXPECT_NE(partial_checksum, encrypted_checksum);
auto encrypted_data = readAll(entries[0]);
EXPECT_EQ(encrypted_data.size(), 9 + FileEncryption::Header::kSize);
for (const auto & entry : entries)
{
EXPECT_EQ(entry->getSize(), 9 + FileEncryption::Header::kSize);
EXPECT_EQ(getChecksum(entry), encrypted_checksum);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
auto encrypted_checksum_9 = getPartialChecksum(entry, 9);
EXPECT_TRUE(encrypted_checksum_9 == NO_CHECKSUM || encrypted_checksum_9 == partial_checksum);
EXPECT_EQ(getPartialChecksum(entry, 9 + FileEncryption::Header::kSize), encrypted_checksum);
EXPECT_EQ(getPartialChecksum(entry, 1000), encrypted_checksum);
EXPECT_EQ(readAll(entry), encrypted_data);
}
}
{
writeEmptyFile(encrypted_disk, "empty.txt");
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true)};
for (const auto & entry : entries)
{
EXPECT_EQ(entry->getSize(), 0);
EXPECT_EQ(getChecksum(entry), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(entry, 1), ZERO_CHECKSUM);
EXPECT_EQ(readAll(entry), "");
}
}
{
auto precalculated_entry = std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true, PRECALCULATED_SIZE, PRECALCULATED_CHECKSUM_UINT128);
EXPECT_EQ(precalculated_entry->getSize(), PRECALCULATED_SIZE + FileEncryption::Header::kSize);
auto encrypted_checksum = getChecksum(precalculated_entry);
EXPECT_NE(encrypted_checksum, NO_CHECKSUM);
EXPECT_NE(encrypted_checksum, ZERO_CHECKSUM);
EXPECT_NE(encrypted_checksum, SOME_TEXT_CHECKSUM);
EXPECT_NE(encrypted_checksum, PRECALCULATED_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 0), ZERO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, PRECALCULATED_SIZE), NO_CHECKSUM);
EXPECT_EQ(getPartialChecksum(precalculated_entry, PRECALCULATED_SIZE + FileEncryption::Header::kSize), encrypted_checksum);
EXPECT_EQ(getPartialChecksum(precalculated_entry, 1000), encrypted_checksum);
auto encrypted_data = readAll(precalculated_entry);
EXPECT_EQ(encrypted_data.size(), 9 + FileEncryption::Header::kSize);
}
}

View File

@ -1246,6 +1246,14 @@ void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
}
void ClientBase::addMultiquery(std::string_view query, Arguments & common_arguments) const
{
common_arguments.emplace_back("--multiquery");
common_arguments.emplace_back("-q");
common_arguments.emplace_back(query);
}
void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr parsed_query)
{
auto query = query_to_execute;
@ -2592,15 +2600,19 @@ void ClientBase::init(int argc, char ** argv)
("version-clean", "print version in machine-readable format and exit")
("config-file,C", po::value<std::string>(), "config-file path")
("queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("database,d", po::value<std::string>(), "database")
("history_file", po::value<std::string>(), "path to history file")
("query,q", po::value<std::string>(), "query")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.")
("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)")
("database,d", po::value<std::string>(), "database")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
("query_id", po::value<std::string>(), "query_id")
("history_file", po::value<std::string>(), "path to history file")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off")
("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
@ -2612,9 +2624,6 @@ void ClientBase::init(int argc, char ** argv)
("log-level", po::value<std::string>(), "log level")
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
("multiline,m", "multiline")
("multiquery,n", "multiquery")
("suggestion_limit", po::value<int>()->default_value(10000),
"Suggestion limit for how many databases, tables and columns to fetch.")

View File

@ -129,6 +129,7 @@ protected:
void setInsertionTable(const ASTInsertQuery & insert_query);
void addMultiquery(std::string_view query, Arguments & common_arguments) const;
private:
void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool partial_result_on_first_cancel);

View File

@ -232,12 +232,28 @@ void Connection::disconnect()
maybe_compressed_out = nullptr;
in = nullptr;
last_input_packet_type.reset();
out = nullptr; // can write to socket
std::exception_ptr finalize_exception;
try
{
// finalize() can write to socket and throw an exception.
if (out)
out->finalize();
}
catch (...)
{
/// Don't throw an exception here; it would leave the Connection in an invalid state.
finalize_exception = std::current_exception();
}
out = nullptr;
if (socket)
socket->close();
socket = nullptr;
connected = false;
nonce.reset();
if (finalize_exception)
std::rethrow_exception(finalize_exception);
}

View File

@ -4,6 +4,8 @@
namespace ProfileEvents
{
extern const Event DistributedConnectionTries;
extern const Event DistributedConnectionUsable;
extern const Event DistributedConnectionMissingTable;
extern const Event DistributedConnectionStaleReplica;
}
@ -35,6 +37,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
SCOPE_EXIT(is_finished = true);
try
{
ProfileEvents::increment(ProfileEvents::DistributedConnectionTries);
result.entry = pool->get(*timeouts, settings, /* force_connected = */ false);
AsyncCallbackSetter async_setter(&*result.entry, std::move(async_callback));
@ -45,6 +48,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
{
result.entry->forceConnected(*timeouts);
ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable);
result.is_usable = true;
result.is_up_to_date = true;
return;
@ -65,6 +69,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
return;
}
ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable);
result.is_usable = true;
UInt64 max_allowed_delay = settings ? UInt64(settings->max_replica_delay_for_distributed_queries) : 0;
@ -116,7 +121,7 @@ ConnectionEstablisherAsync::ConnectionEstablisherAsync(
epoll.add(timeout_descriptor.getDescriptor());
}
void ConnectionEstablisherAsync::Task::run(AsyncCallback async_callback, ResumeCallback)
void ConnectionEstablisherAsync::Task::run(AsyncCallback async_callback, SuspendCallback)
{
connection_establisher_async.reset();
connection_establisher_async.connection_establisher.setAsyncCallback(async_callback);

View File

@ -91,7 +91,7 @@ private:
ConnectionEstablisherAsync & connection_establisher_async;
void run(AsyncCallback async_callback, ResumeCallback suspend_callback) override;
void run(AsyncCallback async_callback, SuspendCallback suspend_callback) override;
};
void cancelAfter() override;

View File

@ -174,7 +174,7 @@ void HedgedConnections::sendQuery(
modified_settings.group_by_two_level_threshold_bytes = 0;
}
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas;
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
if (offset_states.size() > 1 && enable_sample_offset_parallel_processing)
{

View File

@ -142,7 +142,7 @@ void MultiplexedConnections::sendQuery(
}
}
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas;
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
size_t num_replicas = replica_states.size();
if (num_replicas > 1)

View File

@ -57,7 +57,7 @@ bool PacketReceiver::checkTimeout()
return true;
}
void PacketReceiver::Task::run(AsyncCallback async_callback, ResumeCallback suspend_callback)
void PacketReceiver::Task::run(AsyncCallback async_callback, SuspendCallback suspend_callback)
{
while (true)
{

View File

@ -57,7 +57,7 @@ private:
PacketReceiver & receiver;
void run(AsyncCallback async_callback, ResumeCallback suspend_callback) override;
void run(AsyncCallback async_callback, SuspendCallback suspend_callback) override;
};
/// When epoll file descriptor is ready, check if it's an expired timeout.

View File

@ -6,6 +6,7 @@
#include <Common/noexcept_scope.h>
#include <Common/setThreadName.h>
#include <Common/logger_useful.h>
#include <Common/ThreadPool.h>
namespace DB
{
@ -41,9 +42,14 @@ std::exception_ptr LoadJob::exception() const
return load_exception;
}
ssize_t LoadJob::priority() const
size_t LoadJob::executionPool() const
{
return load_priority;
return execution_pool_id;
}
size_t LoadJob::pool() const
{
return pool_id;
}
void LoadJob::wait() const
@ -112,8 +118,9 @@ void LoadJob::enqueued()
enqueue_time = std::chrono::system_clock::now();
}
void LoadJob::execute(const LoadJobPtr & self)
void LoadJob::execute(size_t pool, const LoadJobPtr & self)
{
execution_pool_id = pool;
start_time = std::chrono::system_clock::now();
func(self);
}
@ -148,22 +155,35 @@ void LoadTask::remove()
{
loader.remove(jobs);
jobs.clear();
goal_jobs.clear();
}
}
void LoadTask::detach()
{
jobs.clear();
goal_jobs.clear();
}
AsyncLoader::AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_, bool log_progress_)
AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool log_failures_, bool log_progress_)
: log_failures(log_failures_)
, log_progress(log_progress_)
, log(&Poco::Logger::get("AsyncLoader"))
, max_threads(max_threads_)
, pool(metric_threads, metric_active_threads, max_threads)
{
pools.reserve(pool_initializers.size());
for (auto && init : pool_initializers)
pools.push_back({
.name = init.name,
.priority = init.priority,
.thread_pool = std::make_unique<ThreadPool>(
init.metric_threads,
init.metric_active_threads,
init.max_threads,
/* max_free_threads = */ 0,
init.max_threads),
.max_threads = init.max_threads
});
}
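
For illustration, a hedged sketch of how this constructor might be used; the pool names, thread counts, metric identifiers (borrowed from the usage example in AsyncLoader.h) and the include path are assumptions, not actual server configuration:

#include <Common/AsyncLoader.h> // assumed header path

void exampleLoaderSetup()
{
    using namespace DB;

    // Two pools: a foreground pool with priority 0 (higher) and a background pool with priority 1 (lower).
    AsyncLoader async_loader({
        {"FgLoad", CurrentMetrics::AsyncLoaderThreads, CurrentMetrics::AsyncLoaderThreadsActive, /* max_threads = */ 2, Priority{0}},
        {"BgLoad", CurrentMetrics::AsyncLoaderThreads, CurrentMetrics::AsyncLoaderThreadsActive, /* max_threads = */ 1, Priority{1}},
    }, /* log_failures = */ true, /* log_progress = */ false);

    // The loader is constructed already started; pool sizes can be adjusted at runtime.
    async_loader.setMaxThreads(/* pool = */ 1, /* value = */ 4);

    // ... create and schedule tasks here ...

    async_loader.wait(); // Waits for all scheduled jobs, including jobs they spawn in other pools.
}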
AsyncLoader::~AsyncLoader()
@ -175,13 +195,20 @@ void AsyncLoader::start()
{
std::unique_lock lock{mutex};
is_running = true;
for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++)
spawn(lock);
updateCurrentPriorityAndSpawn(lock);
}
void AsyncLoader::wait()
{
pool.wait();
// Because a job can create new jobs in other pools, we have to re-check in a loop
std::unique_lock lock{mutex};
while (!scheduled_jobs.empty())
{
lock.unlock();
for (auto & p : pools)
p.thread_pool->wait();
lock.lock();
}
}
void AsyncLoader::stop()
@ -191,7 +218,7 @@ void AsyncLoader::stop()
is_running = false;
// NOTE: there is no need to notify because workers never wait
}
pool.wait();
wait();
}
void AsyncLoader::schedule(LoadTask & task)
@ -229,9 +256,9 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
old_jobs = finished_jobs.size();
}
// Make set of jobs to schedule:
// Pass 1. Make set of jobs to schedule:
// 1) exclude already scheduled or finished jobs
// 2) include pending dependencies, that are not yet scheduled
// 2) include assigned job dependencies (that are not yet scheduled)
LoadJobSet jobs;
for (const auto & job : input_jobs)
gatherNotScheduled(job, jobs, lock);
@ -242,17 +269,18 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
// We do not want any exception to be thrown after this point, because the following code is not exception-safe
DENY_ALLOCATIONS_IN_SCOPE;
// Schedule all incoming jobs
// Pass 2. Schedule all incoming jobs
for (const auto & job : jobs)
{
chassert(job->pool() < pools.size());
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
scheduled_jobs.emplace(job, Info{.initial_priority = job->load_priority, .priority = job->load_priority});
scheduled_jobs.try_emplace(job);
job->scheduled();
});
}
// Process dependencies on scheduled pending jobs
// Pass 3. Process dependencies on scheduled jobs, priority inheritance
for (const auto & job : jobs)
{
Info & info = scheduled_jobs.find(job)->second;
@ -267,17 +295,18 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
});
info.dependencies_left++;
// Priority inheritance: prioritize deps to have at least given `priority` to avoid priority inversion
prioritize(dep, info.priority, lock);
// Priority inheritance: prioritize deps to have at least given `pool.priority` to avoid priority inversion
prioritize(dep, job->pool_id, lock);
}
}
// Enqueue non-blocked jobs (w/o dependencies) to ready queue
if (!info.is_blocked())
if (!info.isBlocked())
enqueue(info, job, lock);
}
// Process dependencies on other jobs. It is done in a separate pass to facilitate propagation of cancel signals (if any).
// Pass 4: Process dependencies on other jobs.
// It is done in a separate pass to facilitate cancelling due to already failed dependencies.
for (const auto & job : jobs)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
@ -285,12 +314,12 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
for (const auto & dep : job->dependencies)
{
if (scheduled_jobs.contains(dep))
continue; // Skip dependencies on scheduled pending jobs (already processed)
continue; // Skip dependencies on scheduled jobs (already processed in pass 3)
LoadStatus dep_status = dep->status();
if (dep_status == LoadStatus::OK)
continue; // Dependency on already successfully finished job -- it's okay.
// Dependency on not scheduled pending job -- it's bad.
// Dependency on assigned job -- it's bad.
// Probably, there is an error in `jobs` set, `gatherNotScheduled()` should have fixed it.
chassert(dep_status != LoadStatus::PENDING);
@ -305,7 +334,7 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
job->name,
getExceptionMessage(dep->exception(), /* with_stacktrace = */ false)));
});
finish(lock, job, LoadStatus::CANCELED, e);
finish(job, LoadStatus::CANCELED, e, lock);
break; // This job is now finished, stop its dependencies processing
}
}
@ -327,13 +356,14 @@ void AsyncLoader::gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs,
}
}
void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority)
void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool)
{
if (!job)
return;
chassert(new_pool < pools.size());
DENY_ALLOCATIONS_IN_SCOPE;
std::unique_lock lock{mutex};
prioritize(job, new_priority, lock);
prioritize(job, new_pool, lock);
}
void AsyncLoader::remove(const LoadJobSet & jobs)
@ -347,14 +377,14 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
if (info->second.is_executing())
if (info->second.isExecuting())
continue; // Skip executing jobs on the first pass
std::exception_ptr e;
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
e = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED, "Load job '{}' canceled", job->name));
});
finish(lock, job, LoadStatus::CANCELED, e);
finish(job, LoadStatus::CANCELED, e, lock);
}
}
// On the second pass wait for executing jobs to finish
@ -363,7 +393,7 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
// Job is currently executing
chassert(info->second.is_executing());
chassert(info->second.isExecuting());
lock.unlock();
job->waitNoThrow(); // Wait for job to finish
lock.lock();
@ -379,25 +409,36 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
}
}
void AsyncLoader::setMaxThreads(size_t value)
void AsyncLoader::setMaxThreads(size_t pool, size_t value)
{
std::unique_lock lock{mutex};
pool.setMaxThreads(value);
pool.setMaxFreeThreads(value);
pool.setQueueSize(value);
max_threads = value;
auto & p = pools[pool];
p.thread_pool->setMaxThreads(value);
p.thread_pool->setQueueSize(value); // Keep the queue size equal to the max threads count to avoid blocking during spawning
p.max_threads = value;
if (!is_running)
return;
for (size_t i = 0; workers < max_threads && i < ready_queue.size(); i++)
spawn(lock);
for (size_t i = 0; canSpawnWorker(p, lock) && i < p.ready_queue.size(); i++)
spawn(p, lock);
}
size_t AsyncLoader::getMaxThreads() const
size_t AsyncLoader::getMaxThreads(size_t pool) const
{
std::unique_lock lock{mutex};
return max_threads;
return pools[pool].max_threads;
}
const String & AsyncLoader::getPoolName(size_t pool) const
{
return pools[pool].name; // NOTE: lock is not needed because `name` is const and `pools` are immutable
}
Priority AsyncLoader::getPoolPriority(size_t pool) const
{
return pools[pool].priority; // NOTE: lock is not needed because `priority` is const and `pools` are immutable
}
size_t AsyncLoader::getScheduledJobCount() const
{
std::unique_lock lock{mutex};
@ -412,11 +453,10 @@ std::vector<AsyncLoader::JobState> AsyncLoader::getJobStates() const
states.emplace(job->name, JobState{
.job = job,
.dependencies_left = info.dependencies_left,
.is_executing = info.is_executing(),
.is_blocked = info.is_blocked(),
.is_ready = info.is_ready(),
.initial_priority = info.initial_priority,
.ready_seqno = last_ready_seqno
.ready_seqno = info.ready_seqno,
.is_blocked = info.isBlocked(),
.is_ready = info.isReady(),
.is_executing = info.isExecuting()
});
for (const auto & job : finished_jobs)
states.emplace(job->name, JobState{.job = job});
@ -462,21 +502,21 @@ String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, Lo
return {};
}
void AsyncLoader::finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job)
void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock)
{
chassert(scheduled_jobs.contains(job)); // Job was pending
if (status == LoadStatus::OK)
{
// Notify waiters
job->ok();
// Update dependent jobs and enqueue if ready
chassert(scheduled_jobs.contains(job)); // Job was pending
for (const auto & dep : scheduled_jobs[job].dependent_jobs)
{
chassert(scheduled_jobs.contains(dep)); // All depended jobs must be pending
Info & dep_info = scheduled_jobs[dep];
dep_info.dependencies_left--;
if (!dep_info.is_blocked())
if (!dep_info.isBlocked())
enqueue(dep_info, dep, lock);
}
}
@ -488,11 +528,10 @@ void AsyncLoader::finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr &
else if (status == LoadStatus::CANCELED)
job->canceled(exception_from_job);
chassert(scheduled_jobs.contains(job)); // Job was pending
Info & info = scheduled_jobs[job];
if (info.is_ready())
if (info.isReady())
{
ready_queue.erase(info.key());
pools[job->pool_id].ready_queue.erase(info.ready_seqno);
info.ready_seqno = 0;
}
@ -512,7 +551,7 @@ void AsyncLoader::finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr &
dep->name,
getExceptionMessage(exception_from_job, /* with_stacktrace = */ false)));
});
finish(lock, dep, LoadStatus::CANCELED, e);
finish(dep, LoadStatus::CANCELED, e, lock);
}
// Clean dependency graph edges pointing to canceled jobs
@ -531,87 +570,130 @@ void AsyncLoader::finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr &
});
}
void AsyncLoader::prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock<std::mutex> & lock)
void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock)
{
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
if (info->second.priority >= new_priority)
return; // Never lower priority
Pool & old_pool = pools[job->pool_id];
Pool & new_pool = pools[new_pool_id];
if (old_pool.priority <= new_pool.priority)
return; // Never lower the priority; also do not switch pools when the priority would stay the same
// Update priority and push job forward through ready queue if needed
if (info->second.ready_seqno)
ready_queue.erase(info->second.key());
info->second.priority = new_priority;
job->load_priority.store(new_priority); // Set user-facing priority (may affect executing jobs)
if (info->second.ready_seqno)
UInt64 ready_seqno = info->second.ready_seqno;
// Requeue job into the new pool queue without allocations
if (ready_seqno)
{
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
ready_queue.emplace(info->second.key(), job);
});
new_pool.ready_queue.insert(old_pool.ready_queue.extract(ready_seqno));
if (canSpawnWorker(new_pool, lock))
spawn(new_pool, lock);
}
// Set user-facing pool (may affect executing jobs)
job->pool_id.store(new_pool_id);
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_priority, lock);
prioritize(dep, new_pool_id, lock);
}
}
void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock)
{
chassert(!info.is_blocked());
chassert(!info.isBlocked());
chassert(info.ready_seqno == 0);
info.ready_seqno = ++last_ready_seqno;
Pool & pool = pools[job->pool_id];
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
ready_queue.emplace(info.key(), job);
pool.ready_queue.emplace(info.ready_seqno, job);
});
job->enqueued();
if (is_running && workers < max_threads)
spawn(lock);
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
}
void AsyncLoader::spawn(std::unique_lock<std::mutex> &)
bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &)
{
workers++;
return is_running
&& !pool.ready_queue.empty()
&& pool.workers < pool.max_threads
&& (!current_priority || *current_priority >= pool.priority);
}
bool AsyncLoader::canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &)
{
return is_running
&& !pool.ready_queue.empty()
&& pool.workers <= pool.max_threads
&& (!current_priority || *current_priority >= pool.priority);
}
void AsyncLoader::updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> & lock)
{
// Find current priority.
// NOTE: We assume low number of pools, so O(N) scans are fine.
std::optional<Priority> priority;
for (Pool & pool : pools)
{
if (pool.isActive() && (!priority || *priority > pool.priority))
priority = pool.priority;
}
current_priority = priority;
// Spawn workers in all pools with current priority
for (Pool & pool : pools)
{
for (size_t i = 0; canSpawnWorker(pool, lock) && i < pool.ready_queue.size(); i++)
spawn(pool, lock);
}
}
void AsyncLoader::spawn(Pool & pool, std::unique_lock<std::mutex> &)
{
pool.workers++;
current_priority = pool.priority; // canSpawnWorker() ensures this would not decrease current_priority
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
pool.scheduleOrThrowOnError([this] { worker(); });
pool.thread_pool->scheduleOrThrowOnError([this, &pool] { worker(pool); });
});
}
void AsyncLoader::worker()
void AsyncLoader::worker(Pool & pool)
{
DENY_ALLOCATIONS_IN_SCOPE;
size_t pool_id = &pool - &*pools.begin();
LoadJobPtr job;
std::exception_ptr exception_from_job;
while (true)
{
// This is inside the loop to also reset previous thread names set inside the jobs
setThreadName("AsyncLoader");
setThreadName(pool.name.c_str());
{
std::unique_lock lock{mutex};
// Handle just executed job
if (exception_from_job)
finish(lock, job, LoadStatus::FAILED, exception_from_job);
finish(job, LoadStatus::FAILED, exception_from_job, lock);
else if (job)
finish(lock, job, LoadStatus::OK);
finish(job, LoadStatus::OK, {}, lock);
if (!is_running || ready_queue.empty() || workers > max_threads)
if (!canWorkerLive(pool, lock))
{
workers--;
if (--pool.workers == 0)
updateCurrentPriorityAndSpawn(lock); // It will spawn lower priority workers if needed
return;
}
// Take next job to be executed from the ready queue
auto it = ready_queue.begin();
auto it = pool.ready_queue.begin();
job = it->second;
ready_queue.erase(it);
pool.ready_queue.erase(it);
scheduled_jobs.find(job)->second.ready_seqno = 0; // This job is no longer in the ready queue
}
@ -619,7 +701,7 @@ void AsyncLoader::worker()
try
{
job->execute(job);
job->execute(pool_id, job);
exception_from_job = {};
}
catch (...)

View File

@ -11,8 +11,9 @@
#include <boost/noncopyable.hpp>
#include <base/types.h>
#include <Common/CurrentMetrics.h>
#include <Common/Priority.h>
#include <Common/Stopwatch.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
namespace Poco { class Logger; }
@ -46,22 +47,28 @@ class LoadJob : private boost::noncopyable
{
public:
template <class Func, class LoadJobSetType>
LoadJob(LoadJobSetType && dependencies_, String name_, Func && func_, ssize_t priority_ = 0)
LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_)
: dependencies(std::forward<LoadJobSetType>(dependencies_))
, name(std::move(name_))
, pool_id(pool_id_)
, func(std::forward<Func>(func_))
, load_priority(priority_)
{}
// Current job status.
LoadStatus status() const;
std::exception_ptr exception() const;
// Returns current value of a priority of the job. May differ from initial priority.
ssize_t priority() const;
// Returns pool in which the job is executing (was executed). May differ from initial pool and from current pool.
// Value is only valid (and constant) after execution started.
size_t executionPool() const;
// Returns current pool of the job. May differ from initial and execution pool.
// This value is intended for creating new jobs during this job execution.
// Value may change during job execution by `prioritize()`.
size_t pool() const;
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED. Returns or throws immediately on non-pending job.
// Throws if job is FAILED or CANCELED. Returns or throws immediately if called on non-pending job.
void wait() const;
// Wait for a job to reach any non PENDING status.
@ -90,10 +97,11 @@ private:
void scheduled();
void enqueued();
void execute(const LoadJobPtr & self);
void execute(size_t pool, const LoadJobPtr & self);
std::atomic<size_t> execution_pool_id;
std::atomic<size_t> pool_id;
std::function<void(const LoadJobPtr & self)> func;
std::atomic<ssize_t> load_priority;
mutable std::mutex mutex;
mutable std::condition_variable finished;
@ -115,25 +123,25 @@ struct EmptyJobFunc
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), std::forward<Func>(func));
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), 0, std::forward<Func>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(dependencies, std::move(name), std::forward<Func>(func));
return std::make_shared<LoadJob>(dependencies, std::move(name), 0, std::forward<Func>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, ssize_t priority, String name, Func && func = EmptyJobFunc())
LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), std::forward<Func>(func), priority);
return std::make_shared<LoadJob>(std::move(dependencies), std::move(name), pool_id, std::forward<Func>(func));
}
template <class Func = EmptyJobFunc>
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, ssize_t priority, String name, Func && func = EmptyJobFunc())
LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, Func && func = EmptyJobFunc())
{
return std::make_shared<LoadJob>(dependencies, std::move(name), std::forward<Func>(func), priority);
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func));
}
// Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
@ -185,7 +193,7 @@ inline void scheduleLoad(const LoadTaskPtrs & tasks)
}
template <class... Args>
inline void scheduleLoad(Args && ... args)
inline void scheduleLoadAll(Args && ... args)
{
(scheduleLoad(std::forward<Args>(args)), ...);
}
@ -208,16 +216,16 @@ inline void waitLoad(const LoadTaskPtrs & tasks)
}
template <class... Args>
inline void waitLoad(Args && ... args)
inline void waitLoadAll(Args && ... args)
{
(waitLoad(std::forward<Args>(args)), ...);
}
template <class... Args>
inline void scheduleAndWaitLoad(Args && ... args)
inline void scheduleAndWaitLoadAll(Args && ... args)
{
scheduleLoad(std::forward<Args>(args)...);
waitLoad(std::forward<Args>(args)...);
scheduleLoadAll(std::forward<Args>(args)...);
waitLoadAll(std::forward<Args>(args)...);
}
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks)
@ -228,6 +236,14 @@ inline LoadJobSet getGoals(const LoadTaskPtrs & tasks)
return result;
}
inline LoadJobSet getGoalsOr(const LoadTaskPtrs & tasks, const LoadJobSet & alternative)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result.empty() ? alternative : result;
}
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
LoadJobSet result;
@ -251,100 +267,118 @@ inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs &
return result;
}
// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks dependencies and priorities of jobs.
// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks job dependencies and priorities.
// Basic usage example:
// // Start async_loader with two thread pools (0=fg, 1=bg):
// AsyncLoader async_loader({
// {"FgPool", CurrentMetrics::AsyncLoaderThreads, CurrentMetrics::AsyncLoaderThreadsActive, .max_threads = 2, .priority{0}}
// {"BgPool", CurrentMetrics::AsyncLoaderThreads, CurrentMetrics::AsyncLoaderThreadsActive, .max_threads = 1, .priority{1}}
// });
//
// // Create and schedule a task consisting of three jobs. Job1 has no dependencies and is run first.
// // Job2 and job3 depend on job1 and are run only after job1 completion.
// auto job_func = [&] (const LoadJobPtr & self) {
// LOG_TRACE(log, "Executing load job '{}' with priority '{}'", self->name, self->priority());
// LOG_TRACE(log, "Executing load job '{}' in pool '{}'", self->name, async_loader->getPoolName(self->pool()));
// };
// auto job1 = makeLoadJob({}, "job1", job_func);
// auto job2 = makeLoadJob({ job1 }, "job2", job_func);
// auto job3 = makeLoadJob({ job1 }, "job3", job_func);
// auto job1 = makeLoadJob({}, "job1", /* pool_id = */ 1, job_func);
// auto job2 = makeLoadJob({ job1 }, "job2", /* pool_id = */ 1, job_func);
// auto job3 = makeLoadJob({ job1 }, "job3", /* pool_id = */ 1, job_func);
// auto task = makeLoadTask(async_loader, { job1, job2, job3 });
// task.schedule();
// Here we have created and scheduled a task consisting of three jobs. Job1 has no dependencies and is run first.
// Job2 and job3 depend on job1 and are run only after job1 completion. Another thread may prioritize a job and wait for it:
// async_loader->prioritize(job3, /* priority = */ 1); // higher priority jobs are run first, default priority is zero.
// job3->wait(); // blocks until job completion or cancellation and rethrow an exception (if any)
//
// AsyncLoader tracks state of all scheduled jobs. Job lifecycle is the following:
// 1) Job is constructed with PENDING status and initial priority. The job is placed into a task.
// 2) The task is scheduled with all its jobs and their dependencies. A scheduled job may be ready (i.e. have all its dependencies finished) or blocked.
// 3a) When all dependencies are successfully executed, the job became ready. A ready job is enqueued into the ready queue.
// // Another thread may prioritize a job by changing its pool and wait for it:
// async_loader->prioritize(job3, /* pool_id = */ 0); // Increase priority: 1 -> 0 (lower is better)
// job3->wait(); // Blocks until job completion or cancellation and rethrow an exception (if any)
//
// Every job has a pool associated with it. AsyncLoader starts every job in its thread pool.
// Each pool has a constant priority and a mutable maximum number of threads.
// Higher priority (lower `pool.priority` value) jobs are run first.
// No job with lower priority is started while there is at least one higher priority job ready or running.
//
// Job priority can be elevated (but never lowered)
// (a) either if it has a dependent job with a higher priority:
// in this case the priority and the pool of the dependent job are inherited during the `schedule()` call;
// (b) or if the job was explicitly prioritized by a `prioritize(job, higher_priority_pool)` call:
// this also leads to priority inheritance for all of its dependencies.
// The value stored in the load job `pool_id` field is atomic and can be changed even during job execution.
// The job is, of course, not moved from its initial thread pool, but it should use `self->pool()` for
// all new jobs it creates to avoid priority inversion. To obtain the pool in which the job is being executed,
// call `self->executionPool()` instead.
//
// === IMPLEMENTATION DETAILS ===
// All possible states and statuses of a job:
// .---------- scheduled ----------.
// ctor --> assigned --> blocked --> ready --> executing --> finished ------> removed --> dtor
// STATUS: '------------------ PENDING -----------------' '-- OK|FAILED|CANCELED --'
//
// AsyncLoader tracks state of all scheduled and finished jobs. Job lifecycle is the following:
// 1) A job is constructed with PENDING status and assigned to a pool. The job is placed into a task.
// 2) The task is scheduled with all its jobs and their dependencies. A scheduled job may be ready, blocked (and later executing).
// 3a) When all dependencies are successfully finished, the job becomes ready. A ready job is enqueued into the ready queue of its pool.
// 3b) If at least one of the job's dependencies has failed or been canceled, then this job is canceled (along with all of its dependent jobs).
// On cancellation an ASYNC_LOAD_CANCELED exception is generated and saved inside the LoadJob object. The job status is changed to CANCELED.
// The exception is rethrown by any existing or new `wait()` call. The job is moved to the set of finished jobs.
// 4) The scheduled pending ready job starts execution by a worker. The job is dequeued. Callback `job_func` is called.
// Status of an executing job is PENDING. And it is still considered as a scheduled job by AsyncLoader.
// Note that `job_func` of a CANCELED job is never executed.
// 4) The ready job starts execution by a worker. The job is dequeued. Callback `job_func` is called.
// Status of an executing job is PENDING. Note that `job_func` of a CANCELED job is never executed.
// 5a) On successful execution the job status is changed to OK and all existing and new `wait()` calls finish w/o exceptions.
// 5b) Any exception thrown out of `job_func` is wrapped into an ASYNC_LOAD_FAILED exception and saved inside LoadJob.
// The job status is changed to FAILED. All the dependent jobs are canceled. The exception is rethrown from all existing and new `wait()` calls.
// 6) The job is no longer considered as scheduled and is instead moved to the finished jobs set. This is just for introspection of the finished jobs.
// 7) The task containing this job is destructed or `remove()` is explicitly called. The job is removed from the finished job set.
// 8) The job is destructed.
//
// Every job has a priority associated with it. AsyncLoader runs higher priority (greater `priority` value) jobs first. Job priority can be elevated
// (a) if either it has a dependent job with higher priority (in this case priority of a dependent job is inherited);
// (b) or job was explicitly prioritized by `prioritize(job, higher_priority)` call (this also leads to a priority inheritance for all the dependencies).
// Note that to avoid priority inversion `job_func` should use `self->priority()` to schedule new jobs in AsyncLoader or any other pool.
// Value stored in load job priority field is atomic and can be increased even during job execution.
//
// When a task is scheduled it can contain dependencies on previously scheduled jobs. These jobs can have any status. If job A being scheduled depends on
// another job B that is not yet scheduled, then job B will also be scheduled (even if the task does not contain it).
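//
// To illustrate the `self->pool()` note above, a hedged sketch (the loader, job names and task wiring are
// illustrative assumptions, not taken from the codebase) of a job that spawns a follow-up job in its own,
// possibly prioritized, pool:
// auto parent_func = [&] (const LoadJobPtr & self)
// {
//     // Use self->pool() rather than the pool the parent was created with: if the parent was prioritized,
//     // the follow-up job must run with at least the same priority to avoid priority inversion.
//     auto child = makeLoadJob({}, /* pool_id = */ self->pool(), "child_of_" + self->name, child_func);
//     child_task = makeLoadTask(async_loader, { child });
//     child_task->schedule();
// };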
class AsyncLoader : private boost::noncopyable
{
private:
// Key of a pending job in the ready queue.
struct ReadyKey
// Thread pool for job execution.
// Pools control the following aspects of job execution:
// 1) Concurrency: Amount of concurrently executing jobs in a pool is `max_threads`.
// 2) Priority: As long as there is an executing worker with a higher priority, workers with lower priorities are not started
// (although they can finish the last job they started before the higher-priority jobs appeared)
struct Pool
{
ssize_t priority; // Ascending order
ssize_t initial_priority; // Ascending order
UInt64 ready_seqno; // Descending order
const String name;
const Priority priority;
std::unique_ptr<ThreadPool> thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools.
std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno`
size_t max_threads; // Max number of workers to be spawned
size_t workers = 0; // Number of currently executing workers
bool operator<(const ReadyKey & rhs) const
{
if (priority > rhs.priority)
return true;
if (priority < rhs.priority)
return false;
if (initial_priority > rhs.initial_priority)
return true;
if (initial_priority < rhs.initial_priority)
return false;
return ready_seqno < rhs.ready_seqno;
}
bool isActive() const { return workers > 0 || !ready_queue.empty(); }
};
// Scheduling information for a pending job.
struct Info
{
ssize_t initial_priority = 0; // Initial priority passed into schedule().
ssize_t priority = 0; // Elevated priority, due to priority inheritance or prioritize().
size_t dependencies_left = 0; // Current number of dependencies on pending jobs.
UInt64 ready_seqno = 0; // Zero means that job is not in ready queue.
LoadJobSet dependent_jobs; // Set of jobs dependent on this job.
// Three independent states of a non-finished job.
bool is_blocked() const { return dependencies_left > 0; }
bool is_ready() const { return dependencies_left == 0 && ready_seqno > 0; }
bool is_executing() const { return dependencies_left == 0 && ready_seqno == 0; }
// Get key of a ready job
ReadyKey key() const
{
return {.priority = priority, .initial_priority = initial_priority, .ready_seqno = ready_seqno};
}
// Three independent states of a scheduled job.
bool isBlocked() const { return dependencies_left > 0; }
bool isReady() const { return dependencies_left == 0 && ready_seqno > 0; }
bool isExecuting() const { return dependencies_left == 0 && ready_seqno == 0; }
};
public:
using Metric = CurrentMetrics::Metric;
AsyncLoader(Metric metric_threads, Metric metric_active_threads, size_t max_threads_, bool log_failures_, bool log_progress_);
// Helper struct for AsyncLoader construction
struct PoolInitializer
{
String name;
Metric metric_threads;
Metric metric_active_threads;
size_t max_threads;
Priority priority;
};
AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool log_failures_, bool log_progress_);
// Stops AsyncLoader before destruction
// WARNING: all tasks instances should be destructed before associated AsyncLoader.
~AsyncLoader();
// Start workers to execute scheduled load jobs.
// Start workers to execute scheduled load jobs. Note that AsyncLoader is constructed as already started.
void start();
// Wait for all load jobs to finish, including all new jobs. So make sure to stop adding new jobs first.
@ -356,28 +390,32 @@ public:
// - or canceled using ~Task() or remove() later.
void stop();
// Schedule all jobs of given `task` and their dependencies (if any, not scheduled yet).
// Higher priority jobs (with greater `job->priority()` value) are executed earlier.
// All dependencies of a scheduled job inherit its priority if it is higher. This way higher priority job
// never wait for (blocked by) lower priority jobs. No priority inversion is possible.
// Schedule all jobs of given `task` and their dependencies (even if they are not in task).
// All dependencies of a scheduled job inherit its pool if it has a higher priority. This way a higher-priority job
// never waits for (is blocked by) lower-priority jobs. No priority inversion is possible.
// Idempotent: multiple schedule() calls for the same job are no-op.
// Note that `task` destructor ensures that all its jobs are finished (OK, FAILED or CANCELED)
// and are removed from AsyncLoader, so it is thread-safe to destroy them.
void schedule(LoadTask & task);
void schedule(const LoadTaskPtr & task);
// Schedule all tasks atomically, to ensure that only the highest-priority jobs among all tasks are run first.
void schedule(const std::vector<LoadTaskPtr> & tasks);
void schedule(const LoadTaskPtrs & tasks);
// Increase priority of a job and all its dependencies recursively.
void prioritize(const LoadJobPtr & job, ssize_t new_priority);
// Jobs from higher (than `new_pool`) priority pools are not changed.
void prioritize(const LoadJobPtr & job, size_t new_pool);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
void remove(const LoadJobSet & jobs);
// Increase or decrease maximum number of simultaneously executing jobs.
void setMaxThreads(size_t value);
// Increase or decrease maximum number of simultaneously executing jobs in `pool`.
void setMaxThreads(size_t pool, size_t value);
size_t getMaxThreads(size_t pool) const;
const String & getPoolName(size_t pool) const;
Priority getPoolPriority(size_t pool) const;
size_t getMaxThreads() const;
size_t getScheduledJobCount() const;
// Helper class for introspection
@ -385,11 +423,10 @@ public:
{
LoadJobPtr job;
size_t dependencies_left = 0;
bool is_executing = false;
UInt64 ready_seqno = 0;
bool is_blocked = false;
bool is_ready = false;
std::optional<ssize_t> initial_priority;
std::optional<UInt64> ready_seqno;
bool is_executing = false;
};
// For introspection and debug only, see `system.async_loader` table
@ -398,42 +435,32 @@ public:
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
void finish(std::unique_lock<std::mutex> & lock, const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job = {});
void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock);
void scheduleImpl(const LoadJobSet & input_jobs);
void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, ssize_t new_priority, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock);
void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);
void spawn(std::unique_lock<std::mutex> &);
void worker();
bool canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &);
bool canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &);
void updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> &);
void spawn(Pool & pool, std::unique_lock<std::mutex> &);
void worker(Pool & pool);
// Logging
const bool log_failures; // Worker should log all exceptions caught from job functions.
const bool log_progress; // Periodically log total progress
Poco::Logger * log;
std::chrono::system_clock::time_point busy_period_start_time;
AtomicStopwatch stopwatch;
size_t old_jobs = 0; // Number of jobs that were finished in previous busy period (for correct progress indication)
mutable std::mutex mutex; // Guards all the fields below.
bool is_running = false;
// Full set of scheduled pending jobs along with scheduling info.
std::unordered_map<LoadJobPtr, Info> scheduled_jobs;
// Subset of scheduled pending non-blocked jobs (waiting for a worker to be executed).
// Represent a queue of jobs in order of decreasing priority and FIFO for jobs with equal priorities.
std::map<ReadyKey, LoadJobPtr> ready_queue;
// Set of finished jobs (for introspection only, until jobs are removed).
LoadJobSet finished_jobs;
// Increasing counter for `ReadyKey` assignment (to preserve FIFO order of the jobs with equal priorities).
UInt64 last_ready_seqno = 0;
// For executing jobs. Note that we avoid using an internal queue of the pool to be able to prioritize jobs.
size_t max_threads;
size_t workers = 0;
ThreadPool pool;
bool is_running = true;
std::optional<Priority> current_priority; // highest priority among active pools
UInt64 last_ready_seqno = 0; // Increasing counter for ready queue keys.
std::unordered_map<LoadJobPtr, Info> scheduled_jobs; // Full set of scheduled pending jobs along with scheduling info.
std::vector<Pool> pools; // Thread pools for job execution and ready queues
LoadJobSet finished_jobs; // Set of finished jobs (for introspection only, until jobs are removed).
AtomicStopwatch stopwatch; // For progress indication
size_t old_jobs = 0; // Number of jobs that were finished in previous busy period (for correct progress indication)
std::chrono::system_clock::time_point busy_period_start_time;
};
}
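
The renamed bulk helpers compose naturally. A hedged usage sketch (the task containers and the assumed include path are illustrative):

#include <Common/AsyncLoader.h> // assumed header path

void loadEverything(const DB::LoadTaskPtrs & tables_tasks, const DB::LoadTaskPtrs & system_tasks)
{
    using namespace DB;
    scheduleLoadAll(tables_tasks, system_tasks); // Schedule every task from both groups at once.
    waitLoadAll(tables_tasks, system_tasks);     // Block until all of their jobs are finished.
}

getGoalsOr() can then turn the tasks' goal jobs into a dependency set for later jobs, falling back to an alternative set when the task list is empty.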

View File

@ -3,18 +3,11 @@
namespace DB
{
thread_local FiberInfo current_fiber_info;
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{
createFiber();
}
FiberInfo AsyncTaskExecutor::getCurrentFiberInfo()
{
return current_fiber_info;
}
void AsyncTaskExecutor::resume()
{
if (routine_is_finished)
@ -38,10 +31,7 @@ void AsyncTaskExecutor::resume()
void AsyncTaskExecutor::resumeUnlocked()
{
auto parent_fiber_info = current_fiber_info;
current_fiber_info = FiberInfo{&fiber, &parent_fiber_info};
fiber = std::move(fiber).resume();
current_fiber_info = parent_fiber_info;
fiber.resume();
}
void AsyncTaskExecutor::cancel()
@ -69,30 +59,19 @@ struct AsyncTaskExecutor::Routine
struct AsyncCallback
{
AsyncTaskExecutor & executor;
Fiber & fiber;
SuspendCallback suspend_callback;
void operator()(int fd, Poco::Timespan timeout, AsyncEventTimeoutType type, const std::string & desc, uint32_t events)
{
executor.processAsyncEvent(fd, timeout, type, desc, events);
fiber = std::move(fiber).resume();
suspend_callback();
executor.clearAsyncEvent();
}
};
struct ResumeCallback
void operator()(SuspendCallback suspend_callback)
{
Fiber & fiber;
void operator()()
{
fiber = std::move(fiber).resume();
}
};
Fiber operator()(Fiber && sink)
{
auto async_callback = AsyncCallback{executor, sink};
auto suspend_callback = ResumeCallback{sink};
auto async_callback = AsyncCallback{executor, suspend_callback};
try
{
executor.task->run(async_callback, suspend_callback);
@ -110,18 +89,17 @@ struct AsyncTaskExecutor::Routine
}
executor.routine_is_finished = true;
return std::move(sink);
}
};
void AsyncTaskExecutor::createFiber()
{
fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this});
fiber = Fiber(fiber_stack, Routine{*this});
}
void AsyncTaskExecutor::destroyFiber()
{
boost::context::fiber to_destroy = std::move(fiber);
Fiber to_destroy = std::move(fiber);
}
String getSocketTimeoutExceededMessageByTimeoutType(AsyncEventTimeoutType type, Poco::Timespan timeout, const String & socket_description)

View File

@ -22,7 +22,7 @@ enum class AsyncEventTimeoutType
};
using AsyncCallback = std::function<void(int, Poco::Timespan, AsyncEventTimeoutType, const std::string &, uint32_t)>;
using ResumeCallback = std::function<void()>;
using SuspendCallback = std::function<void()>;
struct FiberInfo
{
@ -38,7 +38,7 @@ struct FiberInfo
struct AsyncTask
{
public:
virtual void run(AsyncCallback async_callback, ResumeCallback suspend_callback) = 0;
virtual void run(AsyncCallback async_callback, SuspendCallback suspend_callback) = 0;
virtual ~AsyncTask() = default;
};
@ -80,7 +80,6 @@ public:
};
#endif
static FiberInfo getCurrentFiberInfo();
protected:
/// Method that is called in resume() before actual fiber resuming.
/// If it returns false, resume() will return immediately without actual fiber resuming.
@ -113,8 +112,8 @@ private:
void createFiber();
void destroyFiber();
Fiber fiber;
FiberStack fiber_stack;
Fiber fiber;
std::mutex fiber_lock;
std::exception_ptr exception;
@ -124,48 +123,6 @@ private:
std::unique_ptr<AsyncTask> task;
};
/// Simple implementation for fiber local variable.
template <typename T>
struct FiberLocal
{
public:
FiberLocal()
{
/// Initialize main instance for this thread. Instances for fibers will inherit it,
/// (it's needed because main instance could be changed before creating fibers
/// and changes should be visible in fibers).
data[nullptr] = T();
}
T & operator*()
{
return get();
}
T * operator->()
{
return &get();
}
private:
T & get()
{
return getInstanceForFiber(AsyncTaskExecutor::getCurrentFiberInfo());
}
T & getInstanceForFiber(FiberInfo info)
{
auto it = data.find(info.fiber);
/// If it's the first request, we need to initialize instance for the fiber
/// using instance from parent fiber or main thread that created fiber.
if (it == data.end())
it = data.insert({info.fiber, getInstanceForFiber(*info.parent_fiber_info)}).first;
return it->second;
}
std::unordered_map<const Fiber *, T> data;
};
String getSocketTimeoutExceededMessageByTimeoutType(AsyncEventTimeoutType type, Poco::Timespan timeout, const String & socket_description);
}
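
To make the `SuspendCallback` rename concrete, a minimal hypothetical `AsyncTask` sketch; the task, its members, the namespace qualification and the include path are assumptions, only the `run()` signature and callback types come from this header:

#include <Common/AsyncTaskExecutor.h> // assumed header path

struct ReadFdTask : public DB::AsyncTask
{
    int fd;
    Poco::Timespan timeout;
    DB::AsyncEventTimeoutType timeout_type;
    uint32_t events; // e.g. an epoll event mask

    ReadFdTask(int fd_, Poco::Timespan timeout_, DB::AsyncEventTimeoutType type_, uint32_t events_)
        : fd(fd_), timeout(timeout_), timeout_type(type_), events(events_) {}

    void run(DB::AsyncCallback async_callback, DB::SuspendCallback suspend_callback) override
    {
        // Register the descriptor we are about to wait on; the executor suspends the fiber here
        // and resumes it once the descriptor is ready or the timeout expires.
        async_callback(fd, timeout, timeout_type, "reading from socket", events);

        // ... the descriptor is ready at this point; perform the non-blocking read ...

        // suspend_callback() can be used to yield explicitly between stages (not needed here).
        (void)suspend_callback;
    }
};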

View File

@ -1041,18 +1041,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
// It doesn't read the EOL itself.
++cpuinfo->position();
if (s.rfind("processor", 0) == 0)
static constexpr std::string_view PROCESSOR = "processor";
if (s.starts_with(PROCESSOR))
{
/// s390x example: processor 0: version = FF, identification = 039C88, machine = 3906
/// non s390x example: processor : 0
if (auto colon = s.find_first_of(':'))
{
#ifdef __s390x__
core_id = std::stoi(s.substr(10)); /// 10: length of "processor" plus 1
#else
core_id = std::stoi(s.substr(colon + 2));
#endif
}
auto core_id_start = std::ssize(PROCESSOR);
while (core_id_start < std::ssize(s) && !std::isdigit(s[core_id_start]))
++core_id_start;
core_id = std::stoi(s.substr(core_id_start));
}
else if (s.rfind("cpu MHz", 0) == 0)
{
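
The new parsing above handles both /proc/cpuinfo layouts by skipping forward to the first digit after the "processor" keyword. A standalone sketch of the same idea (illustrative only, not the code in AsynchronousMetrics; it assumes a digit is present):

#include <cctype>
#include <string>
#include <string_view>

// Extracts the core id from either format:
//   "processor : 0"                                      (most platforms)
//   "processor 0: version = FF, identification = ..."    (s390x)
static int parseCoreId(const std::string & s)
{
    static constexpr std::string_view PROCESSOR = "processor";
    size_t pos = PROCESSOR.size();
    while (pos < s.size() && !std::isdigit(static_cast<unsigned char>(s[pos])))
        ++pos; // Skip spaces, tabs and the ':' separator until the first digit.
    return std::stoi(s.substr(pos)); // stoi stops at the first non-digit character.
}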

View File

@ -10,6 +10,8 @@
#include <type_traits>
#define DATE_SECONDS_PER_DAY 86400 /// Number of seconds in a day, 60 * 60 * 24
#define DATE_LUT_MIN_YEAR 1900 /// 1900 since majority of financial organizations consider 1900 as an initial year.
#define DATE_LUT_MAX_YEAR 2299 /// Last supported year (complete)
#define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table
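
As a quick sanity check of the arithmetic: 1 + 2299 - 1900 = 400, so a hypothetical compile-time assertion for these macros would be:

static_assert(DATE_LUT_YEARS == 400, "DATE_LUT covers the years 1900..2299 inclusive");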

View File

@ -3,5 +3,147 @@
/// BOOST_USE_ASAN, BOOST_USE_TSAN and BOOST_USE_UCONTEXT should be correctly defined for sanitizers.
#include <base/defines.h>
#include <boost/context/fiber.hpp>
#include <map>
/// Class wrapper for boost::context::fiber.
/// It tracks the currently executing fiber for the thread and
/// supports storing fiber-specific data
/// that is destroyed in the fiber's destructor.
class Fiber
{
private:
using Impl = boost::context::fiber;
using FiberPtr = Fiber *;
template <typename T> friend class FiberLocal;
public:
template< typename StackAlloc, typename Fn>
Fiber(StackAlloc && salloc, Fn && fn) : impl(std::allocator_arg_t(), std::forward<StackAlloc>(salloc), RoutineImpl(std::forward<Fn>(fn)))
{
}
Fiber() = default;
Fiber(Fiber && other) = default;
Fiber & operator=(Fiber && other) = default;
Fiber(const Fiber &) = delete;
Fiber & operator =(const Fiber &) = delete;
explicit operator bool() const
{
return impl.operator bool();
}
void resume()
{
/// Update information about current executing fiber.
FiberPtr & current_fiber = getCurrentFiber();
FiberPtr parent_fiber = current_fiber;
current_fiber = this;
impl = std::move(impl).resume();
/// Restore parent fiber.
current_fiber = parent_fiber;
}
private:
template <typename Fn>
struct RoutineImpl
{
struct SuspendCallback
{
Impl & impl;
void operator()()
{
impl = std::move(impl).resume();
}
};
explicit RoutineImpl(Fn && fn_) : fn(std::move(fn_))
{
}
Impl operator()(Impl && sink)
{
SuspendCallback suspend_callback{sink};
fn(suspend_callback);
return std::move(sink);
}
Fn fn;
};
static FiberPtr & getCurrentFiber()
{
thread_local static FiberPtr current_fiber;
return current_fiber;
}
/// Special wrapper to store data in a unique_ptr.
struct DataWrapper
{
virtual ~DataWrapper() = default;
};
using DataPtr = std::unique_ptr<DataWrapper>;
/// Get reference to fiber-specific data by key
/// (the pointer to the structure that uses this data).
DataPtr & getLocalData(void * key)
{
return local_data[key];
}
Impl && release()
{
return std::move(impl);
}
Impl impl;
std::map<void *, DataPtr> local_data;
};
/// Implementation of a fiber-local variable.
/// If we are in a fiber, it returns the fiber-local data,
/// otherwise it returns its single field.
/// Fiber-local data is destroyed in the Fiber destructor.
/// The implementation is similar to boost::fiber::fiber_specific_ptr
/// (we cannot use it because we don't use the boost::fiber API).
template <typename T>
class FiberLocal
{
public:
T & operator*()
{
return get();
}
T * operator->()
{
return &get();
}
private:
struct DataWrapperImpl : public Fiber::DataWrapper
{
T impl;
};
T & get()
{
Fiber * current_fiber = Fiber::getCurrentFiber();
if (!current_fiber)
return main_instance;
Fiber::DataPtr & ptr = current_fiber->getLocalData(this);
/// Initialize instance on first request.
if (!ptr)
ptr = std::make_unique<DataWrapperImpl>();
return dynamic_cast<DataWrapperImpl *>(ptr.get())->impl;
}
T main_instance;
};
using Fiber = boost::context::fiber;
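
A hedged usage sketch of the Fiber/FiberLocal pair above; the stack allocator choice and the counter are illustrative, and the exact values rely on the first-access initialization behaviour described in the comments:

#include <boost/context/fixedsize_stack.hpp>

thread_local FiberLocal<int> fiber_counter;

void fiberLocalExample()
{
    *fiber_counter = 1; // Main-thread instance; untouched by the fiber below.

    Fiber fiber(boost::context::fixedsize_stack(), [](auto suspend)
    {
        // Inside the fiber we get a separate instance, created on first access.
        *fiber_counter += 10;
        suspend();            // Yield back to the caller...
        *fiber_counter += 10; // ...and keep our own value when resumed.
    });

    fiber.resume(); // Runs until suspend(); the fiber-local value is now 10.
    fiber.resume(); // Runs to completion; the fiber-local data is destroyed together with the Fiber object.

    // The main-thread instance still holds 1.
}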

View File

@ -138,7 +138,7 @@ void FileChecker::save() const
std::string tmp_files_info_path = parentPath(files_info_path) + "tmp_" + fileName(files_info_path);
{
std::unique_ptr<WriteBuffer> out = disk ? disk->writeFile(tmp_files_info_path) : std::make_unique<WriteBufferFromFile>(tmp_files_info_path);
std::unique_ptr<WriteBufferFromFileBase> out = disk ? disk->writeFile(tmp_files_info_path) : std::make_unique<WriteBufferFromFile>(tmp_files_info_path);
/// Such a complex JSON structure is used for compatibility with the old format.
writeCString("{\"clickhouse\":{", *out);
@ -157,7 +157,9 @@ void FileChecker::save() const
}
writeCString("}}", *out);
out->next();
out->sync();
out->finalize();
}
if (disk)

View File

@ -5,7 +5,8 @@
#include <Common/Exception.h>
#include <base/hex.h>
#include <Core/Settings.h>
#include <IO/Operators.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/AsyncTaskExecutor.h>
@ -14,9 +15,8 @@ namespace DB
namespace OpenTelemetry
{
///// This code can be executed inside several fibers in one thread,
///// we should use fiber local tracing context.
thread_local FiberLocal<TracingContextOnThread> current_fiber_trace_context;
/// This code can be executed inside fibers, we should use fiber local tracing context.
thread_local FiberLocal<TracingContextOnThread> current_trace_context;
bool Span::addAttribute(std::string_view name, UInt64 value) noexcept
{
@ -108,7 +108,7 @@ bool Span::addAttributeImpl(std::string_view name, std::string_view value) noexc
SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
{
if (!current_fiber_trace_context->isTraceEnabled())
if (!current_trace_context->isTraceEnabled())
{
return;
}
@ -116,8 +116,8 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
/// Use try-catch to make sure the ctor is exception safe.
try
{
this->trace_id = current_fiber_trace_context->trace_id;
this->parent_span_id = current_fiber_trace_context->span_id;
this->trace_id = current_trace_context->trace_id;
this->parent_span_id = current_trace_context->span_id;
this->span_id = thread_local_rng(); // create a new id for this span
this->operation_name = _operation_name;
this->kind = _kind;
@ -136,7 +136,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
}
/// Set current span as parent of other spans created later on this thread.
current_fiber_trace_context->span_id = this->span_id;
current_trace_context->span_id = this->span_id;
}
void SpanHolder::finish() noexcept
@ -145,12 +145,12 @@ void SpanHolder::finish() noexcept
return;
// First of all, restore old value of current span.
assert(current_fiber_trace_context->span_id == span_id);
current_fiber_trace_context->span_id = parent_span_id;
assert(current_trace_context->span_id == span_id);
current_trace_context->span_id = parent_span_id;
try
{
auto log = current_fiber_trace_context->span_log.lock();
auto log = current_trace_context->span_log.lock();
/// The log might be disabled, check it before use
if (log)
@ -249,31 +249,31 @@ String TracingContext::composeTraceparentHeader() const
void TracingContext::deserialize(ReadBuffer & buf)
{
buf >> this->trace_id
>> "\n"
>> this->span_id
>> "\n"
>> this->tracestate
>> "\n"
>> this->trace_flags
>> "\n";
readUUIDText(trace_id, buf);
assertChar('\n', buf);
readIntText(span_id, buf);
assertChar('\n', buf);
readEscapedString(tracestate, buf);
assertChar('\n', buf);
readIntText(trace_flags, buf);
assertChar('\n', buf);
}
void TracingContext::serialize(WriteBuffer & buf) const
{
buf << this->trace_id
<< "\n"
<< this->span_id
<< "\n"
<< this->tracestate
<< "\n"
<< this->trace_flags
<< "\n";
writeUUIDText(trace_id, buf);
writeChar('\n', buf);
writeIntText(span_id, buf);
writeChar('\n', buf);
writeEscapedString(tracestate, buf);
writeChar('\n', buf);
writeIntText(trace_flags, buf);
writeChar('\n', buf);
}
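
The serialization format is four newline-terminated fields (trace_id, span_id, tracestate, trace_flags). A hedged round-trip sketch, assuming the usual ClickHouse string buffers and include paths:

#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>

DB::OpenTelemetry::TracingContext roundTrip(const DB::OpenTelemetry::TracingContext & ctx)
{
    DB::WriteBufferFromOwnString out;
    ctx.serialize(out); // "<trace_id>\n<span_id>\n<tracestate>\n<trace_flags>\n"

    DB::ReadBufferFromString in(out.str());
    DB::OpenTelemetry::TracingContext restored;
    restored.deserialize(in); // Reads the same four newline-terminated fields back.
    return restored;
}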
const TracingContextOnThread & CurrentContext()
{
return *current_fiber_trace_context;
return *current_trace_context;
}
void TracingContextOnThread::reset() noexcept
@ -295,7 +295,7 @@ TracingContextHolder::TracingContextHolder(
/// If any exception is raised during the construction, the tracing is not enabled on current thread.
try
{
if (current_fiber_trace_context->isTraceEnabled())
if (current_trace_context->isTraceEnabled())
{
///
/// This is not the normal case,
@ -308,15 +308,15 @@ TracingContextHolder::TracingContextHolder(
/// So this branch ensures this class can be instantiated multiple times on one same thread safely.
///
this->is_context_owner = false;
this->root_span.trace_id = current_fiber_trace_context->trace_id;
this->root_span.parent_span_id = current_fiber_trace_context->span_id;
this->root_span.trace_id = current_trace_context->trace_id;
this->root_span.parent_span_id = current_trace_context->span_id;
this->root_span.span_id = thread_local_rng();
this->root_span.operation_name = _operation_name;
this->root_span.start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
/// Set the root span as parent of other spans created on current thread
current_fiber_trace_context->span_id = this->root_span.span_id;
current_trace_context->span_id = this->root_span.span_id;
return;
}
@ -360,10 +360,10 @@ TracingContextHolder::TracingContextHolder(
}
/// Set up trace context on current thread only when the root span is successfully initialized.
*current_fiber_trace_context = _parent_trace_context;
current_fiber_trace_context->span_id = this->root_span.span_id;
current_fiber_trace_context->trace_flags = TRACE_FLAG_SAMPLED;
current_fiber_trace_context->span_log = _span_log;
*current_trace_context = _parent_trace_context;
current_trace_context->span_id = this->root_span.span_id;
current_trace_context->trace_flags = TRACE_FLAG_SAMPLED;
current_trace_context->span_log = _span_log;
}
TracingContextHolder::~TracingContextHolder()
@ -375,7 +375,7 @@ TracingContextHolder::~TracingContextHolder()
try
{
auto shared_span_log = current_fiber_trace_context->span_log.lock();
auto shared_span_log = current_trace_context->span_log.lock();
if (shared_span_log)
{
try
@ -406,11 +406,11 @@ TracingContextHolder::~TracingContextHolder()
if (this->is_context_owner)
{
/// Clear the context on current thread
current_fiber_trace_context->reset();
current_trace_context->reset();
}
else
{
current_fiber_trace_context->span_id = this->root_span.parent_span_id;
current_trace_context->span_id = this->root_span.parent_span_id;
}
}

Some files were not shown because too many files have changed in this diff.