mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 20:12:02 +00:00
Merge remote-tracking branch 'origin/master' into pr-local-plan
This commit is contained in:
commit
08032e97fd
2
.github/workflows/backport_branches.yml
vendored
2
.github/workflows/backport_branches.yml
vendored
@ -273,5 +273,5 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
python3 merge_pr.py
|
||||
|
2
.github/workflows/master.yml
vendored
2
.github/workflows/master.yml
vendored
@ -173,4 +173,4 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
4
.github/workflows/merge_queue.yml
vendored
4
.github/workflows/merge_queue.yml
vendored
@ -99,7 +99,7 @@ jobs:
|
||||
################################# Stage Final #################################
|
||||
#
|
||||
FinishCheck:
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
@ -112,4 +112,4 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }}
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
2
.github/workflows/pull_request.yml
vendored
2
.github/workflows/pull_request.yml
vendored
@ -191,7 +191,7 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
||||
#############################################################################################
|
||||
###################################### JEPSEN TESTS #########################################
|
||||
|
2
.github/workflows/release_branches.yml
vendored
2
.github/workflows/release_branches.yml
vendored
@ -496,4 +496,4 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
5
.github/workflows/tags_stable.yml
vendored
5
.github/workflows/tags_stable.yml
vendored
@ -46,9 +46,10 @@ jobs:
|
||||
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
|
||||
./utils/list-versions/update-docker-version.sh
|
||||
GID=$(id -g "${UID}")
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
|
||||
# --network=host and CI=1 are required for the S3 access from a container
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \
|
||||
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
|
||||
/ClickHouse/utils/changelog/changelog.py -v --debug-helpers \
|
||||
/ClickHouse/tests/ci/changelog.py -v --debug-helpers \
|
||||
--gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \
|
||||
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}"
|
||||
git add "./docs/changelogs/${GITHUB_TAG}.md"
|
||||
|
8
.gitmodules
vendored
8
.gitmodules
vendored
@ -91,13 +91,13 @@
|
||||
[submodule "contrib/aws"]
|
||||
path = contrib/aws
|
||||
url = https://github.com/ClickHouse/aws-sdk-cpp
|
||||
[submodule "aws-c-event-stream"]
|
||||
[submodule "contrib/aws-c-event-stream"]
|
||||
path = contrib/aws-c-event-stream
|
||||
url = https://github.com/awslabs/aws-c-event-stream
|
||||
[submodule "aws-c-common"]
|
||||
[submodule "contrib/aws-c-common"]
|
||||
path = contrib/aws-c-common
|
||||
url = https://github.com/awslabs/aws-c-common.git
|
||||
[submodule "aws-checksums"]
|
||||
[submodule "contrib/aws-checksums"]
|
||||
path = contrib/aws-checksums
|
||||
url = https://github.com/awslabs/aws-checksums
|
||||
[submodule "contrib/curl"]
|
||||
@ -161,7 +161,7 @@
|
||||
[submodule "contrib/xz"]
|
||||
path = contrib/xz
|
||||
url = https://github.com/xz-mirror/xz
|
||||
[submodule "abseil"]
|
||||
[submodule "contrib/abseil-cpp"]
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/ClickHouse/abseil-cpp.git
|
||||
[submodule "contrib/dragonbox"]
|
||||
|
@ -399,7 +399,7 @@ option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
|
||||
# but GWP-ASan also wants to use mmap frequently,
|
||||
# and due to a large number of memory mappings,
|
||||
# it does not work together well.
|
||||
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
|
||||
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") OR SANITIZE)
|
||||
set(ENABLE_GWP_ASAN OFF)
|
||||
endif ()
|
||||
|
||||
|
2
contrib/aws
vendored
2
contrib/aws
vendored
@ -1 +1 @@
|
||||
Subproject commit deeaa9e7c5fe690e3dacc4005d7ecfa7a66a32bb
|
||||
Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf
|
2
contrib/openssl
vendored
2
contrib/openssl
vendored
@ -1 +1 @@
|
||||
Subproject commit f7b8721dfc66abb147f24ca07b9c9d1d64f40f71
|
||||
Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.5.1.1763"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
#docker-official-library:off
|
||||
|
@ -208,6 +208,7 @@ handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIGSEGV nostop pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
|
@ -20,6 +20,7 @@ handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIGSEGV nostop pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
|
@ -10,14 +10,15 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
aspell \
|
||||
curl \
|
||||
git \
|
||||
gh \
|
||||
file \
|
||||
gh \
|
||||
git \
|
||||
libxml2-utils \
|
||||
locales \
|
||||
moreutils \
|
||||
python3-pip \
|
||||
yamllint \
|
||||
locales \
|
||||
zstd \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
@ -33,6 +34,7 @@ RUN pip3 install \
|
||||
flake8==4.0.1 \
|
||||
requests \
|
||||
thefuzz \
|
||||
tqdm==4.66.4 \
|
||||
types-requests \
|
||||
unidiff \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
45
docs/changelogs/v24.1.6.52-stable.md
Normal file
45
docs/changelogs/v24.1.6.52-stable.md
Normal file
@ -0,0 +1,45 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.1.6.52-stable (fa09f677bc9) FIXME as compared to v24.1.5.6-stable (7f67181ff31)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60292](https://github.com/ClickHouse/ClickHouse/issues/60292): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60832](https://github.com/ClickHouse/ClickHouse/issues/60832): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Backported in [#60413](https://github.com/ClickHouse/ClickHouse/issues/60413): Fix segmentation fault in KQL parser when the input query exceeds the `max_query_size`. Also re-enable the KQL dialect. Fixes [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036) and [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037). [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Backported in [#60074](https://github.com/ClickHouse/ClickHouse/issues/60074): Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Backported in [#60299](https://github.com/ClickHouse/ClickHouse/issues/60299): Fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#60066](https://github.com/ClickHouse/ClickHouse/issues/60066): Fix optimize_uniq_to_count removing the column alias. [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60638](https://github.com/ClickHouse/ClickHouse/issues/60638): Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60177](https://github.com/ClickHouse/ClickHouse/issues/60177): Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60279](https://github.com/ClickHouse/ClickHouse/issues/60279): Hide sensitive info for `S3Queue` table engine. [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#61000](https://github.com/ClickHouse/ClickHouse/issues/61000): Reduce the number of read rows from `system.numbers`. Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Backported in [#60791](https://github.com/ClickHouse/ClickHouse/issues/60791): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#60783](https://github.com/ClickHouse/ClickHouse/issues/60783): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#60803](https://github.com/ClickHouse/ClickHouse/issues/60803): Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Backported in [#60820](https://github.com/ClickHouse/ClickHouse/issues/60820): Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60841](https://github.com/ClickHouse/ClickHouse/issues/60841): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#60904](https://github.com/ClickHouse/ClickHouse/issues/60904): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#60186](https://github.com/ClickHouse/ClickHouse/issues/60186):. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#60333](https://github.com/ClickHouse/ClickHouse/issues/60333): CI: Fix job failures due to jepsen artifacts. [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60034](https://github.com/ClickHouse/ClickHouse/issues/60034): Fix mark release ready. [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60326](https://github.com/ClickHouse/ClickHouse/issues/60326): Ability to detect undead ZooKeeper sessions. [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#60363](https://github.com/ClickHouse/ClickHouse/issues/60363): CI: hot fix for gh statuses. [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60648](https://github.com/ClickHouse/ClickHouse/issues/60648): Detect io_uring in tests. [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#60569](https://github.com/ClickHouse/ClickHouse/issues/60569): Remove broken test while we fix it. [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60756](https://github.com/ClickHouse/ClickHouse/issues/60756): Update shellcheck. [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60584](https://github.com/ClickHouse/ClickHouse/issues/60584): CI: fix docker build job name. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
|
||||
|
100
docs/changelogs/v24.3.4.147-lts.md
Normal file
100
docs/changelogs/v24.3.4.147-lts.md
Normal file
@ -0,0 +1,100 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.3.4.147-lts (31a7bdc346d) FIXME as compared to v24.3.3.102-lts (7e7f3bdd9be)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#63465](https://github.com/ClickHouse/ClickHouse/issues/63465): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#64290](https://github.com/ClickHouse/ClickHouse/issues/64290): Fix logical-error when undoing quorum insert transaction. [#61953](https://github.com/ClickHouse/ClickHouse/pull/61953) ([Han Fei](https://github.com/hanfei1991)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#63610](https://github.com/ClickHouse/ClickHouse/issues/63610): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#65128](https://github.com/ClickHouse/ClickHouse/issues/65128): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Backported in [#64277](https://github.com/ClickHouse/ClickHouse/issues/64277): Fix queries with FINAL giving wrong results when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#63716](https://github.com/ClickHouse/ClickHouse/issues/63716): Fix excessive memory usage for queries with nested lambdas. Fixes [#62036](https://github.com/ClickHouse/ClickHouse/issues/62036). [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#63247](https://github.com/ClickHouse/ClickHouse/issues/63247): Fix size checks when updating materialized nested columns ( fixes [#62731](https://github.com/ClickHouse/ClickHouse/issues/62731) ). [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)).
|
||||
* Backported in [#62984](https://github.com/ClickHouse/ClickHouse/issues/62984): Fix the `Unexpected return type` error for queries that read from `StorageBuffer` with `PREWHERE` when the source table has different types. Fixes [#62545](https://github.com/ClickHouse/ClickHouse/issues/62545). [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#63185](https://github.com/ClickHouse/ClickHouse/issues/63185): Sanity check: Clamp values instead of throwing. [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#63293](https://github.com/ClickHouse/ClickHouse/issues/63293): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#63411](https://github.com/ClickHouse/ClickHouse/issues/63411): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
|
||||
* Backported in [#63616](https://github.com/ClickHouse/ClickHouse/issues/63616): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63455](https://github.com/ClickHouse/ClickHouse/issues/63455): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Backported in [#63603](https://github.com/ClickHouse/ClickHouse/issues/63603): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#63508](https://github.com/ClickHouse/ClickHouse/issues/63508): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#63595](https://github.com/ClickHouse/ClickHouse/issues/63595): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#63748](https://github.com/ClickHouse/ClickHouse/issues/63748): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#63770](https://github.com/ClickHouse/ClickHouse/issues/63770): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#64189](https://github.com/ClickHouse/ClickHouse/issues/64189): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#63845](https://github.com/ClickHouse/ClickHouse/issues/63845): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#63906](https://github.com/ClickHouse/ClickHouse/issues/63906): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Backported in [#63989](https://github.com/ClickHouse/ClickHouse/issues/63989): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#64031](https://github.com/ClickHouse/ClickHouse/issues/64031): Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64559](https://github.com/ClickHouse/ClickHouse/issues/64559): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#64009](https://github.com/ClickHouse/ClickHouse/issues/64009): Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#64236](https://github.com/ClickHouse/ClickHouse/issues/64236): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#64106](https://github.com/ClickHouse/ClickHouse/issues/64106): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#64168](https://github.com/ClickHouse/ClickHouse/issues/64168): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#64320](https://github.com/ClickHouse/ClickHouse/issues/64320): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
|
||||
* Backported in [#64380](https://github.com/ClickHouse/ClickHouse/issues/64380): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Backported in [#64567](https://github.com/ClickHouse/ClickHouse/issues/64567): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#64270](https://github.com/ClickHouse/ClickHouse/issues/64270): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#64339](https://github.com/ClickHouse/ClickHouse/issues/64339): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#64259](https://github.com/ClickHouse/ClickHouse/issues/64259): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#64688](https://github.com/ClickHouse/ClickHouse/issues/64688): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#64725](https://github.com/ClickHouse/ClickHouse/issues/64725): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#64621](https://github.com/ClickHouse/ClickHouse/issues/64621): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64678](https://github.com/ClickHouse/ClickHouse/issues/64678): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#64831](https://github.com/ClickHouse/ClickHouse/issues/64831): Fix bug which could lead to non-working TTLs with expressions. Fixes [#63700](https://github.com/ClickHouse/ClickHouse/issues/63700). [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#64940](https://github.com/ClickHouse/ClickHouse/issues/64940): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#64869](https://github.com/ClickHouse/ClickHouse/issues/64869): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#64980](https://github.com/ClickHouse/ClickHouse/issues/64980): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64972](https://github.com/ClickHouse/ClickHouse/issues/64972): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#65070](https://github.com/ClickHouse/ClickHouse/issues/65070): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Backported in [#65175](https://github.com/ClickHouse/ClickHouse/issues/65175): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
|
||||
* Backported in [#64587](https://github.com/ClickHouse/ClickHouse/issues/64587): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#64878](https://github.com/ClickHouse/ClickHouse/issues/64878): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#63304](https://github.com/ClickHouse/ClickHouse/issues/63304):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#63708](https://github.com/ClickHouse/ClickHouse/issues/63708):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.3: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64907](https://github.com/ClickHouse/ClickHouse/pull/64907) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#63751](https://github.com/ClickHouse/ClickHouse/issues/63751): group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#63558](https://github.com/ClickHouse/ClickHouse/issues/63558): Try fix segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#63336](https://github.com/ClickHouse/ClickHouse/issues/63336): The commit url has different pattern. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#63374](https://github.com/ClickHouse/ClickHouse/issues/63374): Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#63625](https://github.com/ClickHouse/ClickHouse/issues/63625): Workaround for `oklch()` inside canvas bug for firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Backported in [#63569](https://github.com/ClickHouse/ClickHouse/issues/63569): Add `jwcrypto` to integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Backported in [#63649](https://github.com/ClickHouse/ClickHouse/issues/63649): Fix `02362_part_log_merge_algorithm` flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
|
||||
* Backported in [#63762](https://github.com/ClickHouse/ClickHouse/issues/63762): Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#63741](https://github.com/ClickHouse/ClickHouse/issues/63741): Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Backported in [#63826](https://github.com/ClickHouse/ClickHouse/issues/63826): Fix `test_odbc_interaction` for arm64 on linux. [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63895](https://github.com/ClickHouse/ClickHouse/issues/63895): Fix `test_catboost_evaluate` for aarch64. [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63887](https://github.com/ClickHouse/ClickHouse/issues/63887): Fix `test_disk_types` for aarch64. [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63879](https://github.com/ClickHouse/ClickHouse/issues/63879): Fix `test_short_strings_aggregation` for arm. [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63916](https://github.com/ClickHouse/ClickHouse/issues/63916): Disable `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#63969](https://github.com/ClickHouse/ClickHouse/issues/63969): fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Backported in [#64047](https://github.com/ClickHouse/ClickHouse/issues/64047): Do not create new release in release branch automatically. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#64076](https://github.com/ClickHouse/ClickHouse/issues/64076): Files without shebang have mime 'text/plain' or 'inode/x-empty'. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#64142](https://github.com/ClickHouse/ClickHouse/issues/64142): Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#64159](https://github.com/ClickHouse/ClickHouse/issues/64159): Add retries in `git submodule update`. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#64473](https://github.com/ClickHouse/ClickHouse/issues/64473): Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#65113](https://github.com/ClickHouse/ClickHouse/issues/65113): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#64999](https://github.com/ClickHouse/ClickHouse/issues/64999): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
|
38
docs/changelogs/v24.5.2.34-stable.md
Normal file
38
docs/changelogs/v24.5.2.34-stable.md
Normal file
@ -0,0 +1,38 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.5.2.34-stable (45589aeee49) FIXME as compared to v24.5.1.1763-stable (647c154a94d)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#65096](https://github.com/ClickHouse/ClickHouse/issues/65096): The setting `allow_experimental_join_condition` was accidentally marked as important which may prevent distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#65132](https://github.com/ClickHouse/ClickHouse/issues/65132): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Backported in [#64729](https://github.com/ClickHouse/ClickHouse/issues/64729): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#65061](https://github.com/ClickHouse/ClickHouse/issues/65061): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. Fixes [#64445](https://github.com/ClickHouse/ClickHouse/issues/64445). [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). Fixes [#64575](https://github.com/ClickHouse/ClickHouse/issues/64575). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64944](https://github.com/ClickHouse/ClickHouse/issues/64944): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#64873](https://github.com/ClickHouse/ClickHouse/issues/64873): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#64984](https://github.com/ClickHouse/ClickHouse/issues/64984): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#64976](https://github.com/ClickHouse/ClickHouse/issues/64976): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#65074](https://github.com/ClickHouse/ClickHouse/issues/65074): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Backported in [#65179](https://github.com/ClickHouse/ClickHouse/issues/65179): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65163](https://github.com/ClickHouse/ClickHouse/issues/65163): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. Part of [#62245](https://github.com/ClickHouse/ClickHouse/issues/62245). [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
|
||||
* Backported in [#64882](https://github.com/ClickHouse/ClickHouse/issues/64882): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#65002](https://github.com/ClickHouse/ClickHouse/issues/65002): Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#65115](https://github.com/ClickHouse/ClickHouse/issues/65115): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#64796](https://github.com/ClickHouse/ClickHouse/issues/64796): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
|
14
docs/changelogs/v24.5.3.5-stable.md
Normal file
14
docs/changelogs/v24.5.3.5-stable.md
Normal file
@ -0,0 +1,14 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.5.3.5-stable (e0eb66f8e17) FIXME as compared to v24.5.2.34-stable (45589aeee49)
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#65227](https://github.com/ClickHouse/ClickHouse/issues/65227): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Backported in [#65219](https://github.com/ClickHouse/ClickHouse/issues/65219): Fix false-positive leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
@ -229,6 +229,10 @@ For production builds, clang is used, but we also test make gcc builds. For deve
|
||||
|
||||
## Sanitizers {#sanitizers}
|
||||
|
||||
:::note
|
||||
If the process (ClickHouse server or client) crashes at startup when running it locally, you might need to disable address space layout randomization: `sudo sysctl kernel.randomize_va_space=0`
|
||||
:::
|
||||
|
||||
### Address sanitizer
|
||||
We run functional, integration, stress and unit tests under ASan on a per-commit basis.
|
||||
|
||||
|
@ -75,7 +75,7 @@ Possible values:
|
||||
- unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper.
|
||||
- ordered — With ordered mode, only the max name of the successfully consumed file and the names of files that will be retried after an unsuccessful loading attempt are stored in ZooKeeper.
|
||||
|
||||
Default value: `unordered`.
|
||||
Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, and the setting must be specified manually. For tables created on earlier versions, the default value remains `ordered` for compatibility.
|
||||
|
||||
### after_processing {#after_processing}
|
||||
|
||||
@ -181,6 +181,10 @@ For 'Ordered' mode. Defines a maximum boundary for reschedule interval for a bac
|
||||
|
||||
Default value: `30000`.
|
||||
|
||||
### s3queue_buckets {#buckets}
|
||||
|
||||
For 'Ordered' mode. Available since `24.6`. If there are several replicas of S3Queue table, each working with the same metadata directory in keeper, the value of `s3queue_buckets` needs to be equal to at least the number of replicas. If `s3queue_processing_threads` setting is used as well, it makes sense to increase the value of `s3queue_buckets` setting even further, as it defines the actual parallelism of `S3Queue` processing.
|
||||
|
||||
## S3-related Settings {#s3-settings}
|
||||
|
||||
Engine supports all s3 related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md).
|
||||
|
@ -480,7 +480,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
|
||||
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
|
||||
- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`.
|
||||
|
@ -2924,6 +2924,8 @@ Define proxy servers for HTTP and HTTPS requests, currently supported by S3 stor
|
||||
|
||||
There are three ways to define proxy servers: environment variables, proxy lists, and remote proxy resolvers.
|
||||
|
||||
Bypassing proxy servers for specific hosts is also supported with the use of `no_proxy`.
|
||||
|
||||
### Environment variables
|
||||
|
||||
The `http_proxy` and `https_proxy` environment variables allow you to specify a
|
||||
@ -3033,6 +3035,29 @@ This also allows a mix of resolver types can be used.
|
||||
|
||||
By default, tunneling (i.e., `HTTP CONNECT`) is used to make `HTTPS` requests over an `HTTP` proxy. This setting can be used to disable it.
|
||||
|
||||
### no_proxy
|
||||
By default, all requests will go through the proxy. In order to disable it for specific hosts, the `no_proxy` variable must be set.
|
||||
It can be set inside the `<proxy>` clause for the list and remote resolvers, and as an environment variable for the environment resolver.
|
||||
It supports IP addresses, domains, subdomains and `'*'` wildcard for full bypass. Leading dots are stripped just like curl does.
|
||||
|
||||
Example:
|
||||
|
||||
The configuration below bypasses proxy requests to `clickhouse.cloud` and all of its subdomains (e.g., `auth.clickhouse.cloud`).
|
||||
The same applies to GitLab, even though it has a leading dot. Both `gitlab.com` and `about.gitlab.com` would bypass the proxy.
|
||||
|
||||
``` xml
|
||||
<proxy>
|
||||
<no_proxy>clickhouse.cloud,.gitlab.com</no_proxy>
|
||||
<http>
|
||||
<uri>http://proxy1</uri>
|
||||
<uri>http://proxy2:3128</uri>
|
||||
</http>
|
||||
<https>
|
||||
<uri>http://proxy1:3128</uri>
|
||||
</https>
|
||||
</proxy>
|
||||
```
|
||||
|
||||
## max_materialized_views_count_for_table {#max_materialized_views_count_for_table}
|
||||
|
||||
A limit on the number of materialized views attached to a table.
|
||||
|
@ -106,8 +106,8 @@ To work with these states, use:
|
||||
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine.
|
||||
- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function.
|
||||
- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function.
|
||||
- [-Merge](#aggregate_functions_combinators-merge) combinator.
|
||||
- [-MergeState](#aggregate_functions_combinators-mergestate) combinator.
|
||||
- [-Merge](#-merge) combinator.
|
||||
- [-MergeState](#-mergestate) combinator.
|
||||
|
||||
## -Merge
|
||||
|
||||
|
@ -82,10 +82,12 @@ FROM
|
||||
|
||||
In this case, you should remember that you do not know the histogram bin borders.
|
||||
|
||||
## sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
|
||||
## sequenceMatch
|
||||
|
||||
Checks whether the sequence contains an event chain that matches the pattern.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
|
||||
```
|
||||
@ -102,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
|
||||
**Returned values**
|
||||
|
||||
@ -170,9 +172,9 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM
|
||||
|
||||
**See Also**
|
||||
|
||||
- [sequenceCount](#function-sequencecount)
|
||||
- [sequenceCount](#sequencecount)
|
||||
|
||||
## sequenceCount(pattern)(time, cond1, cond2, ...)
|
||||
## sequenceCount
|
||||
|
||||
Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched.
|
||||
|
||||
@ -180,6 +182,8 @@ Counts the number of event chains that matched the pattern. The function searche
|
||||
Events that occur at the same second may lie in the sequence in an undefined order affecting the result.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
sequenceCount(pattern)(timestamp, cond1, cond2, ...)
|
||||
```
|
||||
@ -192,7 +196,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
|
||||
**Returned values**
|
||||
|
||||
@ -229,7 +233,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
|
||||
|
||||
**See Also**
|
||||
|
||||
- [sequenceMatch](#function-sequencematch)
|
||||
- [sequenceMatch](#sequencematch)
|
||||
|
||||
## windowFunnel
|
||||
|
||||
|
@ -3,7 +3,7 @@ slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression
|
||||
sidebar_position: 221
|
||||
---
|
||||
|
||||
# stochasticLinearRegression
|
||||
# stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters}
|
||||
|
||||
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
|
||||
|
||||
@ -72,5 +72,5 @@ The query will return a column of predicted values. Note that first argument of
|
||||
|
||||
**See Also**
|
||||
|
||||
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression)
|
||||
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#stochasticlogisticregression)
|
||||
- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
|
||||
|
@ -11,7 +11,7 @@ This function implements stochastic logistic regression. It can be used for bina
|
||||
|
||||
Parameters are exactly the same as in stochasticLinearRegression:
|
||||
`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`.
|
||||
For more information see [parameters](#agg_functions-stochasticlinearregression-parameters).
|
||||
For more information see [parameters](../reference/stochasticlinearregression.md/#parameters).
|
||||
|
||||
``` text
|
||||
stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
|
||||
|
@ -27,7 +27,7 @@ Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable`](#varpopstable) function.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -76,7 +76,7 @@ Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -40,7 +40,7 @@ Where:
|
||||
|
||||
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -82,11 +82,11 @@ varSampStable(expr)
|
||||
|
||||
**Returned value**
|
||||
|
||||
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
|
||||
The `varSampStable` function returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function):
|
||||
The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](#varsamp) function:
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
@ -97,9 +97,9 @@ Where:
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
The difference between `varSampStable` and `varSamp` is that `varSampStable` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
|
||||
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
|
||||
Like `varSamp`, the `varSampStable` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable`](./varpop#varpopstable) function instead.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -125,4 +125,4 @@ Response:
|
||||
0.865
|
||||
```
|
||||
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp` due to the more precise handling of floating-point arithmetic.
|
||||
|
@ -33,7 +33,7 @@ Result:
|
||||
|
||||
## Ring
|
||||
|
||||
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)).
|
||||
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point)).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -54,7 +54,7 @@ Result:
|
||||
|
||||
## Polygon
|
||||
|
||||
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes.
|
||||
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -76,7 +76,7 @@ Result:
|
||||
|
||||
## MultiPolygon
|
||||
|
||||
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)).
|
||||
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon)).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -16,7 +16,7 @@ ClickHouse supports special functions for working with dictionaries that can be
|
||||
ClickHouse supports:
|
||||
|
||||
- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
|
||||
- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
|
||||
- [Embedded dictionaries](#embedded-dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
|
||||
|
||||
|
||||
:::tip Tutorial
|
||||
@ -82,7 +82,7 @@ You can [configure](#configuring-a-dictionary) any number of dictionaries in the
|
||||
You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
|
||||
:::
|
||||
|
||||
## Configuring a Dictionary {#configuring-a-dictionary}
|
||||
## Configuring a Dictionary
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration
|
||||
LIFETIME(...) -- Lifetime of dictionary in memory
|
||||
```
|
||||
|
||||
## Storing Dictionaries in Memory {#storing-dictionaries-in-memory}
|
||||
## Storing Dictionaries in Memory
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
@ -415,7 +415,7 @@ or
|
||||
LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1]))
|
||||
```
|
||||
|
||||
### range_hashed {#range_hashed}
|
||||
### range_hashed
|
||||
|
||||
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
|
||||
|
||||
@ -679,7 +679,7 @@ When searching for a dictionary, the cache is searched first. For each block of
|
||||
|
||||
If keys are not found in the dictionary, an update cache task is created and added to the update queue. Update queue properties can be controlled with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
|
||||
|
||||
For cache dictionaries, the expiration [lifetime](#dictionary-updates) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`.
|
||||
For cache dictionaries, the expiration [lifetime](#refreshing-dictionary-data-using-lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`.
|
||||
|
||||
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table.
|
||||
|
||||
@ -899,7 +899,7 @@ Other types are not supported yet. The function returns the attribute for the pr
|
||||
|
||||
Data must completely fit into RAM.
|
||||
|
||||
## Refreshing dictionary data using LIFETIME {#lifetime}
|
||||
## Refreshing dictionary data using LIFETIME
|
||||
|
||||
ClickHouse periodically updates dictionaries based on the `LIFETIME` tag (defined in seconds). `LIFETIME` is the update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries.
|
||||
|
||||
@ -1031,7 +1031,7 @@ SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
|
||||
...
|
||||
```
|
||||
|
||||
## Dictionary Sources {#dictionary-sources}
|
||||
## Dictionary Sources
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -1065,7 +1065,7 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
|
||||
|
||||
The source is configured in the `source` section.
|
||||
|
||||
For source types [Local file](#local_file), [Executable file](#executable), [HTTP(s)](#https), [ClickHouse](#clickhouse)
|
||||
For source types [Local file](#local-file), [Executable file](#executable-file), [HTTP(s)](#https), [ClickHouse](#clickhouse)
|
||||
optional settings are available:
|
||||
|
||||
``` xml
|
||||
@ -1089,10 +1089,10 @@ SETTINGS(format_csv_allow_single_quotes = 0)
|
||||
|
||||
Types of sources (`source_type`):
|
||||
|
||||
- [Local file](#local_file)
|
||||
- [Executable File](#executable)
|
||||
- [Executable Pool](#executable_pool)
|
||||
- [HTTP(S)](#http)
|
||||
- [Local file](#local-file)
|
||||
- [Executable File](#executable-file)
|
||||
- [Executable Pool](#executable-pool)
|
||||
- [HTTP(S)](#https)
|
||||
- DBMS
|
||||
- [ODBC](#odbc)
|
||||
- [MySQL](#mysql)
|
||||
@ -1102,7 +1102,7 @@ Types of sources (`source_type`):
|
||||
- [Cassandra](#cassandra)
|
||||
- [PostgreSQL](#postgresql)
|
||||
|
||||
### Local File {#local_file}
|
||||
### Local File
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1132,9 +1132,9 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION
|
||||
|
||||
- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
|
||||
|
||||
### Executable File {#executable}
|
||||
### Executable File
|
||||
|
||||
Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
|
||||
Working with executable files depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1161,7 +1161,7 @@ Setting fields:
|
||||
|
||||
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
|
||||
|
||||
### Executable Pool {#executable_pool}
|
||||
### Executable Pool
|
||||
|
||||
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
|
||||
|
||||
@ -1196,9 +1196,9 @@ Setting fields:
|
||||
|
||||
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
|
||||
|
||||
### HTTP(S) {#https}
|
||||
### HTTP(S)
|
||||
|
||||
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
|
||||
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1285,7 +1285,7 @@ Setting fields:
|
||||
- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
|
||||
- `table` – Name of the table and schema if exists.
|
||||
- `connection_string` – Connection string.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
:::note
|
||||
@ -1575,7 +1575,7 @@ Setting fields:
|
||||
|
||||
- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter.
|
||||
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
|
||||
- `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
|
||||
|
||||
@ -1672,7 +1672,7 @@ Setting fields:
|
||||
- `db` – Name of the database.
|
||||
- `table` – Name of the table.
|
||||
- `where` – The selection criteria. May be omitted.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `secure` – Use SSL for the connection.
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
@ -1849,7 +1849,7 @@ Setting fields:
|
||||
- `db` – Name of the database.
|
||||
- `table` – Name of the table.
|
||||
- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
:::note
|
||||
@ -1873,7 +1873,7 @@ LAYOUT(FLAT())
|
||||
LIFETIME(0);
|
||||
```
|
||||
|
||||
## Dictionary Key and Fields {#dictionary-key-and-fields}
|
||||
## Dictionary Key and Fields
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -1963,7 +1963,7 @@ PRIMARY KEY Id
|
||||
|
||||
### Composite Key
|
||||
|
||||
The key can be a `tuple` from any types of fields. The [layout](#storig-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
|
||||
The key can be a `tuple` from any types of fields. The [layout](#storing-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
|
||||
|
||||
:::tip
|
||||
A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
|
||||
@ -2031,7 +2031,7 @@ CREATE DICTIONARY somename (
|
||||
Configuration fields:
|
||||
|
||||
| Tag | Description | Required |
|
||||
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
||||
|------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
||||
| `name` | Column name. | Yes |
|
||||
| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes |
|
||||
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
|
||||
@ -2040,7 +2040,7 @@ Configuration fields:
|
||||
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
|
||||
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
|
||||
|
||||
## Hierarchical Dictionaries {#hierarchical-dictionaries}
|
||||
## Hierarchical Dictionaries
|
||||
|
||||
ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key).
|
||||
|
||||
@ -2165,7 +2165,7 @@ Points can be specified as an array or a tuple of their coordinates. In the curr
|
||||
|
||||
The user can upload their own data in all formats supported by ClickHouse.
|
||||
|
||||
There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available:
|
||||
There are 3 types of [in-memory storage](#storing-dictionaries-in-memory) available:
|
||||
|
||||
- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
|
||||
|
||||
@ -2435,7 +2435,7 @@ LIFETIME(0)
|
||||
LAYOUT(regexp_tree);
|
||||
```
|
||||
|
||||
## Embedded Dictionaries {#embedded-dictionaries}
|
||||
## Embedded Dictionaries
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
|
@ -1261,7 +1261,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting.
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#arrayreversesort) in a sorting.
|
||||
|
||||
The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:
|
||||
|
||||
@ -1307,10 +1307,15 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.
|
||||
|
||||
Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.
|
||||
|
||||
## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort}
|
||||
## arrayReverseSort
|
||||
|
||||
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
arrayReverseSort([func,] arr, ...)
|
||||
```
|
||||
Example of integer values sorting:
|
||||
|
||||
``` sql
|
||||
@ -1907,10 +1912,16 @@ FROM numbers(1,10);
|
||||
|
||||
- [arrayReduce](#arrayreduce)
|
||||
|
||||
## arrayReverse(arr)
|
||||
## arrayReverse
|
||||
|
||||
Returns an array of the same size as the original array containing the elements in reverse order.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
arrayReverse(arr)
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
|
@ -74,7 +74,7 @@ bitmapSubsetInRange(bitmap, range_start, range_end)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
|
||||
- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -104,7 +104,7 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
|
||||
- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -134,7 +134,7 @@ subBitmap(bitmap, offset, cardinality_limit)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – The bitmap. [Bitmap object](#bitmapbuild).
|
||||
- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md).
|
||||
- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -162,7 +162,7 @@ bitmapContains(bitmap, needle)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
@ -188,7 +188,7 @@ Result:
|
||||
|
||||
Checks whether two bitmaps intersect.
|
||||
|
||||
If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently.
|
||||
If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmapcontains) instead as it works more efficiently.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -83,7 +83,7 @@ Result:
|
||||
```
|
||||
## makeDate32
|
||||
|
||||
Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md).
|
||||
Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
|
||||
|
||||
## makeDateTime
|
||||
|
||||
@ -214,7 +214,7 @@ Result:
|
||||
|
||||
**See also**
|
||||
|
||||
- [serverTimeZone](#serverTimeZone)
|
||||
- [serverTimeZone](#servertimezone)
|
||||
|
||||
## serverTimeZone
|
||||
|
||||
@ -249,7 +249,7 @@ Result:
|
||||
|
||||
**See also**
|
||||
|
||||
- [timeZone](#timeZone)
|
||||
- [timeZone](#timezone)
|
||||
|
||||
## toTimeZone
|
||||
|
||||
@ -305,7 +305,7 @@ int32samoa: 1546300800
|
||||
|
||||
**See Also**
|
||||
|
||||
- [formatDateTime](#formatDateTime) - supports non-constant timezone.
|
||||
- [formatDateTime](#formatdatetime) - supports non-constant timezone.
|
||||
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
|
||||
|
||||
## timeZoneOf
|
||||
@ -1006,7 +1006,7 @@ toStartOfWeek(t[, mode[, timezone]])
|
||||
**Arguments**
|
||||
|
||||
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function
|
||||
- `mode` - determines the first day of the week as described in the [toWeek()](#toweek) function
|
||||
- `timezone` - Optional parameter, it behaves like any other conversion function
|
||||
|
||||
**Returned value**
|
||||
@ -1049,7 +1049,7 @@ toLastDayOfWeek(t[, mode[, timezone]])
|
||||
**Arguments**
|
||||
|
||||
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `mode` - determines the last day of the week as described in the [toWeek()](date-time-functions#toweek) function
|
||||
- `mode` - determines the last day of the week as described in the [toWeek](#toweek) function
|
||||
- `timezone` - Optional parameter, it behaves like any other conversion function
|
||||
|
||||
**Returned value**
|
||||
@ -1719,7 +1719,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [fromDaysSinceYearZero](#fromDaysSinceYearZero)
|
||||
- [fromDaysSinceYearZero](#fromdayssinceyearzero)
|
||||
|
||||
## fromDaysSinceYearZero
|
||||
|
||||
@ -1759,11 +1759,11 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [toDaysSinceYearZero](#toDaysSinceYearZero)
|
||||
- [toDaysSinceYearZero](#todayssinceyearzero)
|
||||
|
||||
## fromDaysSinceYearZero32
|
||||
|
||||
Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md).
|
||||
Like [fromDaysSinceYearZero](#fromdayssinceyearzero) but returns a [Date32](../data-types/date32.md).
|
||||
|
||||
## age
|
||||
|
||||
@ -1982,7 +1982,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [toStartOfInterval](#tostartofintervaldate_or_date_with_time-interval-x-unit--time_zone)
|
||||
- [toStartOfInterval](#tostartofinterval)
|
||||
|
||||
## date\_add
|
||||
|
||||
@ -2055,7 +2055,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [addDate](#addDate)
|
||||
- [addDate](#adddate)
|
||||
|
||||
## date\_sub
|
||||
|
||||
@ -2129,7 +2129,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [subDate](#subDate)
|
||||
- [subDate](#subdate)
|
||||
|
||||
## timestamp\_add
|
||||
|
||||
@ -2310,7 +2310,7 @@ Alias: `SUBDATE`
|
||||
|
||||
- [date_sub](#date_sub)
|
||||
|
||||
## now {#now}
|
||||
## now
|
||||
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
@ -3609,7 +3609,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## formatDateTime {#formatDateTime}
|
||||
## formatDateTime
|
||||
|
||||
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
|
||||
|
||||
@ -3734,10 +3734,9 @@ LIMIT 10
|
||||
|
||||
**See Also**
|
||||
|
||||
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
|
||||
- [formatDateTimeInJodaSyntax](#formatdatetimeinjodasyntax)
|
||||
|
||||
|
||||
## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}
|
||||
## formatDateTimeInJodaSyntax
|
||||
|
||||
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
|
||||
|
||||
@ -3902,11 +3901,11 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax)
|
||||
- [fromUnixTimestampInJodaSyntax](#fromunixtimestampinjodasyntax)
|
||||
|
||||
## fromUnixTimestampInJodaSyntax
|
||||
|
||||
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
|
||||
Same as [fromUnixTimestamp](#fromunixtimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
|
||||
|
||||
**Example:**
|
||||
|
||||
@ -4121,7 +4120,7 @@ Result:
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
:::note
|
||||
This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now-now) is the preferred usage.
|
||||
This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now) is the preferred usage.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat
|
||||
|
||||
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).
|
||||
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet}
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull
|
||||
|
||||
Retrieves values from a dictionary.
|
||||
|
||||
|
@ -4,6 +4,8 @@ sidebar_label: Geohash
|
||||
title: "Functions for Working with Geohash"
|
||||
---
|
||||
|
||||
## Geohash
|
||||
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) is a geocode system that subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string, the more precise the geographic location.
|
||||
|
||||
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
|
||||
|
@ -4,6 +4,8 @@ sidebar_label: H3 Indexes
|
||||
title: "Functions for Working with H3 Indexes"
|
||||
---
|
||||
|
||||
## H3 Index
|
||||
|
||||
[H3](https://eng.uber.com/h3/) is a geographical indexing system where Earth’s surface is divided into a grid of even hexagonal cells. This system is hierarchical, i.e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
|
||||
|
||||
The level of the hierarchy is called `resolution` and can receive a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells.
|
||||
@ -16,7 +18,7 @@ The full description of the H3 system is available at [the Uber Engineering site
|
||||
|
||||
## h3IsValid
|
||||
|
||||
Verifies whether the number is a valid [H3](#h3index) index.
|
||||
Verifies whether the number is a valid [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -51,7 +53,7 @@ Result:
|
||||
|
||||
## h3GetResolution
|
||||
|
||||
Defines the resolution of the given [H3](#h3index) index.
|
||||
Defines the resolution of the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -86,7 +88,7 @@ Result:
|
||||
|
||||
## h3EdgeAngle
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in grades.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in grades.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -100,7 +102,7 @@ h3EdgeAngle(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in grades. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -120,7 +122,7 @@ Result:
|
||||
|
||||
## h3EdgeLengthM
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in meters.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in meters.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -134,7 +136,7 @@ h3EdgeLengthM(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in meters. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -154,7 +156,7 @@ Result:
|
||||
|
||||
## h3EdgeLengthKm
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in kilometers.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in kilometers.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -168,7 +170,7 @@ h3EdgeLengthKm(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in kilometers. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -188,7 +190,7 @@ Result:
|
||||
|
||||
## geoToH3
|
||||
|
||||
Returns [H3](#h3index) point index `(lon, lat)` with specified resolution.
|
||||
Returns [H3](#h3-index) point index `(lon, lat)` with specified resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -225,7 +227,7 @@ Result:
|
||||
|
||||
## h3ToGeo
|
||||
|
||||
Returns the centroid longitude and latitude corresponding to the provided [H3](#h3index) index.
|
||||
Returns the centroid longitude and latitude corresponding to the provided [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -294,7 +296,7 @@ Result:
|
||||
|
||||
## h3kRing
|
||||
|
||||
Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.
|
||||
Lists all the [H3](#h3-index) hexagons in the radius of `k` from the given hexagon in random order.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -335,7 +337,7 @@ Result:
|
||||
|
||||
## h3GetBaseCell
|
||||
|
||||
Returns the base cell number of the [H3](#h3index) index.
|
||||
Returns the base cell number of the [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -437,7 +439,7 @@ Result:
|
||||
|
||||
## h3IndexesAreNeighbors
|
||||
|
||||
Returns whether or not the provided [H3](#h3index) indexes are neighbors.
|
||||
Returns whether or not the provided [H3](#h3-index) indexes are neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -473,7 +475,7 @@ Result:
|
||||
|
||||
## h3ToChildren
|
||||
|
||||
Returns an array of child indexes for the given [H3](#h3index) index.
|
||||
Returns an array of child indexes for the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -508,7 +510,7 @@ Result:
|
||||
|
||||
## h3ToParent
|
||||
|
||||
Returns the parent (coarser) index containing the given [H3](#h3index) index.
|
||||
Returns the parent (coarser) index containing the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -609,7 +611,7 @@ Result:
|
||||
|
||||
## h3GetResolution
|
||||
|
||||
Returns the resolution of the [H3](#h3index) index.
|
||||
Returns the resolution of the [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -643,7 +645,7 @@ Result:
|
||||
|
||||
## h3IsResClassIII
|
||||
|
||||
Returns whether [H3](#h3index) index has a resolution with Class III orientation.
|
||||
Returns whether [H3](#h3-index) index has a resolution with Class III orientation.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -678,7 +680,7 @@ Result:
|
||||
|
||||
## h3IsPentagon
|
||||
|
||||
Returns whether this [H3](#h3index) index represents a pentagonal cell.
|
||||
Returns whether this [H3](#h3-index) index represents a pentagonal cell.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -713,7 +715,7 @@ Result:
|
||||
|
||||
## h3GetFaces
|
||||
|
||||
Returns icosahedron faces intersected by a given [H3](#h3index) index.
|
||||
Returns icosahedron faces intersected by a given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -815,7 +817,7 @@ Result:
|
||||
|
||||
## h3ToCenterChild
|
||||
|
||||
Returns the center child (finer) [H3](#h3index) index contained by given [H3](#h3index) at the given resolution.
|
||||
Returns the center child (finer) [H3](#h3-index) index contained by given [H3](#h3-index) at the given resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -830,7 +832,7 @@ h3ToCenterChild(index, resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md).
|
||||
- [H3](#h3-index) index of the center child contained by given [H3](#h3-index) at the given resolution. [UInt64](../../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -5,6 +5,8 @@ sidebar_label: S2 Geometry
|
||||
|
||||
# Functions for Working with S2 Index
|
||||
|
||||
## S2Index
|
||||
|
||||
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
|
||||
|
||||
In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index.
|
||||
|
@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
|
||||
|
||||
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
|
||||
## MD5 {#md5}
|
||||
## MD5
|
||||
|
||||
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 {#siphash64}
|
||||
## sipHash64
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
|
@ -295,7 +295,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null
|
||||
## toIPv6
|
||||
|
||||
Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
|
||||
Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format.
|
||||
Similar to [IPv6StringToNum](#ipv6stringtonum) function, which converts IPv6 address to binary format.
|
||||
|
||||
If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned.
|
||||
|
||||
|
@ -5,10 +5,10 @@ sidebar_label: JSON
|
||||
---
|
||||
|
||||
There are two sets of functions to parse JSON:
|
||||
- [`simpleJSON*` (`visitParam*`)](#simplejson--visitparam-functions) which is made for parsing a limited subset of JSON extremely fast.
|
||||
- [`simpleJSON*` (`visitParam*`)](#simplejson-visitparam-functions) which is made for parsing a limited subset of JSON extremely fast.
|
||||
- [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON.
|
||||
|
||||
## simpleJSON / visitParam functions
|
||||
## simpleJSON (visitParam) functions
|
||||
|
||||
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. They try to do as little as possible to get the job done as quickly as possible.
|
||||
|
||||
|
@ -762,7 +762,7 @@ LIMIT 10
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -795,7 +795,7 @@ Result:
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -926,7 +926,7 @@ SELECT
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it throws an exception.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -964,7 +964,7 @@ SELECT
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it returns `NULL`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1002,7 +1002,7 @@ SELECT
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
|
||||
**Syntax**
|
||||
@ -2711,7 +2711,7 @@ countDigits(x)
|
||||
- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges).
|
||||
|
||||
:::note
|
||||
For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
|
||||
For `Decimal` values, the function takes their scale into account: it calculates the result over the underlying integer type, which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#isdecimaloverflow).
|
||||
:::
|
||||
|
||||
**Example**
|
||||
@ -2803,7 +2803,7 @@ currentProfiles()
|
||||
|
||||
## enabledProfiles
|
||||
|
||||
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
|
||||
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#currentprofiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -2916,11 +2916,11 @@ Result:
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## queryID {#queryID}
|
||||
## queryID
|
||||
|
||||
Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.
|
||||
|
||||
In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see the example).
|
||||
In contrast to [initialQueryID](#initialqueryid) function, `queryID` can return different results on different shards (see the example).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -2954,7 +2954,7 @@ Result:
|
||||
|
||||
Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`.
|
||||
|
||||
In contrast to [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example).
|
||||
In contrast to [queryID](#queryid) function, `initialQueryID` returns the same results on different shards (see example).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -3041,7 +3041,7 @@ shardCount()
|
||||
|
||||
**See Also**
|
||||
|
||||
- [shardNum()](#shard-num) function example also contains `shardCount()` function call.
|
||||
- [shardNum()](#shardnum) function example also contains `shardCount()` function call.
|
||||
|
||||
## getOSKernelVersion
|
||||
|
||||
|
@ -200,7 +200,7 @@ Banker's rounding is a method of rounding fractional numbers
|
||||
When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position.
|
||||
For example: 3.5 rounds up to 4, 2.5 rounds down to 2.
|
||||
It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest).
|
||||
The [round](#rounding_functions-round) function performs the same rounding for floating point numbers.
|
||||
The [round](#round) function performs the same rounding for floating point numbers.
|
||||
The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
|
||||
|
||||
In other cases, the function rounds numbers to the nearest integer.
|
||||
@ -274,7 +274,7 @@ roundBankers(10.755, 2) = 10.76
|
||||
|
||||
**See Also**
|
||||
|
||||
- [round](#rounding_functions-round)
|
||||
- [round](#round)
|
||||
|
||||
## roundToExp2
|
||||
|
||||
|
@ -1136,16 +1136,136 @@ SELECT tryBase58Decode('3dc8KtHrwM') as res, tryBase58Decode('invalid') as res_i
|
||||
|
||||
## base64Encode
|
||||
|
||||
Encodes a String or FixedString as base64.
|
||||
Encodes a String or FixedString as base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4).
|
||||
|
||||
Alias: `TO_BASE64`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64Encode(plaintext)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `plaintext` — [String](../data-types/string.md) column or constant.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the encoded value of the argument.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64Encode('clickhouse');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64Encode('clickhouse')─┐
|
||||
│ Y2xpY2tob3VzZQ== │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
||||
## base64UrlEncode
|
||||
|
||||
Encodes a URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64UrlEncode(url)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `url` — [String](../data-types/string.md) column or constant.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the encoded value of the argument.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64UrlEncode('https://clickhouse.com');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64UrlEncode('https://clickhouse.com')─┐
|
||||
│ aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## base64Decode
|
||||
|
||||
Decodes a base64-encoded String or FixedString. Throws an exception in case of error.
|
||||
Accepts a String and decodes it from base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error.
|
||||
|
||||
Alias: `FROM_BASE64`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64Decode(encoded)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, an exception is thrown.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the decoded value of the argument.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64Decode('Y2xpY2tob3VzZQ==');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64Decode('Y2xpY2tob3VzZQ==')─┐
|
||||
│ clickhouse │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## base64UrlDecode
|
||||
|
||||
Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64UrlDecode(encodedUrl)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the decoded value of the argument.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
|
||||
│ http://clickhouse.com │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## tryBase64Decode
|
||||
|
||||
Like `base64Decode` but returns an empty string in case of error.
|
||||
@ -1156,9 +1276,13 @@ Like `base64Decode` but returns an empty string in case of error.
|
||||
tryBase64Decode(encoded)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
**Arguments**
|
||||
|
||||
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error.
|
||||
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the decoded value of the argument.
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -1169,9 +1293,41 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
|
||||
```
|
||||
|
||||
```response
|
||||
┌─res─────┬─res_invalid─┐
|
||||
│ Encoded │ │
|
||||
└─────────┴─────────────┘
|
||||
┌─res─────┬─res_invalid─┐
|
||||
│ Encoded │ │
|
||||
└─────────┴─────────────┘
|
||||
```
|
||||
|
||||
## tryBase64UrlDecode
|
||||
|
||||
Like `base64UrlDecode` but returns an empty string in case of error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
tryBase64UrlDecode(encodedUrl)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A string containing the decoded value of the argument.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64UrlDecode('aHR0cHM6Ly9jbGlja') as res_invalid;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─res────────────────────┬─res_invalid─┐
|
||||
│ http://clickhouse.com │ │
|
||||
└────────────────────────┴─────────────┘
|
||||
```
|
||||
|
||||
## endsWith {#endswith}
|
||||
@ -1994,7 +2150,7 @@ Result:
|
||||
|
||||
## stringJaccardIndexUTF8
|
||||
|
||||
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings.
|
||||
Like [stringJaccardIndex](#stringjaccardindex) but for UTF8-encoded strings.
|
||||
|
||||
## editDistance
|
||||
|
||||
|
@ -262,7 +262,7 @@ Result:
|
||||
|
||||
## multiSearchAllPositionsUTF8
|
||||
|
||||
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
|
||||
Like [multiSearchAllPositions](#multisearchallpositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -336,7 +336,7 @@ Result:
|
||||
|
||||
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
|
||||
|
||||
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchFirstPositionCaseInsensitive`](#multisearchfirstpositioncaseinsensitive), [`multiSearchFirstPositionUTF8`](#multisearchfirstpositionutf8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multisearchfirstpositioncaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -370,7 +370,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionCaseInsensitive
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -404,7 +404,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionUTF8
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -440,7 +440,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionCaseInsensitiveUTF8
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -478,7 +478,7 @@ Result:
|
||||
|
||||
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
|
||||
|
||||
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchFirstIndexCaseInsensitive`](#multisearchfirstindexcaseinsensitive), [`multiSearchFirstIndexUTF8`](#multisearchfirstindexutf8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multisearchfirstindexcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -615,7 +615,7 @@ Result:
|
||||
|
||||
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
|
||||
|
||||
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchAnyCaseInsensitive`](#multisearchanycaseinsensitive), [`multiSearchAnyUTF8`](#multisearchanyutf8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multisearchanycaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -719,7 +719,7 @@ Result:
|
||||
|
||||
## multiSearchAnyCaseInsensitiveUTF8
|
||||
|
||||
Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case.
|
||||
Like [multiSearchAnyUTF8](#multisearchanyutf8) but ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -880,7 +880,7 @@ extractAll(haystack, pattern)
|
||||
|
||||
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
|
||||
|
||||
This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
|
||||
This function is slower than [extractAllGroupsVertical](#extractallgroupsvertical).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -952,7 +952,7 @@ Result:
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## like {#like}
|
||||
## like
|
||||
|
||||
Returns whether string `haystack` matches the LIKE expression `pattern`.
|
||||
|
||||
@ -1215,7 +1215,7 @@ Result:
|
||||
|
||||
## ngramSearchCaseInsensitive
|
||||
|
||||
Provides a case-insensitive variant of [ngramSearch](#ngramSearch).
|
||||
Provides a case-insensitive variant of [ngramSearch](#ngramsearch).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1630,7 +1630,7 @@ Result:
|
||||
|
||||
## hasSubsequenceCaseInsensitive
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
|
||||
Like [hasSubsequence](#hassubsequence) but searches case-insensitively.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1665,7 +1665,7 @@ Result:
|
||||
|
||||
## hasSubsequenceUTF8
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
|
||||
Like [hasSubsequence](#hassubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1700,7 +1700,7 @@ Result:
|
||||
|
||||
## hasSubsequenceCaseInsensitiveUTF8
|
||||
|
||||
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
|
||||
Like [hasSubsequenceUTF8](#hassubsequenceutf8) but searches case-insensitively.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -10,7 +10,7 @@ sidebar_label: Type Conversion
|
||||
|
||||
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
|
||||
|
||||
`to<type>` functions and [cast](#castx-t) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes [LowCardinality](../data-types/lowcardinality.md) trait `to<type>` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting.
|
||||
`to<type>` functions and [cast](#cast) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#cast) removes the [LowCardinality](../data-types/lowcardinality.md) trait, while `to<type>` functions don't. The same applies to [Nullable](../data-types/nullable.md); this behaviour is not compatible with the SQL standard, and it can be changed using the [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting.
|
||||
|
||||
:::note
|
||||
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
|
||||
@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -169,7 +169,7 @@ Converts an input value to the [UInt](../data-types/int-uint.md) data type. This
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -996,7 +996,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt8
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1034,7 +1034,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt16
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1072,7 +1072,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1110,7 +1110,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1148,7 +1148,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt128
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1186,7 +1186,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt256
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1224,7 +1224,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt8
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1262,7 +1262,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt16
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1300,7 +1300,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1338,7 +1338,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1376,7 +1376,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt128
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1414,7 +1414,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt256
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1452,7 +1452,7 @@ Result:
|
||||
|
||||
## reinterpretAsFloat32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1486,7 +1486,7 @@ Result:
|
||||
|
||||
## reinterpretAsFloat64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1730,7 +1730,7 @@ Result:
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## reinterpret(x, T)
|
||||
## reinterpret
|
||||
|
||||
Uses the same in-memory byte sequence as the source value `x` and reinterprets it as the destination type.
|
||||
|
||||
@ -1766,9 +1766,9 @@ Result:
|
||||
└─────────────┴──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## CAST(x, T)
|
||||
## CAST
|
||||
|
||||
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
|
||||
Converts an input value to the specified data type. Unlike the [reinterpret](#reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
|
||||
Several syntax variants are supported.
|
||||
|
||||
**Syntax**
|
||||
@ -1875,7 +1875,7 @@ Result:
|
||||
|
||||
Converts `x` to the `T` data type.
|
||||
|
||||
The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
|
||||
The difference from [cast](#cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -2061,7 +2061,7 @@ Result:
|
||||
└───────────────────────────┴──────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseDateTime {#type_conversion_functions-parseDateTime}
|
||||
## parseDateTime
|
||||
|
||||
Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
|
||||
|
||||
@ -2102,15 +2102,15 @@ Alias: `TO_TIMESTAMP`.
|
||||
|
||||
## parseDateTimeOrZero
|
||||
|
||||
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTime](#parsedatetime) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeOrNull
|
||||
|
||||
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTime](#parsedatetime) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
Alias: `str_to_date`.
|
||||
|
||||
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
|
||||
## parseDateTimeInJodaSyntax
|
||||
|
||||
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
|
||||
|
||||
@ -2151,11 +2151,11 @@ SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', '
|
||||
|
||||
## parseDateTimeInJodaSyntaxOrZero
|
||||
|
||||
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeInJodaSyntaxOrNull
|
||||
|
||||
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeBestEffort
|
||||
## parseDateTime32BestEffort
|
||||
@ -2313,11 +2313,11 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
|
||||
|
||||
## parseDateTimeBestEffortUSOrNull
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeBestEffortUSOrZero
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTime64BestEffort
|
||||
|
||||
@ -2389,7 +2389,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
|
||||
|
||||
Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type.
|
||||
|
||||
To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`.
|
||||
To convert data from the `LowCardinality` data type use the [CAST](#cast) function. For example, `CAST(x as String)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -150,7 +150,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str
|
||||
|
||||
**Example**
|
||||
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function.
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
|
||||
|
||||
Query:
|
||||
|
||||
@ -190,7 +190,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S
|
||||
|
||||
**Example**
|
||||
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function.
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
|
||||
|
||||
Query:
|
||||
|
||||
|
@ -235,7 +235,7 @@ If `some_predicate` is not selective enough, it will return a large amount of da
|
||||
|
||||
### Distributed Subqueries and max_parallel_replicas
|
||||
|
||||
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
When [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
|
||||
For example, the following:
|
||||
|
||||
@ -255,7 +255,7 @@ where `M` is between `1` and `3` depending on which replica the local query is e
|
||||
|
||||
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
|
||||
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
|
||||
Therefore adding the [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
|
||||
|
||||
One workaround, if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
|
||||
|
||||
|
@ -108,7 +108,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser
|
||||
CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name
|
||||
```
|
||||
|
||||
Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md/#how-to-set-partition-expression).
|
||||
Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](../alter/partition.md/#how-to-set-partition-expression).
|
||||
|
||||
If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.
|
||||
|
||||
@ -173,7 +173,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
|
||||
|
||||
Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time.
|
||||
|
||||
The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case.
|
||||
The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#add-column) description, but column type is mandatory in this case.
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -31,7 +31,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#attach-partitionpart) query.
|
||||
|
||||
Example:
|
||||
|
||||
@ -252,7 +252,7 @@ Downloads a partition from another server. This query only works for the replica
|
||||
The query does the following:
|
||||
|
||||
1. Downloads the partition|part from the specified shard. In ‘path-in-zookeeper’ you must specify a path to the shard in ZooKeeper.
|
||||
2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#alter_attach-partition) query to add the data to the table.
|
||||
2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#attach-partitionpart) query to add the data to the table.
|
||||
|
||||
For example:
|
||||
|
||||
@ -353,7 +353,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif
|
||||
- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`.
|
||||
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
|
||||
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
|
||||
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
- In the [ALTER ATTACH PART](#attach-partitionpart) and [DROP DETACHED PART](#drop-detached-partitionpart) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
|
||||
Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.
|
||||
|
||||
|
@ -17,8 +17,8 @@ By default, tables are created only on the current server. Distributed DDL queri
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [COMMENT 'comment for column'] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2],
|
||||
...
|
||||
) ENGINE = engine
|
||||
COMMENT 'comment for table'
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: VIEW
|
||||
|
||||
# CREATE VIEW
|
||||
|
||||
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features).
|
||||
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-deprecated), and [window](#window-view-experimental) (live view and window view are experimental features).
|
||||
|
||||
## Normal View
|
||||
|
||||
|
@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
|
||||
- `role` — ClickHouse user role.
|
||||
- `user` — ClickHouse user account.
|
||||
|
||||
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
|
||||
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option) privilege to `user` or `role`.
|
||||
The `WITH REPLACE OPTION` clause replaces old roles with the new role for the `user` or `role`; if it is not specified, the new roles are appended.
|
||||
|
||||
## Grant Current Grants Syntax
|
||||
@ -201,7 +201,7 @@ Hierarchy of privileges:
|
||||
- `HDFS`
|
||||
- `S3`
|
||||
- [dictGet](#dictget)
|
||||
- [displaySecretsInShowAndSelect](#display-secrets)
|
||||
- [displaySecretsInShowAndSelect](#displaysecretsinshowandselect)
|
||||
- [NAMED COLLECTION ADMIN](#named-collection-admin)
|
||||
- `CREATE NAMED COLLECTION`
|
||||
- `DROP NAMED COLLECTION`
|
||||
@ -498,7 +498,7 @@ Privilege level: `DICTIONARY`.
|
||||
- `GRANT dictGet ON mydictionary TO john`
|
||||
|
||||
|
||||
### displaySecretsInShowAndSelect {#display-secrets}
|
||||
### displaySecretsInShowAndSelect
|
||||
|
||||
Allows a user to view secrets in `SHOW` and `SELECT` queries if both
|
||||
[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select)
|
||||
|
@ -28,13 +28,13 @@ The features of data sampling are listed below:
|
||||
For the `SAMPLE` clause the following syntax is supported:
|
||||
|
||||
| SAMPLE Clause Syntax | Description |
|
||||
|----------------------|------------------------------|
|
||||
| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) |
|
||||
| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) |
|
||||
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) |
|
||||
|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#sample-k) |
|
||||
| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#sample-n) |
|
||||
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#sample-k-offset-m) |
|
||||
|
||||
|
||||
## SAMPLE K {#select-sample-k}
|
||||
## SAMPLE K
|
||||
|
||||
Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`.
|
||||
|
||||
@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000
|
||||
|
||||
In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10.
|
||||
|
||||
## SAMPLE N {#select-sample-n}
|
||||
## SAMPLE N
|
||||
|
||||
Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
|
||||
|
||||
@ -90,7 +90,7 @@ FROM visits
|
||||
SAMPLE 10000000
|
||||
```
|
||||
|
||||
## SAMPLE K OFFSET M {#select-sample-offset}
|
||||
## SAMPLE K OFFSET M
|
||||
|
||||
Here `k` and `m` are numbers from 0 to 1. Examples are shown below.
|
||||
|
||||
|
@ -174,7 +174,7 @@ Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`)
|
||||
|
||||
## Managing Distributed Tables
|
||||
|
||||
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting.
|
||||
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#stop-distributed-sends), [FLUSH DISTRIBUTED](#flush-distributed), and [START DISTRIBUTED SENDS](#start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting.
|
||||
|
||||
### STOP DISTRIBUTED SENDS
|
||||
|
||||
|
@ -54,11 +54,11 @@ Identifiers are:
|
||||
- Cluster, database, table, partition, and column names.
|
||||
- Functions.
|
||||
- Data types.
|
||||
- [Expression aliases](#expression_aliases).
|
||||
- [Expression aliases](#expression-aliases).
|
||||
|
||||
Identifiers can be quoted or non-quoted. The latter is preferred.
|
||||
|
||||
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`.
|
||||
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#keywords). Examples: `x`, `_1`, `X_y__Z123_`.
|
||||
|
||||
If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``.
|
||||
|
||||
|
@ -18,7 +18,7 @@ file([path_to_archive ::] path [,format] [,structure] [,compression])
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs_in_path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs-in-path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
@ -128,7 +128,7 @@ Reading data from `table.csv`, located in `archive1.zip` or/and `archive2.zip`:
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in path {#globs_in_path}
|
||||
## Globs in path
|
||||
|
||||
Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.
|
||||
|
||||
|
@ -22,7 +22,7 @@ fileCluster(cluster_name, path[, format, structure, compression_method])
|
||||
**Arguments**
|
||||
|
||||
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs_in_path).
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs-in-path).
|
||||
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
@ -74,7 +74,7 @@ SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s Str
|
||||
```
|
||||
|
||||
|
||||
## Globs in Path {#globs_in_path}
|
||||
## Globs in Path
|
||||
|
||||
All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster.
|
||||
|
||||
|
@ -538,16 +538,28 @@ SELECT base58Decode('3dc8KtHrwM');
|
||||
|
||||
Синоним: `TO_BASE64`.
|
||||
|
||||
## base64UrlEncode(s)
|
||||
|
||||
Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648).
|
||||
|
||||
## base64Decode(s) {#base64decode}
|
||||
|
||||
Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение
|
||||
|
||||
Синоним: `FROM_BASE64`.
|
||||
|
||||
## base64UrlDecode(s)
|
||||
|
||||
Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение
|
||||
|
||||
## tryBase64Decode(s) {#trybase64decode}
|
||||
|
||||
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
|
||||
|
||||
## tryBase64UrlDecode(s)
|
||||
|
||||
Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку.
|
||||
|
||||
## endsWith(s, suffix) {#endswith}
|
||||
|
||||
Возвращает 1, если строка завершается указанным суффиксом, и 0 в противном случае.
|
||||
|
@ -123,7 +123,7 @@ Bloom filter是一种数据结构,它允许对集合成员进行高效的是
|
||||
|
||||
有三种基于Bloom过滤器的数据跳数索引类型:
|
||||
|
||||
* 基本的**bloom_filter**接受一个可选参数,该参数表示在0到1之间允许的“假阳性”率(如果未指定,则使用.025)。
|
||||
* 基本的**bloom_filter**接受一个可选参数,该参数表示在0到1之间允许的“假阳性”率(如果未指定,则使用0.025)。
|
||||
|
||||
* 更专业的**tokenbf_v1**。需要三个参数,用来优化布隆过滤器:(1)过滤器的大小字节(大过滤器有更少的假阳性,有更高的存储成本),(2)哈希函数的个数(更多的散列函数可以减少假阳性)。(3)布隆过滤器哈希函数的种子。有关这些参数如何影响布隆过滤器功能的更多细节,请参阅 [这里](https://hur.st/bloomfilter/) 。此索引仅适用于String、FixedString和Map类型的数据。输入表达式被分割为由非字母数字字符分隔的字符序列。例如,列值`This is a candidate for a "full text" search`将被分割为`This` `is` `a` `candidate` `for` `full` `text` `search`。它用于LIKE、EQUALS、in、hasToken()和类似的长字符串中单词和其他值的搜索。例如,一种可能的用途是在非结构的应用程序日志行列中搜索少量的类名或行号。
|
||||
|
||||
|
@ -154,8 +154,6 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Storages/StorageS3Settings.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp
|
||||
|
@ -361,9 +361,10 @@ try
|
||||
}
|
||||
|
||||
GlobalThreadPool::initialize(
|
||||
config().getUInt("max_thread_pool_size", 100),
|
||||
config().getUInt("max_thread_pool_free_size", 1000),
|
||||
config().getUInt("thread_pool_queue_size", 10000)
|
||||
/// We need to have sufficient amount of threads for connections + nuraft workers + keeper workers, 1000 is an estimation
|
||||
std::min(1000U, config().getUInt("max_thread_pool_size", 1000)),
|
||||
config().getUInt("max_thread_pool_free_size", 100),
|
||||
config().getUInt("thread_pool_queue_size", 1000)
|
||||
);
|
||||
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
|
||||
SCOPE_EXIT({
|
||||
|
@ -983,6 +983,18 @@ try
|
||||
}
|
||||
}
|
||||
|
||||
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
|
||||
fs::path path = path_str;
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
|
||||
|
||||
global_context->setPath(path_str);
|
||||
|
||||
StatusFile status{path / "status", StatusFile::write_full_info};
|
||||
|
||||
ServerUUID::load(path / "uuid", log);
|
||||
|
||||
zkutil::validateZooKeeperConfig(config());
|
||||
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
|
||||
|
||||
@ -994,7 +1006,7 @@ try
|
||||
ConfigProcessor config_processor(config_path);
|
||||
loaded_config = config_processor.loadConfigWithZooKeeperIncludes(
|
||||
main_config_zk_node_cache, main_config_zk_changed_event, /* fallback_to_preprocessed = */ true);
|
||||
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
|
||||
config_processor.savePreprocessedConfig(loaded_config, path_str);
|
||||
config().removeConfiguration(old_configuration.get());
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
global_context->setConfig(loaded_config.configuration);
|
||||
@ -1128,19 +1140,6 @@ try
|
||||
global_context->setRemoteHostFilter(config());
|
||||
global_context->setHTTPHeaderFilter(config());
|
||||
|
||||
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
|
||||
fs::path path = path_str;
|
||||
std::string default_database = server_settings.default_database.toString();
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });
|
||||
|
||||
global_context->setPath(path_str);
|
||||
|
||||
StatusFile status{path / "status", StatusFile::write_full_info};
|
||||
|
||||
ServerUUID::load(path / "uuid", log);
|
||||
|
||||
/// Try to increase limit on number of open files.
|
||||
{
|
||||
rlimit rlim;
|
||||
@ -1671,6 +1670,10 @@ try
|
||||
if (global_context->isServerCompletelyStarted())
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
GWPAsan::setForceSampleProbability(new_server_settings.gwp_asan_force_sample_probability);
|
||||
#endif
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
|
||||
|
||||
/// Must be the last.
|
||||
@ -1928,6 +1931,7 @@ try
|
||||
|
||||
/// Set current database name before loading tables and databases because
|
||||
/// system logs may copy global context.
|
||||
std::string default_database = server_settings.default_database.toString();
|
||||
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
|
||||
|
||||
LOG_INFO(log, "Loading metadata from {}", path_str);
|
||||
@ -2120,6 +2124,10 @@ try
|
||||
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability);
|
||||
#endif
|
||||
|
||||
try
|
||||
{
|
||||
global_context->startClusterDiscovery();
|
||||
|
@ -360,10 +360,13 @@ void ContextAccess::setUser(const UserPtr & user_) const
|
||||
|
||||
subscription_for_roles_changes.reset();
|
||||
enabled_roles = access_control->getEnabledRoles(current_roles, current_roles_with_admin_option);
|
||||
subscription_for_roles_changes = enabled_roles->subscribeForChanges([this](const std::shared_ptr<const EnabledRolesInfo> & roles_info_)
|
||||
subscription_for_roles_changes = enabled_roles->subscribeForChanges([weak_ptr = weak_from_this()](const std::shared_ptr<const EnabledRolesInfo> & roles_info_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
setRolesInfo(roles_info_);
|
||||
auto ptr = weak_ptr.lock();
|
||||
if (!ptr)
|
||||
return;
|
||||
std::lock_guard lock{ptr->mutex};
|
||||
ptr->setRolesInfo(roles_info_);
|
||||
});
|
||||
|
||||
setRolesInfo(enabled_roles->getRolesInfo());
|
||||
|
@ -10,9 +10,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_)
|
||||
InterpolateNode::InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_)
|
||||
: IQueryTreeNode(children_size)
|
||||
{
|
||||
if (expression_)
|
||||
expression_name = expression_->getIdentifier().getFullName();
|
||||
|
||||
children[expression_child_index] = std::move(expression_);
|
||||
children[interpolate_expression_child_index] = std::move(interpolate_expression_);
|
||||
}
|
||||
@ -41,13 +44,23 @@ void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const
|
||||
|
||||
QueryTreeNodePtr InterpolateNode::cloneImpl() const
|
||||
{
|
||||
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
|
||||
auto cloned = std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
|
||||
cloned->expression_name = expression_name;
|
||||
return cloned;
|
||||
}
|
||||
|
||||
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
auto result = std::make_shared<ASTInterpolateElement>();
|
||||
result->column = getExpression()->toAST(options)->getColumnName();
|
||||
|
||||
/// Interpolate parser supports only identifier node.
|
||||
/// In case of alias, identifier is replaced to expression, which can't be parsed.
|
||||
/// In this case, keep original alias name.
|
||||
if (const auto * identifier = getExpression()->as<IdentifierNode>())
|
||||
result->column = identifier->toAST(options)->getColumnName();
|
||||
else
|
||||
result->column = expression_name;
|
||||
|
||||
result->children.push_back(getInterpolateExpression()->toAST(options));
|
||||
result->expr = result->children.back();
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/IdentifierNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
|
||||
namespace DB
|
||||
@ -19,7 +19,7 @@ class InterpolateNode final : public IQueryTreeNode
|
||||
{
|
||||
public:
|
||||
/// Initialize interpolate node with expression and interpolate expression
|
||||
explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_);
|
||||
explicit InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_);
|
||||
|
||||
/// Get expression to interpolate
|
||||
const QueryTreeNodePtr & getExpression() const
|
||||
@ -61,6 +61,9 @@ protected:
|
||||
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
/// Initial name from column identifier.
|
||||
std::string expression_name;
|
||||
|
||||
private:
|
||||
static constexpr size_t expression_child_index = 0;
|
||||
static constexpr size_t interpolate_expression_child_index = 1;
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void leaveImpl(QueryTreeNodePtr & node)
|
||||
void enterImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
|
||||
return;
|
||||
|
@ -41,9 +41,9 @@ public:
|
||||
return;
|
||||
|
||||
bool replaced_argument = false;
|
||||
auto & uniq_function_arguments_nodes = function_node->getArguments().getNodes();
|
||||
auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes();
|
||||
|
||||
for (auto & uniq_function_argument_node : uniq_function_arguments_nodes)
|
||||
for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes)
|
||||
{
|
||||
auto * uniq_function_argument_node_typed = uniq_function_argument_node->as<FunctionNode>();
|
||||
if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction())
|
||||
@ -67,12 +67,10 @@ public:
|
||||
if (!replaced_argument)
|
||||
return;
|
||||
|
||||
const auto & function_node_argument_nodes = function_node->getArguments().getNodes();
|
||||
|
||||
DataTypes argument_types;
|
||||
argument_types.reserve(function_node_argument_nodes.size());
|
||||
argument_types.reserve(replaced_uniq_function_arguments_nodes.size());
|
||||
|
||||
for (const auto & function_node_argument : function_node_argument_nodes)
|
||||
for (const auto & function_node_argument : replaced_uniq_function_arguments_nodes)
|
||||
argument_types.emplace_back(function_node_argument->getResultType());
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
@ -83,6 +81,11 @@ public:
|
||||
function_node->getAggregateFunction()->getParameters(),
|
||||
properties);
|
||||
|
||||
/// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass
|
||||
if (!aggregate_function->getResultType()->equals(*function_node->getAggregateFunction()->getResultType()))
|
||||
return;
|
||||
|
||||
function_node->getArguments().getNodes() = replaced_uniq_function_arguments_nodes;
|
||||
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
}
|
||||
};
|
||||
|
@ -88,14 +88,10 @@ namespace
|
||||
std::move(headers),
|
||||
S3::CredentialsConfiguration
|
||||
{
|
||||
settings.auth_settings.use_environment_credentials.value_or(
|
||||
context->getConfigRef().getBool("s3.use_environment_credentials", true)),
|
||||
settings.auth_settings.use_insecure_imds_request.value_or(
|
||||
context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
|
||||
settings.auth_settings.expiration_window_seconds.value_or(
|
||||
context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
|
||||
settings.auth_settings.no_sign_request.value_or(
|
||||
context->getConfigRef().getBool("s3.no_sign_request", false)),
|
||||
settings.auth_settings.use_environment_credentials,
|
||||
settings.auth_settings.use_insecure_imds_request,
|
||||
settings.auth_settings.expiration_window_seconds,
|
||||
settings.auth_settings.no_sign_request
|
||||
});
|
||||
}
|
||||
|
||||
@ -131,12 +127,18 @@ BackupReaderS3::BackupReaderS3(
|
||||
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
|
||||
{
|
||||
auto & request_settings = s3_settings.request_settings;
|
||||
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef());
|
||||
|
||||
if (auto endpoint_settings = context_->getStorageS3Settings().getSettings(
|
||||
s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
|
||||
{
|
||||
s3_settings.updateIfChanged(*endpoint_settings);
|
||||
}
|
||||
|
||||
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
|
||||
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
|
||||
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
|
||||
|
||||
if (auto blob_storage_system_log = context_->getBlobStorageLog())
|
||||
@ -223,13 +225,19 @@ BackupWriterS3::BackupWriterS3(
|
||||
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
|
||||
{
|
||||
auto & request_settings = s3_settings.request_settings;
|
||||
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
request_settings.setStorageClassName(storage_class_name);
|
||||
s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef());
|
||||
|
||||
if (auto endpoint_settings = context_->getStorageS3Settings().getSettings(
|
||||
s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
|
||||
{
|
||||
s3_settings.updateIfChanged(*endpoint_settings);
|
||||
}
|
||||
|
||||
s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true);
|
||||
s3_settings.request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
s3_settings.request_settings.storage_class_name = storage_class_name;
|
||||
|
||||
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
|
||||
if (auto blob_storage_system_log = context_->getBlobStorageLog())
|
||||
{
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <Common/Logger.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <IO/S3Common.h>
|
||||
#include <Storages/StorageS3Settings.h>
|
||||
#include <IO/S3Settings.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <IO/S3/BlobStorageLogWriter.h>
|
||||
|
||||
|
@ -285,7 +285,7 @@ if (TARGET ch_contrib::llvm)
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::gwp_asan)
|
||||
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::gwp_asan)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::gwp_asan)
|
||||
target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::gwp_asan)
|
||||
endif()
|
||||
|
||||
|
@ -1188,7 +1188,10 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
|
||||
std::rethrow_exception(local_format_error);
|
||||
|
||||
if (cancelled && is_interactive)
|
||||
{
|
||||
std::cout << "Query was cancelled." << std::endl;
|
||||
cancelled_printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1302,8 +1305,13 @@ void ClientBase::onEndOfStream()
|
||||
|
||||
resetOutput();
|
||||
|
||||
if (is_interactive && !written_first_block)
|
||||
if (is_interactive)
|
||||
{
|
||||
if (cancelled && !cancelled_printed)
|
||||
std::cout << "Query was cancelled." << std::endl;
|
||||
else if (!written_first_block)
|
||||
std::cout << "Ok." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1866,6 +1874,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
||||
resetOutput();
|
||||
have_error = false;
|
||||
cancelled = false;
|
||||
cancelled_printed = false;
|
||||
client_exception.reset();
|
||||
server_exception.reset();
|
||||
|
||||
|
@ -329,6 +329,7 @@ protected:
|
||||
bool allow_merge_tree_settings = false;
|
||||
|
||||
bool cancelled = false;
|
||||
bool cancelled_printed = false;
|
||||
|
||||
/// Does log_comment has specified by user?
|
||||
bool has_log_comment = false;
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
std::string getName() const override { return "Nullable(" + nested_column->getName() + ")"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::Nullable; }
|
||||
MutableColumnPtr cloneResized(size_t size) const override;
|
||||
size_t size() const override { return nested_column->size(); }
|
||||
size_t size() const override { return assert_cast<const ColumnUInt8 &>(*null_map).size(); }
|
||||
bool isNullAt(size_t n) const override { return assert_cast<const ColumnUInt8 &>(*null_map).getData()[n] != 0;}
|
||||
Field operator[](size_t n) const override;
|
||||
void get(size_t n, Field & res) const override;
|
||||
|
@ -23,15 +23,10 @@ struct ConstantFilterDescription
|
||||
|
||||
struct IFilterDescription
|
||||
{
|
||||
/// has_one can be pre-compute during creating the filter description in some cases
|
||||
Int64 has_one = -1;
|
||||
virtual ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const = 0;
|
||||
virtual size_t countBytesInFilter() const = 0;
|
||||
virtual ~IFilterDescription() = default;
|
||||
bool hasOne() { return has_one >= 0 ? has_one : hasOneImpl();}
|
||||
protected:
|
||||
/// Calculate if filter has a non-zero from the filter values, may update has_one
|
||||
virtual bool hasOneImpl() = 0;
|
||||
};
|
||||
|
||||
/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
|
||||
@ -45,7 +40,6 @@ struct FilterDescription final : public IFilterDescription
|
||||
ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const override { return column.filter(*data, result_size_hint); }
|
||||
size_t countBytesInFilter() const override { return DB::countBytesInFilter(*data); }
|
||||
protected:
|
||||
bool hasOneImpl() override { return data ? (has_one = !memoryIsZero(data->data(), 0, data->size())) : false; }
|
||||
};
|
||||
|
||||
struct SparseFilterDescription final : public IFilterDescription
|
||||
@ -56,7 +50,6 @@ struct SparseFilterDescription final : public IFilterDescription
|
||||
ColumnPtr filter(const IColumn & column, ssize_t) const override { return column.index(*filter_indices, 0); }
|
||||
size_t countBytesInFilter() const override { return filter_indices->size(); }
|
||||
protected:
|
||||
bool hasOneImpl() override { return filter_indices && !filter_indices->empty(); }
|
||||
};
|
||||
|
||||
struct ColumnWithTypeAndName;
|
||||
|
@ -1,8 +1,9 @@
|
||||
#include <Common/Allocator.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/GWPAsan.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include <base/errnoToString.h>
|
||||
#include <base/getPageSize.h>
|
||||
@ -10,6 +11,12 @@
|
||||
#include <Poco/Logger.h>
|
||||
#include <sys/mman.h> /// MADV_POPULATE_WRITE
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event GWPAsanAllocateSuccess;
|
||||
extern const Event GWPAsanAllocateFailed;
|
||||
extern const Event GWPAsanFree;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -60,6 +67,27 @@ template <bool clear_memory, bool populate>
|
||||
void * allocNoTrack(size_t size, size_t alignment)
|
||||
{
|
||||
void * buf;
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignment))
|
||||
{
|
||||
if constexpr (clear_memory)
|
||||
memset(ptr, 0, size);
|
||||
|
||||
if constexpr (populate)
|
||||
prefaultPages(ptr, size);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if constexpr (clear_memory)
|
||||
@ -91,6 +119,15 @@ void * allocNoTrack(size_t size, size_t alignment)
|
||||
|
||||
void freeNoTrack(void * buf)
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf)))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
|
||||
GWPAsan::GuardedAlloc.deallocate(buf);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
::free(buf);
|
||||
}
|
||||
|
||||
@ -144,8 +181,54 @@ void * Allocator<clear_memory_, populate>::realloc(void * buf, size_t old_size,
|
||||
{
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
return buf;
|
||||
}
|
||||
else if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(new_size, alignment))
|
||||
{
|
||||
auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
auto trace_alloc = CurrentMemoryTracker::alloc(new_size);
|
||||
trace_free.onFree(buf, old_size);
|
||||
|
||||
memcpy(ptr, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
trace_alloc.onAlloc(buf, new_size);
|
||||
|
||||
if constexpr (clear_memory)
|
||||
if (new_size > old_size)
|
||||
memset(reinterpret_cast<char *>(ptr) + old_size, 0, new_size - old_size);
|
||||
|
||||
if constexpr (populate)
|
||||
prefaultPages(ptr, new_size);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf)))
|
||||
{
|
||||
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
|
||||
void * new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
buf = new_buf;
|
||||
|
||||
if constexpr (populate)
|
||||
prefaultPages(buf, new_size);
|
||||
|
||||
return buf;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
auto trace_free = CurrentMemoryTracker::free(old_size);
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "EnvironmentProxyConfigurationResolver.h"
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
namespace DB
|
||||
@ -12,6 +13,7 @@ namespace DB
|
||||
* */
|
||||
static constexpr auto PROXY_HTTP_ENVIRONMENT_VARIABLE = "http_proxy";
|
||||
static constexpr auto PROXY_HTTPS_ENVIRONMENT_VARIABLE = "https_proxy";
|
||||
static constexpr auto NO_PROXY_ENVIRONMENT_VARIABLE = "no_proxy";
|
||||
|
||||
EnvironmentProxyConfigurationResolver::EnvironmentProxyConfigurationResolver(
|
||||
Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_)
|
||||
@ -34,21 +36,31 @@ namespace
|
||||
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
|
||||
{
|
||||
const auto * proxy_host = getProxyHost(request_protocol);
|
||||
const char * getNoProxyHosts()
|
||||
{
|
||||
return std::getenv(NO_PROXY_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
|
||||
if (!proxy_host)
|
||||
ProxyConfiguration buildProxyConfiguration(
|
||||
ProxyConfiguration::Protocol request_protocol,
|
||||
const Poco::URI & uri,
|
||||
const std::string & no_proxy_hosts_string,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy)
|
||||
{
|
||||
if (uri.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
auto uri = Poco::URI(proxy_host);
|
||||
auto host = uri.getHost();
|
||||
auto scheme = uri.getScheme();
|
||||
auto port = uri.getPort();
|
||||
const auto & host = uri.getHost();
|
||||
const auto & scheme = uri.getScheme();
|
||||
const auto port = uri.getPort();
|
||||
|
||||
const bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
|
||||
request_protocol,
|
||||
ProxyConfiguration::protocolFromString(scheme),
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
LOG_TRACE(getLogger("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port);
|
||||
|
||||
@ -56,9 +68,28 @@ ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
|
||||
host,
|
||||
ProxyConfiguration::protocolFromString(scheme),
|
||||
port,
|
||||
useTunneling(request_protocol, ProxyConfiguration::protocolFromString(scheme), disable_tunneling_for_https_requests_over_http_proxy),
|
||||
request_protocol
|
||||
use_tunneling_for_https_requests_over_http_proxy,
|
||||
request_protocol,
|
||||
no_proxy_hosts_string
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve()
|
||||
{
|
||||
static const auto * http_proxy_host = getProxyHost(Protocol::HTTP);
|
||||
static const auto * https_proxy_host = getProxyHost(Protocol::HTTPS);
|
||||
static const auto * no_proxy = getNoProxyHosts();
|
||||
static const auto poco_no_proxy_hosts = no_proxy ? buildPocoNonProxyHosts(no_proxy) : "";
|
||||
|
||||
static const Poco::URI http_proxy_uri(http_proxy_host ? http_proxy_host : "");
|
||||
static const Poco::URI https_proxy_uri(https_proxy_host ? https_proxy_host : "");
|
||||
|
||||
return buildProxyConfiguration(
|
||||
request_protocol,
|
||||
request_protocol == Protocol::HTTP ? http_proxy_uri : https_proxy_uri,
|
||||
poco_no_proxy_hosts,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
}
|
||||
|
||||
}
|
||||
|
226
src/Common/GWPAsan.cpp
Normal file
226
src/Common/GWPAsan.cpp
Normal file
@ -0,0 +1,226 @@
|
||||
#include <Common/GWPAsan.h>
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
# include <IO/ReadHelpers.h>
|
||||
# include <gwp_asan/common.h>
|
||||
# include <gwp_asan/crash_handler.h>
|
||||
# include <gwp_asan/guarded_pool_allocator.h>
|
||||
# include <gwp_asan/optional/options_parser.h>
|
||||
# include <Common/ErrorCodes.h>
|
||||
# include <Common/Exception.h>
|
||||
# include <Common/Logger.h>
|
||||
# include <Common/StackTrace.h>
|
||||
# include <Common/logger_useful.h>
|
||||
|
||||
# include <atomic>
|
||||
# include <iostream>
|
||||
|
||||
namespace GWPAsan
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
size_t getBackTrace(uintptr_t * trace_buffer, size_t buffer_size)
|
||||
{
|
||||
StackTrace stacktrace;
|
||||
auto trace_size = std::min(buffer_size, stacktrace.getSize());
|
||||
const auto & frame_pointers = stacktrace.getFramePointers();
|
||||
memcpy(trace_buffer, frame_pointers.data(), trace_size * sizeof(uintptr_t));
|
||||
return trace_size;
|
||||
}
|
||||
|
||||
/// printf-style sink: formats the message into a fixed 1024-byte buffer (vsnprintf truncates
/// anything longer) and writes it to stderr followed by a newline.
__attribute__((__format__ (__printf__, 1, 0)))
void printString(const char * format, ...) // NOLINT(cert-dcl50-cpp)
{
    std::array<char, 1024> message_buffer;

    va_list arguments;
    va_start(arguments, format);
    const int written = vsnprintf(message_buffer.data(), message_buffer.size(), format, arguments);
    va_end(arguments);

    /// Nothing is printed for an empty result or a formatting error.
    if (written <= 0)
        return;

    std::cerr << message_buffer.data() << std::endl;
}
|
||||
|
||||
}
|
||||
|
||||
gwp_asan::GuardedPoolAllocator GuardedAlloc;
|
||||
|
||||
static bool guarded_alloc_initialized = []
|
||||
{
|
||||
const char * env_options_raw = std::getenv("GWP_ASAN_OPTIONS"); // NOLINT(concurrency-mt-unsafe)
|
||||
if (env_options_raw)
|
||||
gwp_asan::options::initOptions(env_options_raw, printString);
|
||||
|
||||
auto & opts = gwp_asan::options::getOptions();
|
||||
if (!env_options_raw || !std::string_view{env_options_raw}.contains("MaxSimultaneousAllocations"))
|
||||
opts.MaxSimultaneousAllocations = 1024;
|
||||
|
||||
if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate"))
|
||||
opts.SampleRate = 50000;
|
||||
|
||||
opts.Backtrace = getBackTrace;
|
||||
GuardedAlloc.init(opts);
|
||||
|
||||
return true;
|
||||
}();
|
||||
|
||||
bool isGWPAsanError(uintptr_t fault_address)
|
||||
{
|
||||
const auto * state = GuardedAlloc.getAllocatorState();
|
||||
if (state->FailureType != gwp_asan::Error::UNKNOWN && state->FailureAddress != 0)
|
||||
return true;
|
||||
|
||||
return fault_address < state->GuardedPagePoolEnd && state->GuardedPagePool <= fault_address;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct ScopedEndOfReportDecorator
|
||||
{
|
||||
explicit ScopedEndOfReportDecorator(Poco::LoggerPtr log_) : log(std::move(log_)) { }
|
||||
~ScopedEndOfReportDecorator() { LOG_FATAL(log, "*** End GWP-ASan report ***"); }
|
||||
Poco::LoggerPtr log;
|
||||
};
|
||||
|
||||
// Prints the provided error and metadata information.
|
||||
void printHeader(gwp_asan::Error error, uintptr_t fault_address, const gwp_asan::AllocationMetadata * allocation_meta, Poco::LoggerPtr log)
|
||||
{
|
||||
bool access_was_in_bounds = false;
|
||||
std::string description;
|
||||
if (error != gwp_asan::Error::UNKNOWN && allocation_meta != nullptr)
|
||||
{
|
||||
uintptr_t address = __gwp_asan_get_allocation_address(allocation_meta);
|
||||
size_t size = __gwp_asan_get_allocation_size(allocation_meta);
|
||||
if (fault_address < address)
|
||||
{
|
||||
description = fmt::format(
|
||||
"({} byte{} to the left of a {}-byte allocation at 0x{}) ",
|
||||
address - fault_address,
|
||||
(address - fault_address == 1) ? "" : "s",
|
||||
size,
|
||||
address);
|
||||
}
|
||||
else if (fault_address > address)
|
||||
{
|
||||
description = fmt::format(
|
||||
"({} byte{} to the right of a {}-byte allocation at 0x{}) ",
|
||||
fault_address - address,
|
||||
(fault_address - address == 1) ? "" : "s",
|
||||
size,
|
||||
address);
|
||||
}
|
||||
else if (error == gwp_asan::Error::DOUBLE_FREE)
|
||||
{
|
||||
description = fmt::format("(a {}-byte allocation) ", size);
|
||||
}
|
||||
else
|
||||
{
|
||||
access_was_in_bounds = true;
|
||||
description = fmt::format(
|
||||
"({} byte{} into a {}-byte allocation at 0x{}) ",
|
||||
fault_address - address,
|
||||
(fault_address - address == 1) ? "" : "s",
|
||||
size,
|
||||
address);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t thread_id = gwp_asan::getThreadID();
|
||||
std::string thread_id_string = thread_id == gwp_asan::kInvalidThreadID ? "<unknown" : fmt::format("{}", thread_id);
|
||||
|
||||
std::string_view out_of_bounds_and_use_after_free_warning;
|
||||
if (error == gwp_asan::Error::USE_AFTER_FREE && !access_was_in_bounds)
|
||||
{
|
||||
out_of_bounds_and_use_after_free_warning = " (warning: buffer overflow/underflow detected on a free()'d "
|
||||
"allocation. This either means you have a buffer-overflow and a "
|
||||
"use-after-free at the same time, or you have a long-lived "
|
||||
"use-after-free bug where the allocation/deallocation metadata below "
|
||||
"has already been overwritten and is likely bogus)";
|
||||
}
|
||||
|
||||
LOG_FATAL(
|
||||
log,
|
||||
"{}{} at 0x{} {}by thread {} here:",
|
||||
gwp_asan::ErrorToString(error),
|
||||
out_of_bounds_and_use_after_free_warning,
|
||||
fault_address,
|
||||
description,
|
||||
thread_id_string);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void printReport([[maybe_unused]] uintptr_t fault_address)
|
||||
{
|
||||
const auto logger = getLogger("GWPAsan");
|
||||
const auto * state = GuardedAlloc.getAllocatorState();
|
||||
if (uintptr_t internal_error_ptr = __gwp_asan_get_internal_crash_address(state); internal_error_ptr)
|
||||
fault_address = internal_error_ptr;
|
||||
|
||||
const gwp_asan::AllocationMetadata * allocation_meta = __gwp_asan_get_metadata(state, GuardedAlloc.getMetadataRegion(), fault_address);
|
||||
|
||||
static constexpr std::string_view unknown_crash_text =
|
||||
"GWP-ASan cannot provide any more information about this error. This may "
|
||||
"occur due to a wild memory access into the GWP-ASan pool, or an "
|
||||
"overflow/underflow that is > 512B in length.\n";
|
||||
|
||||
if (allocation_meta == nullptr)
|
||||
{
|
||||
LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***");
|
||||
ScopedEndOfReportDecorator decorator(logger);
|
||||
LOG_FATAL(logger, fmt::runtime(unknown_crash_text));
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***");
|
||||
ScopedEndOfReportDecorator decorator(logger);
|
||||
|
||||
gwp_asan::Error error = __gwp_asan_diagnose_error(state, allocation_meta, fault_address);
|
||||
if (error == gwp_asan::Error::UNKNOWN)
|
||||
{
|
||||
LOG_FATAL(logger, fmt::runtime(unknown_crash_text));
|
||||
return;
|
||||
}
|
||||
|
||||
// Print the error header.
|
||||
printHeader(error, fault_address, allocation_meta, logger);
|
||||
|
||||
static constexpr size_t maximum_stack_frames = 512;
|
||||
std::array<uintptr_t, maximum_stack_frames> trace;
|
||||
|
||||
// Maybe print the deallocation trace.
|
||||
if (__gwp_asan_is_deallocated(allocation_meta))
|
||||
{
|
||||
uint64_t thread_id = __gwp_asan_get_deallocation_thread_id(allocation_meta);
|
||||
if (thread_id == gwp_asan::kInvalidThreadID)
|
||||
LOG_FATAL(logger, "0x{} was deallocated by thread <unknown> here:", fault_address);
|
||||
else
|
||||
LOG_FATAL(logger, "0x{} was deallocated by thread {} here:", fault_address, thread_id);
|
||||
const auto trace_length = __gwp_asan_get_deallocation_trace(allocation_meta, trace.data(), maximum_stack_frames);
|
||||
StackTrace::toStringEveryLine(
|
||||
reinterpret_cast<void **>(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); });
|
||||
}
|
||||
|
||||
// Print the allocation trace.
|
||||
uint64_t thread_id = __gwp_asan_get_allocation_thread_id(allocation_meta);
|
||||
if (thread_id == gwp_asan::kInvalidThreadID)
|
||||
LOG_FATAL(logger, "0x{} was allocated by thread <unknown> here:", fault_address);
|
||||
else
|
||||
LOG_FATAL(logger, "0x{} was allocated by thread {} here:", fault_address, thread_id);
|
||||
const auto trace_length = __gwp_asan_get_allocation_trace(allocation_meta, trace.data(), maximum_stack_frames);
|
||||
StackTrace::toStringEveryLine(
|
||||
reinterpret_cast<void **>(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); });
|
||||
}
|
||||
|
||||
std::atomic<double> force_sample_probability = 0.0;
|
||||
|
||||
/// Stores `value` into the global `force_sample_probability` atomic.
/// The store uses relaxed memory ordering.
void setForceSampleProbability(double value)
|
||||
{
|
||||
force_sample_probability.store(value, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
34
src/Common/GWPAsan.h
Normal file
34
src/Common/GWPAsan.h
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
|
||||
#include <gwp_asan/guarded_pool_allocator.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <random>
|
||||
|
||||
namespace GWPAsan
|
||||
{
|
||||
|
||||
extern gwp_asan::GuardedPoolAllocator GuardedAlloc;
|
||||
|
||||
bool isGWPAsanError(uintptr_t fault_address);
|
||||
|
||||
void printReport(uintptr_t fault_address);
|
||||
|
||||
extern std::atomic<double> force_sample_probability;
|
||||
|
||||
void setForceSampleProbability(double value);
|
||||
|
||||
inline bool shouldForceSample()
|
||||
{
|
||||
std::bernoulli_distribution dist(force_sample_probability.load(std::memory_order_relaxed));
|
||||
return dist(thread_local_rng);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -9,6 +9,7 @@
|
||||
#include <Common/ProxyConfiguration.h>
|
||||
#include <Common/MemoryTrackerSwitcher.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
|
||||
#include <Poco/Net/HTTPChunkedStream.h>
|
||||
#include <Poco/Net/HTTPClientSession.h>
|
||||
@ -70,20 +71,6 @@ namespace CurrentMetrics
|
||||
|
||||
namespace
|
||||
{
|
||||
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration)
|
||||
{
|
||||
Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config;
|
||||
|
||||
poco_proxy_config.host = proxy_configuration.host;
|
||||
poco_proxy_config.port = proxy_configuration.port;
|
||||
poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol);
|
||||
poco_proxy_config.tunnel = proxy_configuration.tunneling;
|
||||
poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol);
|
||||
|
||||
return poco_proxy_config;
|
||||
}
|
||||
|
||||
|
||||
constexpr size_t roundUp(size_t x, size_t rounding)
|
||||
{
|
||||
chassert(rounding > 0);
|
||||
@ -696,7 +683,8 @@ struct EndpointPoolKey
|
||||
proxy_config.port,
|
||||
proxy_config.protocol,
|
||||
proxy_config.tunneling,
|
||||
proxy_config.original_request_protocol)
|
||||
proxy_config.original_request_protocol,
|
||||
proxy_config.no_proxy_hosts)
|
||||
== std::tie(
|
||||
rhs.connection_group,
|
||||
rhs.target_host,
|
||||
@ -706,7 +694,8 @@ struct EndpointPoolKey
|
||||
rhs.proxy_config.port,
|
||||
rhs.proxy_config.protocol,
|
||||
rhs.proxy_config.tunneling,
|
||||
rhs.proxy_config.original_request_protocol);
|
||||
rhs.proxy_config.original_request_protocol,
|
||||
rhs.proxy_config.no_proxy_hosts);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,17 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Allocator.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/memcpySmall.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include "config.h"
|
||||
|
||||
#include <base/getPageSize.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <Common/Allocator.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/GWPAsan.h>
|
||||
#include <Common/PODArray_fwd.h>
|
||||
#include <Common/memcpySmall.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstddef>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#ifndef NDEBUG
|
||||
#include <sys/mman.h>
|
||||
@ -112,6 +115,11 @@ protected:
|
||||
template <typename ... TAllocatorParams>
|
||||
void alloc(size_t bytes, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::shouldForceSample()))
|
||||
gwp_asan::getThreadLocals()->NextSampleCounter = 1;
|
||||
#endif
|
||||
|
||||
char * allocated = reinterpret_cast<char *>(TAllocator::alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...));
|
||||
|
||||
c_start = allocated + pad_left;
|
||||
@ -141,6 +149,11 @@ protected:
|
||||
return;
|
||||
}
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GWPAsan::shouldForceSample()))
|
||||
gwp_asan::getThreadLocals()->NextSampleCounter = 1;
|
||||
#endif
|
||||
|
||||
unprotect();
|
||||
|
||||
ptrdiff_t end_diff = c_end - c_start;
|
||||
|
@ -643,7 +643,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
\
|
||||
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
|
||||
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
|
||||
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \
|
||||
M(IOUringSQEsResubmitsAsync, "Total number of asynchronous io_uring SQE resubmits performed") \
|
||||
M(IOUringSQEsResubmitsSync, "Total number of synchronous io_uring SQE resubmits performed") \
|
||||
M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
|
||||
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
|
||||
\
|
||||
@ -753,6 +754,10 @@ The server successfully detected this situation and will download merged part fr
|
||||
\
|
||||
M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP") \
|
||||
M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.") \
|
||||
\
|
||||
M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan") \
|
||||
M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)") \
|
||||
M(GWPAsanFree, "Number of free operations done by GWPAsan") \
|
||||
|
||||
|
||||
#ifdef APPLY_FOR_EXTERNAL_EVENTS
|
||||
|
@ -40,6 +40,7 @@ namespace ProfileEvents
|
||||
Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_);
|
||||
~Timer() { end(); }
|
||||
void cancel() { watch.reset(); }
|
||||
void restart() { watch.restart(); }
|
||||
void end();
|
||||
UInt64 get();
|
||||
|
||||
|
@ -44,11 +44,18 @@ struct ProxyConfiguration
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true only for an HTTPS request sent through an HTTP proxy,
/// and only when tunneling for that combination has not been disabled.
static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy)
{
    if (disable_tunneling_for_https_requests_over_http_proxy)
        return false;

    return request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP;
}
|
||||
|
||||
std::string host = std::string{};
|
||||
Protocol protocol = Protocol::HTTP;
|
||||
uint16_t port = 0;
|
||||
bool tunneling = false;
|
||||
Protocol original_request_protocol = Protocol::HTTP;
|
||||
std::string no_proxy_hosts = std::string{};
|
||||
|
||||
bool isEmpty() const { return host.empty(); }
|
||||
};
|
||||
|
@ -19,13 +19,6 @@ struct ProxyConfigurationResolver
|
||||
virtual void errorReport(const ProxyConfiguration & config) = 0;
|
||||
|
||||
protected:
|
||||
|
||||
static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy)
|
||||
{
|
||||
bool is_https_request_over_http_proxy = request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP;
|
||||
return is_https_request_over_http_proxy && !disable_tunneling_for_https_requests_over_http_proxy;
|
||||
}
|
||||
|
||||
Protocol request_protocol;
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy = false;
|
||||
};
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Common/ProxyConfigurationResolverProvider.h>
|
||||
|
||||
#include <Common/EnvironmentProxyConfigurationResolver.h>
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ProxyListConfigurationResolver.h>
|
||||
#include <Common/RemoteProxyConfigurationResolver.h>
|
||||
@ -17,6 +18,11 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Reads the `proxy.no_proxy` key from `configuration`,
/// passing an empty string to `getString` as the default value.
std::string getNoProxyHosts(const Poco::Util::AbstractConfiguration & configuration)
|
||||
{
|
||||
return configuration.getString("proxy.no_proxy", "");
|
||||
}
|
||||
|
||||
bool isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(
|
||||
const Poco::Util::AbstractConfiguration & configuration)
|
||||
{
|
||||
@ -49,6 +55,7 @@ namespace
|
||||
return std::make_shared<RemoteProxyConfigurationResolver>(
|
||||
server_configuration,
|
||||
request_protocol,
|
||||
buildPocoNonProxyHosts(getNoProxyHosts(configuration)),
|
||||
std::make_shared<RemoteProxyHostFetcherImpl>(),
|
||||
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
|
||||
}
|
||||
@ -88,7 +95,11 @@ namespace
|
||||
|
||||
return uris.empty()
|
||||
? nullptr
|
||||
: std::make_shared<ProxyListConfigurationResolver>(uris, request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
|
||||
: std::make_shared<ProxyListConfigurationResolver>(
|
||||
uris,
|
||||
request_protocol,
|
||||
buildPocoNonProxyHosts(getNoProxyHosts(configuration)),
|
||||
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
|
||||
}
|
||||
|
||||
bool hasRemoteResolver(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Common/ProxyListConfigurationResolver.h>
|
||||
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
namespace DB
|
||||
@ -9,8 +8,11 @@ namespace DB
|
||||
|
||||
ProxyListConfigurationResolver::ProxyListConfigurationResolver(
|
||||
std::vector<Poco::URI> proxies_,
|
||||
Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), proxies(std::move(proxies_))
|
||||
Protocol request_protocol_,
|
||||
const std::string & no_proxy_hosts_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
|
||||
proxies(std::move(proxies_)), no_proxy_hosts(no_proxy_hosts_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -26,12 +28,18 @@ ProxyConfiguration ProxyListConfigurationResolver::resolve()
|
||||
|
||||
auto & proxy = proxies[index];
|
||||
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
|
||||
request_protocol,
|
||||
ProxyConfiguration::protocolFromString(proxy.getScheme()),
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
return ProxyConfiguration {
|
||||
proxy.getHost(),
|
||||
ProxyConfiguration::protocolFromString(proxy.getScheme()),
|
||||
proxy.getPort(),
|
||||
useTunneling(request_protocol, ProxyConfiguration::protocolFromString(proxy.getScheme()), disable_tunneling_for_https_requests_over_http_proxy),
|
||||
request_protocol
|
||||
use_tunneling_for_https_requests_over_http_proxy,
|
||||
request_protocol,
|
||||
no_proxy_hosts
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,11 @@ namespace DB
|
||||
class ProxyListConfigurationResolver : public ProxyConfigurationResolver
|
||||
{
|
||||
public:
|
||||
ProxyListConfigurationResolver(std::vector<Poco::URI> proxies_, Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
|
||||
ProxyListConfigurationResolver(
|
||||
std::vector<Poco::URI> proxies_,
|
||||
Protocol request_protocol_,
|
||||
const std::string & no_proxy_hosts_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
|
||||
|
||||
ProxyConfiguration resolve() override;
|
||||
|
||||
@ -23,6 +27,7 @@ public:
|
||||
|
||||
private:
|
||||
std::vector<Poco::URI> proxies;
|
||||
std::string no_proxy_hosts;
|
||||
|
||||
/// Access counter to get proxy using round-robin strategy.
|
||||
std::atomic<size_t> access_counter;
|
||||
|
@ -42,11 +42,12 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const
|
||||
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
const std::string & no_proxy_hosts_,
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_
|
||||
)
|
||||
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
|
||||
remote_server_configuration(remote_server_configuration_), fetcher(fetcher_)
|
||||
remote_server_configuration(remote_server_configuration_), no_proxy_hosts(no_proxy_hosts_), fetcher(fetcher_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -84,7 +85,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
|
||||
auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string);
|
||||
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
|
||||
bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling(
|
||||
request_protocol,
|
||||
proxy_protocol,
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
@ -94,6 +95,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
|
||||
cached_config.port = proxy_port;
|
||||
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
|
||||
cached_config.original_request_protocol = request_protocol;
|
||||
cached_config.no_proxy_hosts = no_proxy_hosts;
|
||||
cache_timestamp = std::chrono::system_clock::now();
|
||||
cache_valid = true;
|
||||
|
||||
|
@ -41,6 +41,7 @@ public:
|
||||
RemoteProxyConfigurationResolver(
|
||||
const RemoteServerConfiguration & remote_server_configuration_,
|
||||
Protocol request_protocol_,
|
||||
const std::string & no_proxy_hosts_,
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
|
||||
|
||||
@ -50,6 +51,7 @@ public:
|
||||
|
||||
private:
|
||||
RemoteServerConfiguration remote_server_configuration;
|
||||
std::string no_proxy_hosts;
|
||||
std::shared_ptr<RemoteProxyHostFetcher> fetcher;
|
||||
|
||||
std::mutex cache_mutex;
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include <Common/Concepts.h>
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/GWPAsan.h>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_JEMALLOC
|
||||
@ -15,11 +17,12 @@
|
||||
# include <cstdlib>
|
||||
#endif
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
# include <gwp_asan/guarded_pool_allocator.h>
|
||||
|
||||
static gwp_asan::GuardedPoolAllocator GuardedAlloc;
|
||||
#endif
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event GWPAsanAllocateSuccess;
|
||||
extern const Event GWPAsanAllocateFailed;
|
||||
extern const Event GWPAsanFree;
|
||||
}
|
||||
|
||||
namespace Memory
|
||||
{
|
||||
@ -34,17 +37,31 @@ requires DB::OptionalArgument<TAlign...>
|
||||
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
{
|
||||
if constexpr (sizeof...(TAlign) == 1)
|
||||
{
|
||||
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align...)))
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align...)))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (void * ptr = GuardedAlloc.allocate(size))
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -66,11 +83,18 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
|
||||
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
{
|
||||
if (void * ptr = GuardedAlloc.allocate(size))
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return malloc(size);
|
||||
}
|
||||
@ -78,11 +102,18 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
|
||||
inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.shouldSample()))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.shouldSample()))
|
||||
{
|
||||
if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align)))
|
||||
if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align)))
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess);
|
||||
return ptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return aligned_alloc(static_cast<size_t>(align), size);
|
||||
}
|
||||
@ -90,9 +121,10 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align)
|
||||
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
|
||||
{
|
||||
GuardedAlloc.deallocate(ptr);
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
|
||||
GWPAsan::GuardedAlloc.deallocate(ptr);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -109,9 +141,10 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... al
|
||||
return;
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
|
||||
{
|
||||
GuardedAlloc.deallocate(ptr);
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
|
||||
GWPAsan::GuardedAlloc.deallocate(ptr);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -129,9 +162,10 @@ requires DB::OptionalArgument<TAlign...>
|
||||
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept
|
||||
{
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
|
||||
{
|
||||
GuardedAlloc.deallocate(ptr);
|
||||
ProfileEvents::increment(ProfileEvents::GWPAsanFree);
|
||||
GWPAsan::GuardedAlloc.deallocate(ptr);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -183,10 +217,10 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio
|
||||
std::size_t actual_size = 0;
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
if (unlikely(GuardedAlloc.pointerIsMine(ptr)))
|
||||
if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr)))
|
||||
{
|
||||
if (!size)
|
||||
size = GuardedAlloc.getSize(ptr);
|
||||
size = GWPAsan::GuardedAlloc.getSize(ptr);
|
||||
trace = CurrentMemoryTracker::free(size);
|
||||
return size;
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <new>
|
||||
#include "config.h"
|
||||
#include <Common/memory.h>
|
||||
@ -42,27 +41,6 @@ static struct InitializeJemallocZoneAllocatorForOSX
|
||||
} initializeJemallocZoneAllocatorForOSX;
|
||||
#endif
|
||||
|
||||
#if USE_GWP_ASAN
|
||||
|
||||
#include <gwp_asan/optional/options_parser.h>
|
||||
|
||||
/// Both clickhouse_new_delete and clickhouse_common_io links gwp_asan, but It should only init once, otherwise it
|
||||
/// will cause unexpected deadlock.
|
||||
static struct InitGwpAsan
|
||||
{
|
||||
InitGwpAsan()
|
||||
{
|
||||
gwp_asan::options::initOptions();
|
||||
gwp_asan::options::Options &opts = gwp_asan::options::getOptions();
|
||||
GuardedAlloc.init(opts);
|
||||
|
||||
///std::cerr << "GwpAsan is initialized, the options are { Enabled: " << opts.Enabled
|
||||
/// << ", MaxSimultaneousAllocations: " << opts.MaxSimultaneousAllocations
|
||||
/// << ", SampleRate: " << opts.SampleRate << " }\n";
|
||||
}
|
||||
} init_gwp_asan;
|
||||
#endif
|
||||
|
||||
/// Replace default new/delete with memory tracking versions.
|
||||
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
|
||||
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
|
||||
|
117
src/Common/proxyConfigurationToPocoProxyConfig.cpp
Normal file
117
src/Common/proxyConfigurationToPocoProxyConfig.cpp
Normal file
@ -0,0 +1,117 @@
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
|
||||
|
||||
#include <Common/StringUtils.h>
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
||||
#pragma clang diagnostic ignored "-Wnested-anon-types"
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
#pragma clang diagnostic ignored "-Wshadow-field-in-constructor"
|
||||
#pragma clang diagnostic ignored "-Wdtor-name"
|
||||
#include <re2/re2.h>
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/*
|
||||
* Copy `curl` behavior instead of `wget` as it seems to be more flexible.
|
||||
* `curl` strips leading dot and accepts url gitlab.com as a match for no_proxy .gitlab.com,
|
||||
* while `wget` does an exact match.
|
||||
* */
|
||||
std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host)
|
||||
{
|
||||
std::string_view view_without_leading_dot = host;
|
||||
if (host[0] == '.')
|
||||
{
|
||||
view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()};
|
||||
}
|
||||
|
||||
return RE2::QuoteMeta(view_without_leading_dot);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Even though there is not an RFC that defines NO_PROXY, it is usually a comma-separated list of domains.
|
||||
* Different tools implement their own versions of `NO_PROXY` support. Some support CIDR blocks, some support wildcard etc.
|
||||
* Opting for a simple implementation that covers most use cases:
|
||||
* * Support only single wildcard * (match anything)
|
||||
* * Match subdomains
|
||||
* * Strip leading dots
|
||||
* * No regex
|
||||
* * No CIDR blocks
|
||||
* * No fancy stuff about loopback IPs
|
||||
* https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/
|
||||
* Open for discussions
|
||||
* */
|
||||
std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string)
|
||||
{
|
||||
if (no_proxy_hosts_string.empty())
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
static constexpr auto OR_SEPARATOR = "|";
|
||||
static constexpr auto MATCH_ANYTHING = R"(.*)";
|
||||
static constexpr auto MATCH_SUBDOMAINS_REGEX = R"((?:.*\.)?)";
|
||||
|
||||
bool match_any_host = no_proxy_hosts_string.size() == 1 && no_proxy_hosts_string[0] == '*';
|
||||
|
||||
if (match_any_host)
|
||||
{
|
||||
return MATCH_ANYTHING;
|
||||
}
|
||||
|
||||
std::vector<std::string> no_proxy_hosts;
|
||||
splitInto<','>(no_proxy_hosts, no_proxy_hosts_string);
|
||||
|
||||
bool first = true;
|
||||
std::string result;
|
||||
|
||||
for (auto & host : no_proxy_hosts)
|
||||
{
|
||||
trim(host);
|
||||
|
||||
if (host.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!first)
|
||||
{
|
||||
result.append(OR_SEPARATOR);
|
||||
}
|
||||
|
||||
auto escaped_host_without_leading_dot = buildPocoRegexpEntryWithoutLeadingDot(host);
|
||||
|
||||
result.append(MATCH_SUBDOMAINS_REGEX);
|
||||
result.append(escaped_host_without_leading_dot);
|
||||
|
||||
first = false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Copies the fields of a DB::ProxyConfiguration into a Poco proxy config.
/// Both protocol fields are converted to strings via ProxyConfiguration::protocolToString.
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration)
{
    using Source = DB::ProxyConfiguration;

    Poco::Net::HTTPClientSession::ProxyConfig converted;

    /// Proxy endpoint.
    converted.protocol = Source::protocolToString(proxy_configuration.protocol);
    converted.host = proxy_configuration.host;
    converted.port = proxy_configuration.port;

    /// Request routing options.
    converted.tunnel = proxy_configuration.tunneling;
    converted.originalRequestProtocol = Source::protocolToString(proxy_configuration.original_request_protocol);
    converted.nonProxyHosts = proxy_configuration.no_proxy_hosts;

    return converted;
}
|
||||
|
||||
}
|
13
src/Common/proxyConfigurationToPocoProxyConfig.h
Normal file
13
src/Common/proxyConfigurationToPocoProxyConfig.h
Normal file
@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <Poco/Net/HTTPClientSession.h>
|
||||
#include <Common/ProxyConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration);
|
||||
|
||||
std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string);
|
||||
|
||||
}
|
@ -76,22 +76,28 @@ inline std::string xmlNodeAsString(Poco::XML::Node *pNode)
|
||||
|
||||
struct EnvironmentProxySetter
|
||||
{
|
||||
EnvironmentProxySetter(const Poco::URI & http_proxy, const Poco::URI & https_proxy)
|
||||
{
|
||||
if (!http_proxy.empty())
|
||||
{
|
||||
setenv("http_proxy", http_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
static constexpr auto * NO_PROXY = "*";
|
||||
static constexpr auto * HTTP_PROXY = "http://proxy_server:3128";
|
||||
static constexpr auto * HTTPS_PROXY = "https://proxy_server:3128";
|
||||
|
||||
if (!https_proxy.empty())
|
||||
EnvironmentProxySetter()
|
||||
{
|
||||
setenv("https_proxy", https_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
setenv("http_proxy", HTTP_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
|
||||
|
||||
setenv("https_proxy", HTTPS_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
|
||||
|
||||
// Some other tests rely on HTTP clients (e.g, gtest_aws_s3_client), which depend on proxy configuration
|
||||
// since in https://github.com/ClickHouse/ClickHouse/pull/63314 the environment proxy resolver reads only once
|
||||
// from the environment, the proxy configuration will always be there.
|
||||
// The problem is that the proxy server does not exist, causing the test to fail.
|
||||
// To work around this issue, `no_proxy` is set to bypass all domains.
|
||||
setenv("no_proxy", NO_PROXY, 1); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
|
||||
~EnvironmentProxySetter()
|
||||
{
|
||||
unsetenv("http_proxy"); // NOLINT(concurrency-mt-unsafe)
|
||||
unsetenv("https_proxy"); // NOLINT(concurrency-mt-unsafe)
|
||||
unsetenv("no_proxy"); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
};
|
||||
|
24
src/Common/tests/gtest_poco_no_proxy_regex.cpp
Normal file
24
src/Common/tests/gtest_poco_no_proxy_regex.cpp
Normal file
@ -0,0 +1,24 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
|
||||
/// Each comma-separated entry becomes one `|`-joined alternative that is prefixed with the
/// optional-subdomain group and has its non-alphanumeric characters escaped.
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuild)
{
    const std::string no_proxy = "localhost,127.0.0.1,some_other_domain:8080,sub-domain.domain.com";
    const std::string expected_regex
        = R"((?:.*\.)?localhost|(?:.*\.)?127\.0\.0\.1|(?:.*\.)?some_other_domain\:8080|(?:.*\.)?sub\-domain\.domain\.com)";

    ASSERT_EQ(DB::buildPocoNonProxyHosts(no_proxy), expected_regex);
}
|
||||
|
||||
/// A lone `*` is translated into the match-everything pattern.
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildMatchAnything)
{
    const std::string wildcard = "*";
    const std::string expected_regex = ".*";

    ASSERT_EQ(DB::buildPocoNonProxyHosts(wildcard), expected_regex);
}
|
||||
|
||||
/// An empty input yields an empty pattern.
TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildEmpty)
{
    const std::string empty_input;
    const std::string expected_regex;

    ASSERT_EQ(DB::buildPocoNonProxyHosts(empty_input), expected_regex);
}
|
@ -1,6 +1,9 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Common/ProxyConfigurationResolverProvider.h>
|
||||
#include <Common/RemoteProxyConfigurationResolver.h>
|
||||
#include <Common/ProxyListConfigurationResolver.h>
|
||||
#include <Common/EnvironmentProxyConfigurationResolver.h>
|
||||
#include <Common/tests/gtest_global_context.h>
|
||||
#include <Common/tests/gtest_helper_functions.h>
|
||||
|
||||
@ -25,27 +28,19 @@ protected:
|
||||
|
||||
DB::ContextMutablePtr ProxyConfigurationResolverProviderTests::context;
|
||||
|
||||
Poco::URI http_env_proxy_server = Poco::URI("http://http_environment_proxy:3128");
|
||||
Poco::URI https_env_proxy_server = Poco::URI("http://https_environment_proxy:3128");
|
||||
|
||||
Poco::URI http_list_proxy_server = Poco::URI("http://http_list_proxy:3128");
|
||||
Poco::URI https_list_proxy_server = Poco::URI("http://https_list_proxy:3128");
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, EnvironmentResolverShouldBeUsedIfNoSettings)
|
||||
{
|
||||
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
|
||||
EnvironmentProxySetter setter;
|
||||
const auto & config = getContext().context->getConfigRef();
|
||||
|
||||
auto http_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, config)->resolve();
|
||||
auto https_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config)->resolve();
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config);
|
||||
|
||||
ASSERT_EQ(http_configuration.host, http_env_proxy_server.getHost());
|
||||
ASSERT_EQ(http_configuration.port, http_env_proxy_server.getPort());
|
||||
ASSERT_EQ(http_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
|
||||
|
||||
ASSERT_EQ(https_configuration.host, https_env_proxy_server.getHost());
|
||||
ASSERT_EQ(https_configuration.port, https_env_proxy_server.getPort());
|
||||
ASSERT_EQ(https_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
|
||||
@ -57,17 +52,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly)
|
||||
config->setString("proxy.http.uri", http_list_proxy_server.toString());
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
|
||||
|
||||
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
|
||||
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
|
||||
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
|
||||
|
||||
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
|
||||
|
||||
// No https configuration since it's not set
|
||||
ASSERT_EQ(https_proxy_configuration.host, "");
|
||||
ASSERT_EQ(https_proxy_configuration.port, 0);
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
|
||||
@ -79,18 +68,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly)
|
||||
config->setString("proxy.https.uri", https_list_proxy_server.toString());
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
|
||||
|
||||
ASSERT_EQ(http_proxy_configuration.host, "");
|
||||
ASSERT_EQ(http_proxy_configuration.port, 0);
|
||||
|
||||
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
|
||||
|
||||
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
|
||||
|
||||
// still HTTP because the proxy host is not HTTPS
|
||||
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
|
||||
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
|
||||
@ -107,70 +89,15 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
|
||||
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
|
||||
|
||||
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost());
|
||||
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme()));
|
||||
ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort());
|
||||
|
||||
auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
|
||||
|
||||
ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost());
|
||||
|
||||
// still HTTP because the proxy host is not HTTPS
|
||||
ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme()));
|
||||
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
|
||||
}
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTP)
|
||||
{
|
||||
/*
|
||||
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
|
||||
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
|
||||
* the remote resolver was OR was not picked based on the configuration, is to use the
|
||||
* environment resolver. Since the environment resolver is always returned as a fallback,
|
||||
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
|
||||
* succeeds and returns an environment proxy configuration.
|
||||
* */
|
||||
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
|
||||
|
||||
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
|
||||
|
||||
config->setString("proxy", "");
|
||||
config->setString("proxy.https", "");
|
||||
config->setString("proxy.https.resolver", "");
|
||||
config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname");
|
||||
|
||||
// even tho proxy protocol / scheme is http, it should not be picked (prior to this PR, it would be picked)
|
||||
config->setString("proxy.https.resolver.proxy_scheme", "http");
|
||||
config->setString("proxy.https.resolver.proxy_port", "80");
|
||||
config->setString("proxy.https.resolver.proxy_cache_time", "10");
|
||||
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
|
||||
|
||||
/*
|
||||
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
|
||||
* there is no `http` specification for remote resolver
|
||||
* */
|
||||
ASSERT_EQ(http_proxy_configuration.host, http_env_proxy_server.getHost());
|
||||
ASSERT_EQ(http_proxy_configuration.port, http_env_proxy_server.getPort());
|
||||
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::ProxyListConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTPS)
|
||||
{
|
||||
/*
|
||||
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
|
||||
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
|
||||
* the remote resolver was OR was not picked based on the configuration, is to use the
|
||||
* environment resolver. Since the environment resolver is always returned as a fallback,
|
||||
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
|
||||
* succeeds and returns an environment proxy configuration.
|
||||
* */
|
||||
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
|
||||
|
||||
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
|
||||
|
||||
config->setString("proxy", "");
|
||||
@ -185,27 +112,44 @@ TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolC
|
||||
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
|
||||
|
||||
/*
|
||||
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
|
||||
* there is no `http` specification for remote resolver
|
||||
* */
|
||||
ASSERT_EQ(http_proxy_configuration.host, https_env_proxy_server.getHost());
|
||||
ASSERT_EQ(http_proxy_configuration.port, https_env_proxy_server.getPort());
|
||||
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::RemoteProxyConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
// remote resolver is tricky to be tested in unit tests
|
||||
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverHTTPSOnly)
|
||||
{
|
||||
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
|
||||
|
||||
config->setString("proxy", "");
|
||||
config->setString("proxy.https", "");
|
||||
config->setString("proxy.https.resolver", "");
|
||||
config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname");
|
||||
|
||||
// even tho proxy protocol / scheme is http, it should not be picked (prior to this PR, it would be picked)
|
||||
config->setString("proxy.https.resolver.proxy_scheme", "http");
|
||||
config->setString("proxy.https.resolver.proxy_port", "80");
|
||||
config->setString("proxy.https.resolver.proxy_cache_time", "10");
|
||||
|
||||
context->setConfig(config);
|
||||
|
||||
auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config);
|
||||
auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config);
|
||||
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::EnvironmentProxyConfigurationResolver>(http_resolver));
|
||||
ASSERT_TRUE(std::dynamic_pointer_cast<DB::RemoteProxyConfigurationResolver>(https_resolver));
|
||||
}
|
||||
|
||||
template <bool DISABLE_TUNNELING_FOR_HTTPS_REQUESTS_OVER_HTTP_PROXY, bool STRING>
|
||||
void test_tunneling(DB::ContextMutablePtr context)
|
||||
{
|
||||
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
|
||||
|
||||
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
|
||||
|
||||
config->setString("proxy", "");
|
||||
config->setString("proxy.https", "");
|
||||
config->setString("proxy.https.uri", http_list_proxy_server.toString());
|
||||
|
||||
if constexpr (STRING)
|
||||
{
|
||||
@ -230,4 +174,3 @@ TEST_F(ProxyConfigurationResolverProviderTests, TunnelingForHTTPSRequestsOverHTT
|
||||
test_tunneling<true, false>(context);
|
||||
test_tunneling<true, true>(context);
|
||||
}
|
||||
|
||||
|
@ -2,81 +2,38 @@
|
||||
|
||||
#include <Common/EnvironmentProxyConfigurationResolver.h>
|
||||
#include <Common/tests/gtest_helper_functions.h>
|
||||
#include <Common/proxyConfigurationToPocoProxyConfig.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTPandHTTPS)
|
||||
{
|
||||
auto http_proxy_server = Poco::URI("http://proxy_server:3128");
|
||||
auto https_proxy_server = Poco::URI("https://proxy_server:3128");
|
||||
}
|
||||
const auto http_proxy_server = Poco::URI(EnvironmentProxySetter::HTTP_PROXY);
|
||||
const auto https_proxy_server = Poco::URI(EnvironmentProxySetter::HTTPS_PROXY);
|
||||
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTP)
|
||||
{
|
||||
EnvironmentProxySetter setter(http_proxy_server, {});
|
||||
std::string poco_no_proxy_regex = buildPocoNonProxyHosts(EnvironmentProxySetter::NO_PROXY);
|
||||
|
||||
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP);
|
||||
EnvironmentProxySetter setter;
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
EnvironmentProxyConfigurationResolver http_resolver(ProxyConfiguration::Protocol::HTTP);
|
||||
|
||||
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
|
||||
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
|
||||
}
|
||||
auto http_configuration = http_resolver.resolve();
|
||||
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTPNoEnv)
|
||||
{
|
||||
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(http_configuration.host, http_proxy_server.getHost());
|
||||
ASSERT_EQ(http_configuration.port, http_proxy_server.getPort());
|
||||
ASSERT_EQ(http_configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
|
||||
ASSERT_EQ(http_configuration.no_proxy_hosts, poco_no_proxy_regex);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
EnvironmentProxyConfigurationResolver https_resolver(ProxyConfiguration::Protocol::HTTPS);
|
||||
|
||||
ASSERT_EQ(configuration.host, "");
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.port, 0u);
|
||||
}
|
||||
auto https_configuration = https_resolver.resolve();
|
||||
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTPs)
|
||||
{
|
||||
EnvironmentProxySetter setter({}, https_proxy_server);
|
||||
|
||||
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, https_proxy_server.getHost());
|
||||
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
|
||||
}
|
||||
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv)
|
||||
{
|
||||
EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, "");
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
|
||||
ASSERT_EQ(configuration.port, 0u);
|
||||
}
|
||||
|
||||
TEST(EnvironmentProxyConfigurationResolver, TestHTTPsOverHTTPTunnelingDisabled)
|
||||
{
|
||||
// use http proxy for https, this would use connect protocol by default
|
||||
EnvironmentProxySetter setter({}, http_proxy_server);
|
||||
|
||||
bool disable_tunneling_for_https_requests_over_http_proxy = true;
|
||||
|
||||
EnvironmentProxyConfigurationResolver resolver(
|
||||
ProxyConfiguration::Protocol::HTTPS, disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
auto configuration = resolver.resolve();
|
||||
|
||||
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
|
||||
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
|
||||
ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
|
||||
ASSERT_EQ(configuration.tunneling, false);
|
||||
ASSERT_EQ(https_configuration.host, https_proxy_server.getHost());
|
||||
ASSERT_EQ(https_configuration.port, https_proxy_server.getPort());
|
||||
ASSERT_EQ(https_configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
|
||||
ASSERT_EQ(https_configuration.no_proxy_hosts, poco_no_proxy_regex);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,8 @@ namespace
|
||||
{
|
||||
auto proxy_server1 = Poco::URI("http://proxy_server1:3128");
|
||||
auto proxy_server2 = Poco::URI("http://proxy_server2:3128");
|
||||
|
||||
std::string no_proxy_hosts = "localhost,,127.0.0.1,some_other_domain,,,, sub-domain.domain.com,";
|
||||
}
|
||||
|
||||
TEST(ProxyListConfigurationResolver, SimpleTest)
|
||||
@ -17,7 +19,8 @@ TEST(ProxyListConfigurationResolver, SimpleTest)
|
||||
|
||||
ProxyListConfigurationResolver resolver(
|
||||
{proxy_server1, proxy_server2},
|
||||
ProxyConfiguration::Protocol::HTTP);
|
||||
ProxyConfiguration::Protocol::HTTP,
|
||||
no_proxy_hosts);
|
||||
|
||||
auto configuration1 = resolver.resolve();
|
||||
auto configuration2 = resolver.resolve();
|
||||
@ -25,10 +28,12 @@ TEST(ProxyListConfigurationResolver, SimpleTest)
|
||||
ASSERT_EQ(configuration1.host, proxy_server1.getHost());
|
||||
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
|
||||
ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));
|
||||
ASSERT_EQ(configuration1.no_proxy_hosts, no_proxy_hosts);
|
||||
|
||||
ASSERT_EQ(configuration2.host, proxy_server2.getHost());
|
||||
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
|
||||
ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
|
||||
ASSERT_EQ(configuration2.no_proxy_hosts, no_proxy_hosts);
|
||||
}
|
||||
|
||||
TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
|
||||
@ -36,7 +41,8 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
|
||||
|
||||
ProxyListConfigurationResolver resolver(
|
||||
{proxy_server1, proxy_server2},
|
||||
ProxyConfiguration::Protocol::HTTPS);
|
||||
ProxyConfiguration::Protocol::HTTPS,
|
||||
"");
|
||||
|
||||
auto configuration1 = resolver.resolve();
|
||||
auto configuration2 = resolver.resolve();
|
||||
@ -45,11 +51,12 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault)
|
||||
ASSERT_EQ(configuration1.port, proxy_server1.getPort());
|
||||
ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme()));
|
||||
ASSERT_EQ(configuration1.tunneling, true);
|
||||
ASSERT_EQ(configuration1.no_proxy_hosts, "");
|
||||
|
||||
ASSERT_EQ(configuration2.host, proxy_server2.getHost());
|
||||
ASSERT_EQ(configuration2.port, proxy_server2.getPort());
|
||||
ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme()));
|
||||
ASSERT_EQ(configuration1.tunneling, true);
|
||||
ASSERT_EQ(configuration2.no_proxy_hosts, "");
|
||||
}
|
||||
|
||||
TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled)
|
||||
@ -58,6 +65,7 @@ TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled)
|
||||
ProxyListConfigurationResolver resolver(
|
||||
{proxy_server1, proxy_server2},
|
||||
ProxyConfiguration::Protocol::HTTPS,
|
||||
"",
|
||||
disable_tunneling_for_https_requests_over_http_proxy);
|
||||
|
||||
auto configuration1 = resolver.resolve();
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user