Merge branch 'master' into schema-inference-union

This commit is contained in:
Kruglov Pavel 2023-10-20 22:54:11 +02:00 committed by GitHub
commit 6f61ccfe28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
243 changed files with 6905 additions and 2718 deletions

View File

@ -77,6 +77,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -185,6 +186,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -227,6 +229,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -399,6 +402,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -448,6 +452,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -487,6 +492,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
@ -73,6 +74,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -24,6 +24,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0
filter: tree:0
- name: Jepsen Test
run: |
sudo rm -fr "$TEMP_PATH"
@ -53,6 +54,7 @@ jobs:
# with:
# clear-repository: true
# fetch-depth: 0
# filter: tree:0
# - name: Jepsen Test
# run: |
# sudo rm -fr "$TEMP_PATH"

View File

@ -61,6 +61,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -200,6 +201,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -242,6 +244,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -283,6 +286,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -581,6 +585,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -630,6 +635,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -672,6 +678,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -714,6 +721,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -763,6 +771,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -805,6 +814,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -847,6 +857,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -889,6 +900,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -931,6 +943,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -963,6 +976,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -54,6 +54,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -90,6 +91,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
filter: tree:0
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -94,6 +95,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -266,6 +268,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # for performance artifact
filter: tree:0
submodules: true
- name: Build
run: |
@ -350,6 +353,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # for performance artifact
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -1021,6 +1025,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -49,6 +49,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
filter: tree:0
ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |

View File

@ -53,6 +53,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -161,6 +162,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -203,6 +205,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -456,6 +459,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -505,6 +509,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -544,6 +549,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -38,6 +38,7 @@ jobs:
with:
ref: master
fetch-depth: 0
filter: tree:0
- name: Update versions, docker version, changelog, security
env:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}

2
contrib/grpc vendored

@ -1 +1 @@
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74
Subproject commit c52656e2bfcda3450bd6a7c247d2d9eeb8498524

View File

@ -24,6 +24,12 @@ else ()
set (SNAPPY_HAVE_SSSE3 0)
endif ()
if (ARCH_AMD64 AND ENABLE_SSE42)
set (SNAPPY_HAVE_X86_CRC32 1)
else ()
set (SNAPPY_HAVE_X86_CRC32 0)
endif ()
configure_file(
"${SOURCE_DIR}/cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h")

View File

@ -2,8 +2,8 @@
# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically,
# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
@ -31,8 +31,10 @@ RUN arch=${TARGETARCH:-amd64} \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.9.1.1854"
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
@ -46,16 +48,14 @@ ARG PACKAGES="clickhouse-keeper"
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -172,10 +172,15 @@ then
# This is why we add this repository snapshot from CI to the performance test
# package.
mkdir "$PERF_OUTPUT"/ch
git -C "$PERF_OUTPUT"/ch init --bare
git -C "$PERF_OUTPUT"/ch remote add origin /build
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin HEAD:pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:master
# Copy .git only, but skip modules, using tar
tar c -C /build/ --exclude='.git/modules/**' .git | tar x -C "$PERF_OUTPUT"/ch
# Create branch pr and origin/master to have them for the following performance comparison
git -C "$PERF_OUTPUT"/ch branch pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:origin/master
# Clean remote, to not have it stale
git -C "$PERF_OUTPUT"/ch remote | xargs -n1 git -C "$PERF_OUTPUT"/ch remote remove
# And clean all tags
git -C "$PERF_OUTPUT"/ch tag | xargs git -C "$PERF_OUTPUT"/ch tag -d
git -C "$PERF_OUTPUT"/ch reset --soft pr
git -C "$PERF_OUTPUT"/ch log -5
(

View File

@ -23,7 +23,6 @@ COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
@ -33,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.9.1.1854"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
@ -45,16 +44,14 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -5,6 +5,13 @@ ARG DEBIAN_FRONTEND=noninteractive
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
@ -23,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.9.1.1854"
ARG VERSION="23.9.2.56"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
@ -35,13 +42,6 @@ ARG deb_location_url=""
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
ARG TARGETARCH
# install from a web location with deb packages

View File

@ -337,8 +337,8 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ rg --text -o "Found error:.*" fuzzer.log \
|| rg --text -ao "Exception:.*" fuzzer.log \
{ rg -ao "Found error:.*" fuzzer.log \
|| rg -ao "Exception:.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi

View File

@ -61,11 +61,13 @@ function configure()
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
}
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -0,0 +1,52 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.14.78-lts (c8f4ba52c65) FIXME as compared to v23.3.13.6-lts (25635e27551)
#### Build/Testing/Packaging Improvement
* Backported in [#54312](https://github.com/ClickHouse/ClickHouse/issues/54312): Fix strange additional QEMU logging after [#47151](https://github.com/ClickHouse/ClickHouse/issues/47151), see https://s3.amazonaws.com/clickhouse-test-reports/50078/a4743996ee4f3583884d07bcd6501df0cfdaa346/stateless_tests__release__databasereplicated__[3_4].html. [#50442](https://github.com/ClickHouse/ClickHouse/pull/50442) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55391](https://github.com/ClickHouse/ClickHouse/issues/55391): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#54703](https://github.com/ClickHouse/ClickHouse/issues/54703): Enrich `changed_images.json` with the latest tag from master for images that are not changed in the pull request. [#54369](https://github.com/ClickHouse/ClickHouse/pull/54369) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#54679](https://github.com/ClickHouse/ClickHouse/issues/54679): We build and upload them for every push, which isn't worth it. [#54675](https://github.com/ClickHouse/ClickHouse/pull/54675) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)).
* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)).
* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)).
* reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)).
* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)).
* Fix possible parsing error in WithNames formats with disabled input_format_with_names_use_header [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#54430](https://github.com/ClickHouse/ClickHouse/issues/54430) to 23.3: reproduce and fix the bug in removeSharedRecursive"'. [#54731](https://github.com/ClickHouse/ClickHouse/pull/54731) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Test libunwind changes. [#51436](https://github.com/ClickHouse/ClickHouse/pull/51436) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Refactor CI_CONFIG [#52948](https://github.com/ClickHouse/ClickHouse/pull/52948) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use pathlib.Path in S3Helper, rewrite build reports, improve small things [#54010](https://github.com/ClickHouse/ClickHouse/pull/54010) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fixed parameterized_view test after backporting a fix 23.3 [#54401](https://github.com/ClickHouse/ClickHouse/pull/54401) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update automated commit status comment [#54441](https://github.com/ClickHouse/ClickHouse/pull/54441) ([vdimir](https://github.com/vdimir)).
* S3 artifacts [#54504](https://github.com/ClickHouse/ClickHouse/pull/54504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update WebObjectStorage.cpp [#54695](https://github.com/ClickHouse/ClickHouse/pull/54695) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Do not set PR status label [#54799](https://github.com/ClickHouse/ClickHouse/pull/54799) ([vdimir](https://github.com/vdimir)).
* Get rid of the most of `os.path` stuff [#55028](https://github.com/ClickHouse/ClickHouse/pull/55028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).

View File

@ -0,0 +1,51 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.8.4.69-lts (d4d1e7b9ded) FIXME as compared to v23.8.3.48-lts (ebe4eb3d23e)
#### Build/Testing/Packaging Improvement
* Backported in [#55673](https://github.com/ClickHouse/ClickHouse/issues/55673): If the database is already initialized, it doesn't need to be initialized again upon subsequent launches. This can potentially fix the issue of infinite container restarts when the database fails to load within 1000 attempts (relevant for very large databases and multi-node setups). [#50724](https://github.com/ClickHouse/ClickHouse/pull/50724) ([Alexander Nikolaev](https://github.com/AlexNik)).
* Backported in [#55293](https://github.com/ClickHouse/ClickHouse/issues/55293): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#55366](https://github.com/ClickHouse/ClickHouse/issues/55366): Solve issue with launching standalone clickhouse-keeper from clickhouse-server package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55725](https://github.com/ClickHouse/ClickHouse/issues/55725): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/mkaynov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Move obsolete format settings to separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)).
* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)).
* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)).
* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)).
* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)).
* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)).
* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL CATEGORY
* Backported in [#55706](https://github.com/ClickHouse/ClickHouse/issues/55706):. [#55657](https://github.com/ClickHouse/ClickHouse/pull/55657) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* S3 artifacts [#54504](https://github.com/ClickHouse/ClickHouse/pull/54504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Update WebObjectStorage.cpp [#54695](https://github.com/ClickHouse/ClickHouse/pull/54695) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Do not set PR status label [#54799](https://github.com/ClickHouse/ClickHouse/pull/54799) ([vdimir](https://github.com/vdimir)).
* Get rid of the most of `os.path` stuff [#55028](https://github.com/ClickHouse/ClickHouse/pull/55028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Clean data dir and always start an old server version in aggregate functions compatibility test. [#55105](https://github.com/ClickHouse/ClickHouse/pull/55105) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).
* Bump curl to 8.4 [#55492](https://github.com/ClickHouse/ClickHouse/pull/55492) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -0,0 +1,46 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.9.2.56-stable (a1bf3f1de55) FIXME as compared to v23.9.1.1854-stable (8f9a227de1f)
#### Build/Testing/Packaging Improvement
* Backported in [#55295](https://github.com/ClickHouse/ClickHouse/issues/55295): Resource with source code including submodules is built in Darwin special build task. It may be used to build ClickHouse without checkouting submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#55368](https://github.com/ClickHouse/ClickHouse/issues/55368): Solve issue with launching standalone clickhouse-keeper from clickhouse-server package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#55727](https://github.com/ClickHouse/ClickHouse/issues/55727): Fix integration check python script to use gh api url - Add Readme for CI tests. [#55716](https://github.com/ClickHouse/ClickHouse/pull/55716) ([Max K.](https://github.com/mkaynov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)).
* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect merging of Nested for SELECT FINAL FROM SummingMergeTree [#55276](https://github.com/ClickHouse/ClickHouse/pull/55276) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)).
* Fix SIGSEGV in MergeSortingPartialResultTransform (due to zero chunks after remerge()) [#55335](https://github.com/ClickHouse/ClickHouse/pull/55335) ([Azat Khuzhin](https://github.com/azat)).
* Fix data-race in CreatingSetsTransform (on errors) due to throwing shared exception [#55338](https://github.com/ClickHouse/ClickHouse/pull/55338) ([Azat Khuzhin](https://github.com/azat)).
* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)).
* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)).
* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)).
* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)).
* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL CATEGORY
* Backported in [#55708](https://github.com/ClickHouse/ClickHouse/issues/55708):. [#55657](https://github.com/ClickHouse/ClickHouse/pull/55657) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#55691](https://github.com/ClickHouse/ClickHouse/issues/55691):. [#55682](https://github.com/ClickHouse/ClickHouse/pull/55682) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add setting allow_experimental_partial_result [#54514](https://github.com/ClickHouse/ClickHouse/pull/54514) ([vdimir](https://github.com/vdimir)).
* Fix CI skip build and skip tests checks [#54532](https://github.com/ClickHouse/ClickHouse/pull/54532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* check if block is empty after async insert retries [#55143](https://github.com/ClickHouse/ClickHouse/pull/55143) ([Han Fei](https://github.com/hanfei1991)).
* MaterializedPostgreSQL: remove back check [#55297](https://github.com/ClickHouse/ClickHouse/pull/55297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Review [#51946](https://github.com/ClickHouse/ClickHouse/issues/51946) and partially revert it [#55336](https://github.com/ClickHouse/ClickHouse/pull/55336) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove existing moving/ dir if allow_remove_stale_moving_parts is off [#55480](https://github.com/ClickHouse/ClickHouse/pull/55480) ([Mike Kot](https://github.com/myrrc)).
* Bump curl to 8.4 [#55492](https://github.com/ClickHouse/ClickHouse/pull/55492) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -24,12 +24,15 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[after_processing = 'keep',]
[keeper_path = '',]
[s3queue_loading_retries = 0,]
[s3queue_processing_threads_num = 1,]
[s3queue_enable_logging_to_s3queue_log = 0,]
[s3queue_polling_min_timeout_ms = 1000,]
[s3queue_polling_max_timeout_ms = 10000,]
[s3queue_polling_backoff_ms = 0,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_tracked_file_ttl_sec = 0,]
[s3queue_polling_size = 50,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_cleanup_interval_min_ms = 10000,]
[s3queue_cleanup_interval_max_ms = 30000,]
```
**Engine parameters**
@ -46,7 +49,7 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'ordered';
mode = 'unordered';
```
Using named collections:
@ -109,6 +112,18 @@ Possible values:
Default value: `0`.
### s3queue_processing_threads_num {#processing_threads_num}
Number of threads to perform processing. Applies only for `Unordered` mode.
Default value: `1`.
### s3queue_enable_logging_to_s3queue_log {#enable_logging_to_s3queue_log}
Enable logging to `system.s3queue_log`.
Default value: `0`.
### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}
Minimal timeout before next polling (in milliseconds).
@ -161,18 +176,17 @@ Possible values:
Default value: `0`.
### s3queue_polling_size {#polling_size}
### s3queue_cleanup_interval_min_ms {#cleanup_interval_min_ms}
Maximum files to fetch from S3 with SELECT or in background task.
Engine takes files for processing from S3 in batches.
We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path.
For 'Ordered' mode. Defines a minimum boundary for reschedule interval for a background task, which is responsible for maintaining tracked file TTL and maximum tracked files set.
Possible values:
Default value: `10000`.
- Positive integer.
### s3queue_cleanup_interval_max_ms {#cleanup_interval_max_ms}
Default value: `50`.
For 'Ordered' mode. Defines a maximum boundary for reschedule interval for a background task, which is responsible for maintaining tracked file TTL and maximum tracked files set.
Default value: `30000`.
## S3-related Settings {#s3-settings}
@ -227,6 +241,118 @@ For more information about virtual columns see [here](../../../engines/table-eng
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
:::note
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::
## Limitations {#limitations}
1. Duplicated rows can be as a result of:
- an exception happens during parsing in the middle of file processing and retries are enabled via `s3queue_loading_retries`;
- `S3Queue` is configured on multiple servers pointing to the same path in zookeeper and keeper session expires before one server managed to commit processed file, which could lead to another server taking processing of the file, which could be partially or fully processed by the first server;
- abnormal server termination.
2. `S3Queue` is configured on multiple servers pointing to the same path in zookeeper and `Ordered` mode is used, then `s3queue_loading_retries` will not work. This will be fixed soon.
## Introspection {#introspection}
For introspection use `system.s3queue` stateless table and `system.s3queue_log` persistent table.
1. `system.s3queue`. This table is not persistent and shows in-memory state of `S3Queue`: which files are currently being processed, which files are processed or failed.
``` sql
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue
(
`database` String,
`table` String,
`file_name` String,
`rows_processed` UInt64,
`status` String,
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64)
`exception` String
)
ENGINE = SystemS3Queue
COMMENT 'SYSTEM TABLE is built on the fly.' │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Example:
``` sql
SELECT *
FROM system.s3queue
Row 1:
──────
zookeeper_path: /clickhouse/s3queue/25ea5621-ae8c-40c7-96d0-cec959c5ab88/3b3f66a1-9866-4c2e-ba78-b6bfa154207e
file_name: wikistat/original/pageviews-20150501-030000.gz
rows_processed: 5068534
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:31
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5068534,'SelectedBytes':198132283,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':2480,'S3QueueSetFileProcessedMicroseconds':9985,'S3QueuePullMicroseconds':273776,'LogTest':17}
exception:
```
2. `system.s3queue_log`. Persistent table. Has the same information as `system.s3queue`, but for `processed` and `failed` files.
The table has the following structure:
``` sql
SHOW CREATE TABLE system.s3queue_log
Query id: 0ad619c3-0f2a-4ee4-8b40-c73d86e04314
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue_log
(
`event_date` Date,
`event_time` DateTime,
`table_uuid` String,
`file_name` String,
`rows_processed` UInt64,
`status` Enum8('Processed' = 0, 'Failed' = 1),
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64),
`exception` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, event_time)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
In order to use `system.s3queue_log` define its configuration in server config file:
``` xml
<s3queue_log>
<database>system</database>
<table>s3queue_log</table>
</s3queue_log>
```
Example:
``` sql
SELECT *
FROM system.s3queue_log
Row 1:
──────
event_date: 2023-10-13
event_time: 2023-10-13 13:10:12
table_uuid:
file_name: wikistat/original/pageviews-20150501-020000.gz
rows_processed: 5112621
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:12
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5112621,'SelectedBytes':198577687,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':1934,'S3QueueSetFileProcessedMicroseconds':17063,'S3QueuePullMicroseconds':5841972,'LogTest':17}
exception:
```

View File

@ -0,0 +1,91 @@
---
slug: /en/operations/external-authenticators/http
title: "HTTP"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
HTTP server can be used to authenticate ClickHouse users. HTTP authentication can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths. Currently, [Basic](https://datatracker.ietf.org/doc/html/rfc7617) authentication scheme using GET method is supported.
## HTTP authentication server definition {#http-auth-server-definition}
To define HTTP authentication server you must add `http_authentication_servers` section to the `config.xml`.
**Example**
```xml
<clickhouse>
<!- ... -->
<http_authentication_servers>
<basic_auth_server>
<uri>http://localhost:8000/auth</uri>
<connection_timeout_ms>1000</connection_timeout_ms>
<receive_timeout_ms>1000</receive_timeout_ms>
<send_timeout_ms>1000</send_timeout_ms>
<max_tries>3</max_tries>
<retry_initial_backoff_ms>50</retry_initial_backoff_ms>
<retry_max_backoff_ms>1000</retry_max_backoff_ms>
</basic_auth_server>
</http_authentication_servers>
</clickhouse>
```
Note, that you can define multiple HTTP servers inside the `http_authentication_servers` section using distinct names.
**Parameters**
- `uri` - URI for making authentication request
Timeouts in milliseconds on the socket used for communicating with the server:
- `connection_timeout_ms` - Default: 1000 ms.
- `receive_timeout_ms` - Default: 1000 ms.
- `send_timeout_ms` - Default: 1000 ms.
Retry parameters:
- `max_tries` - The maximum number of attempts to make an authentication request. Default: 3
- `retry_initial_backoff_ms` - The backoff initial interval on retry. Default: 50 ms
- `retry_max_backoff_ms` - The maximum backoff interval. Default: 1000 ms
### Enabling HTTP authentication in `users.xml` {#enabling-http-auth-in-users-xml}
In order to enable HTTP authentication for the user, specify `http_authentication` section instead of `password` or similar sections in the user definition.
Parameters:
- `server` - Name of the HTTP authentication server configured in the main `config.xml` file as described previously.
- `scheme` - HTTP authentication scheme. `Basic` is only supported now. Default: Basic
Example (goes into `users.xml`):
```xml
<clickhouse>
<!- ... -->
<my_user>
<!- ... -->
<http_authentication>
<server>basic_server</server>
<scheme>basic</scheme>
</http_authentication>
</test_user_2>
</clickhouse>
```
:::note
Note that HTTP authentication cannot be used alongside with any other authentication mechanism. The presence of any other sections like `password` alongside `http_authentication` will force ClickHouse to shutdown.
:::
### Enabling HTTP authentication using SQL {#enabling-http-auth-using-sql}
When [SQL-driven Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled in ClickHouse, users identified by HTTP authentication can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH HTTP SERVER 'basic_server' SCHEME 'Basic'
```
...or, `Basic` is default without explicit scheme definition
```sql
CREATE USER my_user IDENTIFIED WITH HTTP SERVER 'basic_server'
```
### Passing session settings {#passing-session-settings}
If a response body from HTTP authentication server has JSON format and contains `settings` sub-object, ClickHouse will try parse its key: value pairs as string values and set them as session settings for authenticated user's current session. If parsing is failed, a response body from server will be ignored.

View File

@ -16,3 +16,4 @@ The following external authenticators and directories are supported:
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
- [SSL X.509 authentication](./ssl-x509.md#ssl-external-authentication)
- HTTP [Authenticator](./http.md)

View File

@ -569,7 +569,6 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc
<max_size_rows>10M</max_size_rows>
<max_file_segment_size>1M</max_file_segment_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
</tiny_local_cache>
<!-- highlight-end -->
</disks>
@ -2631,3 +2630,114 @@ Possible values:
- 1 — Enabled.
Default value: 0.
## proxy {#proxy}
Define proxy servers for HTTP and HTTPS requests, currently supported by S3 storage, S3 table functions, and URL functions.
There are three ways to define proxy servers: environment variables, proxy lists, and remote proxy resolvers.
### Environment variables
The `http_proxy` and `https_proxy` environment variables allow you to specify a
proxy server for a given protocol. If you have it set on your system, it should work seamlessly.
This is the simplest approach if a given protocol has
only one proxy server and that proxy server doesn't change.
### Proxy lists
This approach allows you to specify one or more
proxy servers for a protocol. If more than one proxy server is defined,
ClickHouse uses the different proxies on a round-robin basis, balancing the
load across the servers. This is the simplest approach if there is more than
one proxy server for a protocol and the list of proxy servers doesn't change.
### Configuration template
``` xml
<proxy>
<http>
<uri>http://proxy1</uri>
<uri>http://proxy2:3128</uri>
</http>
<https>
<uri>http://proxy1:3128</uri>
</https>
</proxy>
```
`<proxy>` fields
* `<http>` - A list of one or more HTTP proxies
* `<https>` - A list of one or more HTTPS proxies
`<http>` and `<https>` fields
* `<uri>` - The URI of the proxy
### Remote proxy resolvers
It's possible that the proxy servers change dynamically. In that
case, you can define the endpoint of a resolver. ClickHouse sends
an empty GET request to that endpoint, the remote resolver should return the proxy host.
ClickHouse will use it to form the proxy URI using the following template: `{proxy_scheme}://{proxy_host}:{proxy_port}`
### Configuration template
``` xml
<proxy>
<http>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>80</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</http>
<https>
<resolver>
<endpoint>http://resolver:8080/hostname</endpoint>
<proxy_scheme>http</proxy_scheme>
<proxy_port>3128</proxy_port>
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</https>
</proxy>
```
`<proxy>` fields
* `<http>` - A list of one or more resolvers*
* `<https>` - A list of one or more resolvers*
`<http>` and `<https>` fields
* `<resolver>` - The endpoint and other details for a resolver.
You can have multiple `<resolver>` elements, but only the first
`<resolver>` for a given protocol is used. Any other `<resolver>`
elements for that protocol are ignored. That means load balancing
(if needed) should be implemented by the remote resolver.
`<resolver>` fields
* `<endpoint>` - The URI of the proxy resolver
* `<proxy_scheme>` - The protocol of the final proxy URI. This can be either `http` or `https`.
* `<proxy_port>` - The port number of the proxy resolver
* `<proxy_cache_time>` - The time in seconds that values from the resolver
should be cached by ClickHouse. Setting this value to `0` causes ClickHouse
to contact the resolver for every HTTP or HTTPS request.
### Precedence
Proxy settings are determined in the following order:
1. Remote proxy resolvers
2. Proxy lists
3. Environment variables
ClickHouse will check the highest priority resolver type for the request protocol. If it is not defined,
it will check the next highest priority resolver type, until it reaches the environment resolver.
This also allows a mix of resolver types can be used.

View File

@ -3310,6 +3310,28 @@ Possible values:
Default value: `0`.
## mysql_map_string_to_text_in_show_columns {#mysql_map_string_to_text_in_show_columns}
When enabled, [String](../../sql-reference/data-types/string.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns}
When enabled, [FixedString](../../sql-reference/data-types/fixedstring.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Has effect only when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.

View File

@ -114,7 +114,7 @@ Example of disk configuration:
## Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
Cache uses `LRU` cache policy.
@ -192,12 +192,12 @@ These settings should be defined in the disk configuration section.
- `enable_bypass_cache_with_threshold` - allows to skip cache completely in case the requested read range exceeds the threshold. Default: `false`. This threshold can be defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`).
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `8388608` (`8Mi`).
- `max_elements` - a limit for a number of cache files. Default: `10000000`.
- `load_metadata_threads` - number of threads being used to load cache metadata on starting time. Default: `1`.
File Cache **query/profile settings**:
Some of these settings will disable cache features per query/profile that are enabled by default or in disk configuration settings. For example, you can enable cache in disk configuration and disable it per query/profile setting `enable_filesystem_cache` to `false`. Also setting `cache_on_write_operations` to `true` in disk configuration means that "write-though" cache is enabled. But if you need to disable this general setting per specific queries then setting `enable_filesystem_cache_on_write_operations` to `false` means that write operations cache will be disabled for a specific query/profile.
@ -248,9 +248,9 @@ DESCRIBE FILESYSTEM CACHE 's3_cache'
```
``` text
┌────max_size─┬─max_elements─┬─max_file_segment_size─┬─cache_on_write_operations─┬─enable_cache_hits_threshold─┬─current_size─┬─current_elements─┬─path────────┬─do_not_evict_index_and_mark_files─┐
│ 10000000000 │ 1048576 │ 104857600 │ 1 │ 0 │ 3276 │ 54 │ /s3_cache/ │ 1
└─────────────┴──────────────┴───────────────────────┴────────────────────────────────────────────────────────┴──────────────┴──────────────────┴─────────────┴───────────────────────────────────┘
┌────max_size─┬─max_elements─┬─max_file_segment_size─┬─boundary_alignment─┬─cache_on_write_operations─┬─cache_hits_threshold─┬─current_size─┬─current_elements─┬─path───────┬─background_download_threads─┬─enable_bypass_cache_with_threshold─┐
│ 10000000000 │ 1048576 │ 104857600 │ 4194304 │ 1 │ 0 │ 3276 │ 54 │ /s3_cache/ │ 2 │ 0
└─────────────┴──────────────┴───────────────────────┴────────────────────┴───────────────────────────┴──────────────────────┴──────────────┴──────────────────┴────────────┴─────────────────────────────┴───────────────────────────────────┘
```
Cache current metrics:

View File

@ -16,6 +16,7 @@ Columns:
- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the current connection expired.
- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version.
- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session id of the connection.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — Xid of the current session.
Example:

View File

@ -103,4 +103,5 @@ ClickHouse-specific aggregate functions:
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -0,0 +1,67 @@
---
slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets
sidebar_position: 312
sidebar_label: largestTriangleThreeBuckets
---
# largestTriangleThreeBuckets
Applies the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm to the input data.
The algorithm is used for downsampling time series data for visualization. It is designed to operate on series sorted by x coordinate.
It works by dividing the sorted series into buckets and then finding the largest triangle in each bucket. The number of buckets is equal to the number of points in the resulting series.
the function will sort data by `x` and then apply the downsampling algorithm to the sorted data.
**Syntax**
``` sql
largestTriangleThreeBuckets(n)(x, y)
```
Alias: `lttb`.
**Arguments**
- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
**Parameters**
- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Array](../../../sql-reference/data-types/array.md) of [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
**Example**
Input table:
``` text
┌─────x───────┬───────y──────┐
│ 1.000000000 │ 10.000000000 │
│ 2.000000000 │ 20.000000000 │
│ 3.000000000 │ 15.000000000 │
│ 8.000000000 │ 60.000000000 │
│ 9.000000000 │ 55.000000000 │
│ 10.00000000 │ 70.000000000 │
│ 4.000000000 │ 30.000000000 │
│ 5.000000000 │ 40.000000000 │
│ 6.000000000 │ 35.000000000 │
│ 7.000000000 │ 50.000000000 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT largestTriangleThreeBuckets(4)(x, y) FROM largestTriangleThreeBuckets_test;
```
Result:
``` text
┌────────largestTriangleThreeBuckets(3)(x, y)───────────┐
│ [(1,10),(3,15),(5,40),(10,70)] │
└───────────────────────────────────────────────────────┘
```

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[VALID UNTIL datetime]
[IN access_storage_type]
@ -40,6 +40,7 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
- `IDENTIFIED WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa', KEY 'another_public_key' TYPE 'ssh-ed25519'`
- `IDENTIFIED WITH http SERVER 'http_server'` or `IDENTIFIED WITH http SERVER 'http_server' SCHEME 'basic'`
- `IDENTIFIED BY 'qwerty'`
Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule.

View File

@ -112,7 +112,8 @@ EOF
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
fi
sha512sum "$TARBALL" > "$TARBALL".sha512
# Cut the $OUTPUT_DIR/ from the sha512sum output to make it universal
sha512sum "$TARBALL" | sed "s|$OUTPUT_DIR/||" > "$TARBALL".sha512
rm -r "$PKG_PATH"
}

View File

@ -391,7 +391,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
auto code = zookeeper->tryMulti(ops, responses);
if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
return std::make_shared<zkutil::EphemeralNodeHolder>(current_worker_path, *zookeeper, false, false, description);
return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper);
if (code == Coordination::Error::ZBADVERSION)
{

View File

@ -821,77 +821,85 @@ function insertChart(i) {
let move_text = document.createTextNode('✥');
move.appendChild(move_text);
let is_dragging = false;
move.addEventListener('mousedown', e => {
const idx = getCurrentIndex();
is_dragging = true;
let drag_state = {
is_dragging: false,
idx: null,
offset_x: null,
offset_y: null,
displace_idx: null,
displace_chart: null
};
function dragStop(e) {
drag_state.is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (drag_state.displace_idx !== null) {
const elem = queries[drag_state.idx];
queries.splice(drag_state.idx, 1);
queries.splice(drag_state.displace_idx, 0, elem);
drag_state.displace_chart.className = 'chart';
drawAll();
}
}
function dragMove(e) {
if (!drag_state.is_dragging) return;
let x = e.clientX - drag_state.offset_x;
let y = e.clientY - drag_state.offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
drag_state.displace_idx = null;
drag_state.displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == drag_state.idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
drag_state.displace_idx = current_idx;
drag_state.displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
function dragStart(e) {
if (e.button !== 0) return; /// left button only
move.setPointerCapture(e.pointerId);
drag_state.is_dragging = true;
drag_state.idx = getCurrentIndex();
chart.className = 'chart chart-moving';
let offset_x = e.clientX;
let offset_y = e.clientY;
drag_state.offset_x = e.clientX;
drag_state.offset_y = e.clientY;
}
let displace_idx = null;
let displace_chart = null;
function mouseup(e) {
is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (displace_idx !== null) {
const elem = queries[idx];
queries.splice(idx, 1);
queries.splice(displace_idx, 0, elem);
displace_chart.className = 'chart';
drawAll();
}
}
function mousemove(e) {
if (!is_dragging) {
document.body.removeEventListener('mousemove', mousemove);
document.body.removeEventListener('mouseup', mouseup);
return;
}
let x = e.clientX - offset_x;
let y = e.clientY - offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
displace_idx = null;
displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
displace_idx = current_idx;
displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
document.body.addEventListener('mouseup', mouseup);
document.body.addEventListener('mousemove', mousemove);
});
/// Read https://www.redblobgames.com/making-of/draggable/
move.addEventListener('pointerdown', dragStart);
move.addEventListener('pointermove', dragMove);
move.addEventListener('pointerup', dragStop);
move.addEventListener('pointerancel', dragStop);
move.addEventListener('touchstart', (e) => e.preventDefault());
let maximize = document.createElement('a');
let maximize_text = document.createTextNode('🗖');

View File

@ -567,7 +567,7 @@ AccessChangesNotifier & AccessControl::getChangesNotifier()
}
UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
AuthResult AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const
{
try
{

View File

@ -118,7 +118,7 @@ public:
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
AuthResult authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
/// Makes a backup of access entities.
void restoreFromBackup(RestorerFromBackup & restorer) override;

View File

@ -85,7 +85,11 @@ namespace
}
bool Authentication::areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators)
bool Authentication::areCredentialsValid(
const Credentials & credentials,
const AuthenticationData & auth_data,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings)
{
if (!credentials.isReady())
return false;
@ -100,6 +104,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:
@ -133,6 +138,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::KERBEROS:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::SSL_CERTIFICATE:
@ -177,6 +183,14 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
case AuthenticationType::HTTP:
switch (auth_data.getHTTPAuthenticationScheme())
{
case HTTPAuthenticationScheme::BASIC:
return external_authenticators.checkHTTPBasicCredentials(
auth_data.getHTTPAuthenticationServerName(), *basic_credentials, settings);
}
case AuthenticationType::MAX:
break;
}
@ -192,6 +206,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:
@ -218,6 +233,7 @@ bool Authentication::areCredentialsValid(const Credentials & credentials, const
case AuthenticationType::DOUBLE_SHA1_PASSWORD:
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::KERBEROS:

View File

@ -14,12 +14,19 @@ namespace ErrorCodes
class Credentials;
class ExternalAuthenticators;
class SettingsChanges;
/// TODO: Try to move this checking to Credentials.
struct Authentication
{
/// Checks the credentials (passwords, readiness, etc.)
static bool areCredentialsValid(const Credentials & credentials, const AuthenticationData & auth_data, const ExternalAuthenticators & external_authenticators);
/// If necessary, makes a request to external authenticators and fills in the session settings if they were
/// returned by the authentication server
static bool areCredentialsValid(
const Credentials & credentials,
const AuthenticationData & auth_data,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings);
// A signaling class used to communicate requirements for credentials.
template <typename CredentialsType>

View File

@ -105,7 +105,8 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
&& (lhs.ssh_keys == rhs.ssh_keys);
&& (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme)
&& (lhs.http_auth_server_name == rhs.http_auth_server_name);
}
@ -128,6 +129,7 @@ void AuthenticationData::setPassword(const String & password_)
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
case AuthenticationType::HTTP:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify password for authentication type {}", toString(type));
case AuthenticationType::MAX:
@ -232,6 +234,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
case AuthenticationType::HTTP:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify password binary hash for authentication type {}", toString(type));
case AuthenticationType::MAX:
@ -326,6 +329,12 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
}
case AuthenticationType::HTTP:
{
node->children.push_back(std::make_shared<ASTLiteral>(getHTTPAuthenticationServerName()));
node->children.push_back(std::make_shared<ASTLiteral>(toString(getHTTPAuthenticationScheme())));
break;
}
case AuthenticationType::NO_PASSWORD: [[fallthrough]];
case AuthenticationType::MAX:
@ -484,6 +493,17 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
auth_data.setSSLCertificateCommonNames(std::move(common_names));
}
else if (query.type == AuthenticationType::HTTP)
{
String server = checkAndGetLiteralArgument<String>(args[0], "http_auth_server_name");
auto scheme = HTTPAuthenticationScheme::BASIC; // Default scheme
if (args_size > 1)
scheme = parseHTTPAuthenticationScheme(checkAndGetLiteralArgument<String>(args[1], "scheme"));
auth_data.setHTTPAuthenticationServerName(server);
auth_data.setHTTPAuthenticationScheme(scheme);
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected ASTAuthenticationData structure");

View File

@ -1,13 +1,14 @@
#pragma once
#include <Access/Common/AuthenticationType.h>
#include <Common/SSH/Wrappers.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Access/Common/HTTPAuthenticationScheme.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/Access/ASTAuthenticationData.h>
#include <Common/SSH/Wrappers.h>
#include <vector>
#include <base/types.h>
#include <boost/container/flat_set.hpp>
#include <vector>
namespace DB
{
@ -61,6 +62,12 @@ public:
const std::vector<ssh::SSHKey> & getSSHKeys() const { return ssh_keys; }
void setSSHKeys(std::vector<ssh::SSHKey> && ssh_keys_) { ssh_keys = std::forward<std::vector<ssh::SSHKey>>(ssh_keys_); }
HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; }
void setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; }
const String & getHTTPAuthenticationServerName() const { return http_auth_server_name; }
void setHTTPAuthenticationServerName(const String & name) { http_auth_server_name = name; }
friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs);
friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); }
@ -88,6 +95,9 @@ private:
boost::container::flat_set<String> ssl_certificate_common_names;
String salt;
std::vector<ssh::SSHKey> ssh_keys;
/// HTTP authentication properties
String http_auth_server_name;
HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC;
};
}

View File

@ -67,6 +67,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty
static const auto info = make_info("SSH_KEY");
return info;
}
case AuthenticationType::HTTP:
{
static const auto info = make_info("HTTP");
return info;
}
case AuthenticationType::MAX:
break;
}

View File

@ -37,6 +37,9 @@ enum class AuthenticationType
/// The check is performed on server side by decrypting the data and comparing with the original string.
SSH_KEY,
/// Authentication through HTTP protocol
HTTP,
MAX,
};

View File

@ -0,0 +1,31 @@
#include "HTTPAuthenticationScheme.h"
#include <base/types.h>
#include <Poco/String.h>
#include <Common/Exception.h>
#include <magic_enum.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
String toString(HTTPAuthenticationScheme scheme)
{
return String(magic_enum::enum_name(scheme));
}
HTTPAuthenticationScheme parseHTTPAuthenticationScheme(const String & scheme_str)
{
auto scheme = magic_enum::enum_cast<HTTPAuthenticationScheme>(Poco::toUpper(scheme_str));
if (!scheme)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown HTTP authentication scheme: {}. Possible value is 'BASIC'", scheme_str);
return *scheme;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <base/types.h>
namespace DB
{
enum class HTTPAuthenticationScheme
{
BASIC,
};
String toString(HTTPAuthenticationScheme scheme);
HTTPAuthenticationScheme parseHTTPAuthenticationScheme(const String & scheme_str);
}

View File

@ -262,9 +262,16 @@ void ContextAccess::initialize()
UserPtr changed_user = entity ? typeid_cast<UserPtr>(entity) : nullptr;
std::lock_guard lock2{ptr->mutex};
ptr->setUser(changed_user);
if (!ptr->user && !ptr->user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041, a)");
});
setUser(access_control->read<User>(*params.user_id));
if (!user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041, b)");
initialized = true;
}
@ -378,12 +385,16 @@ UserPtr ContextAccess::tryGetUser() const
String ContextAccess::getUserName() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
return user_name;
}
std::shared_ptr<const EnabledRolesInfo> ContextAccess::getRolesInfo() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (roles_info)
return roles_info;
static const auto no_roles = std::make_shared<EnabledRolesInfo>();
@ -394,6 +405,9 @@ RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, co
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
RowPolicyFilterPtr filter;
if (enabled_row_policies)
filter = enabled_row_policies->getFilter(database, table_name, filter_type);
@ -414,6 +428,9 @@ std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (!enabled_quota)
{
if (roles_info)
@ -445,6 +462,8 @@ std::optional<QuotaUsage> ContextAccess::getQuotaUsage() const
SettingsChanges ContextAccess::getDefaultSettings() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (enabled_settings)
{
if (auto info = enabled_settings->getInfo())
@ -457,6 +476,8 @@ SettingsChanges ContextAccess::getDefaultSettings() const
std::shared_ptr<const SettingsProfilesInfo> ContextAccess::getDefaultProfileInfo() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (enabled_settings)
return enabled_settings->getInfo();
static const auto everything_by_default = std::make_shared<SettingsProfilesInfo>(*access_control);
@ -467,6 +488,8 @@ std::shared_ptr<const SettingsProfilesInfo> ContextAccess::getDefaultProfileInfo
std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (access)
return access;
static const auto nothing_granted = std::make_shared<AccessRights>();
@ -477,6 +500,8 @@ std::shared_ptr<const AccessRights> ContextAccess::getAccessRights() const
std::shared_ptr<const AccessRights> ContextAccess::getAccessRightsWithImplicit() const
{
std::lock_guard lock{mutex};
if (initialized && !user && !user_was_dropped)
throw Exception(ErrorCodes::LOGICAL_ERROR, "ContextAccess is inconsistent (bug 55041)");
if (access_with_implicit)
return access_with_implicit;
static const auto nothing_granted = std::make_shared<AccessRights>();

View File

@ -183,9 +183,11 @@ private:
const AccessControl * access_control = nullptr;
const Params params;
mutable std::atomic<bool> initialized = false; // can be removed after Bug 5504 is resolved
mutable std::atomic<bool> user_was_dropped = false;
mutable std::atomic<Poco::Logger *> trace_log = nullptr;
mutable std::mutex mutex;
mutable UserPtr user TSA_GUARDED_BY(mutex);
mutable String user_name TSA_GUARDED_BY(mutex);
mutable scope_guard subscription_for_user_change TSA_GUARDED_BY(mutex);
@ -198,8 +200,6 @@ private:
mutable std::shared_ptr<const EnabledRowPolicies> row_policies_of_initial_user TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledQuota> enabled_quota TSA_GUARDED_BY(mutex);
mutable std::shared_ptr<const EnabledSettings> enabled_settings TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}

View File

@ -1,11 +1,14 @@
#include <Access/ExternalAuthenticators.h>
#include <Access/LDAPClient.h>
#include <Access/SettingsAuthResponseParser.h>
#include <Access/resolveSetting.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/SettingsChanges.h>
#include <Common/SipHash.h>
#include <Common/quoteString.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <Poco/Util/AbstractConfiguration.h>
#include <optional>
#include <utility>
@ -230,6 +233,24 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::
params.keytab = config.getString("kerberos.keytab", "");
}
HTTPAuthClientParams parseHTTPAuthParams(const Poco::Util::AbstractConfiguration & config, const String & prefix)
{
HTTPAuthClientParams http_auth_params;
http_auth_params.uri = config.getString(prefix + ".uri");
size_t connection_timeout_ms = config.getInt(prefix + ".connection_timeout_ms", 1000);
size_t receive_timeout_ms = config.getInt(prefix + ".receive_timeout_ms", 1000);
size_t send_timeout_ms = config.getInt(prefix + ".send_timeout_ms", 1000);
http_auth_params.timeouts = ConnectionTimeouts{connection_timeout_ms, receive_timeout_ms, send_timeout_ms};
http_auth_params.max_tries = config.getInt(prefix + ".max_tries", 3);
http_auth_params.retry_initial_backoff_ms = config.getInt(prefix + ".retry_initial_backoff_ms", 50);
http_auth_params.retry_max_backoff_ms = config.getInt(prefix + ".retry_max_backoff_ms", 1000);
return http_auth_params;
}
}
void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix)
@ -265,6 +286,9 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
std::size_t ldap_servers_key_count = 0;
std::size_t kerberos_keys_count = 0;
std::size_t http_auth_server_keys_count = 0;
const String http_auth_servers_config = "http_authentication_servers";
for (auto key : all_keys)
{
@ -276,6 +300,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
ldap_servers_key_count += (key == "ldap_servers");
kerberos_keys_count += (key == "kerberos");
http_auth_server_keys_count += (key == http_auth_servers_config);
}
if (ldap_servers_key_count > 1)
@ -284,6 +309,25 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
if (kerberos_keys_count > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple kerberos sections are not allowed");
if (http_auth_server_keys_count > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple http_authentication_servers sections are not allowed");
Poco::Util::AbstractConfiguration::Keys http_auth_server_names;
config.keys(http_auth_servers_config, http_auth_server_names);
http_auth_servers.clear();
for (const auto & http_auth_server_name : http_auth_server_names)
{
String prefix = fmt::format("{}.{}", http_auth_servers_config, http_auth_server_name);
try
{
http_auth_servers[http_auth_server_name] = parseHTTPAuthParams(config, prefix);
}
catch (...)
{
tryLogCurrentException(log, "Could not parse HTTP auth server" + backQuote(http_auth_server_name));
}
}
Poco::Util::AbstractConfiguration::Keys ldap_server_names;
config.keys("ldap_servers", ldap_server_names);
ldap_client_params_blueprint.clear();
@ -490,4 +534,27 @@ GSSAcceptorContext::Params ExternalAuthenticators::getKerberosParams() const
return kerberos_params.value();
}
HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const String& server) const
{
std::scoped_lock lock{mutex};
const auto it = http_auth_servers.find(server);
if (it == http_auth_servers.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP server '{}' is not configured", server);
return it->second;
}
bool ExternalAuthenticators::checkHTTPBasicCredentials(
const String & server, const BasicCredentials & credentials, SettingsChanges & settings) const
{
auto params = getHTTPAuthenticationParams(server);
HTTPBasicAuthClient<SettingsAuthResponseParser> client(params);
auto [is_ok, settings_from_auth_server] = client.authenticate(credentials.getUserName(), credentials.getPassword());
if (is_ok)
std::ranges::move(settings_from_auth_server, std::back_inserter(settings));
return is_ok;
}
}

View File

@ -1,11 +1,14 @@
#pragma once
#include <Access/LDAPClient.h>
#include <Access/Credentials.h>
#include <Access/GSSAcceptor.h>
#include <Access/HTTPAuthClient.h>
#include <Access/LDAPClient.h>
#include <base/defines.h>
#include <base/types.h>
#include <base/extended_types.h>
#include <base/types.h>
#include <Poco/URI.h>
#include <chrono>
#include <map>
@ -27,6 +30,8 @@ namespace Poco
namespace DB
{
class SettingsChanges;
class ExternalAuthenticators
{
public:
@ -37,10 +42,13 @@ public:
bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials,
const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const;
bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const;
bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, SettingsChanges & settings) const;
GSSAcceptorContext::Params getKerberosParams() const;
private:
HTTPAuthClientParams getHTTPAuthenticationParams(const String& server) const;
struct LDAPCacheEntry
{
UInt128 last_successful_params_hash = 0;
@ -56,6 +64,7 @@ private:
LDAPParams ldap_client_params_blueprint TSA_GUARDED_BY(mutex) ;
mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ;
std::optional<GSSAcceptorContext::Params> kerberos_params TSA_GUARDED_BY(mutex) ;
std::unordered_map<String, HTTPAuthClientParams> http_auth_servers TSA_GUARDED_BY(mutex) ;
void resetImpl() TSA_REQUIRES(mutex);
};

View File

@ -0,0 +1,93 @@
#pragma once
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <base/sleep.h>
#include <Poco/Net/HTTPBasicCredentials.h>
namespace DB
{
struct HTTPAuthClientParams
{
Poco::URI uri;
ConnectionTimeouts timeouts;
size_t max_tries;
size_t retry_initial_backoff_ms;
size_t retry_max_backoff_ms;
};
template <typename TResponseParser>
class HTTPAuthClient
{
public:
using Result = TResponseParser::Result;
HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
: timeouts{params.timeouts}
, max_tries{params.max_tries}
, retry_initial_backoff_ms{params.retry_initial_backoff_ms}
, retry_max_backoff_ms{params.retry_max_backoff_ms}
, uri{params.uri}
, parser{parser_}
{
}
Result authenticateRequest(Poco::Net::HTTPRequest & request) const
{
auto session = makeHTTPSession(uri, timeouts);
Poco::Net::HTTPResponse response;
auto milliseconds_to_wait = retry_initial_backoff_ms;
for (size_t attempt = 0; attempt < max_tries; ++attempt)
{
bool last_attempt = attempt + 1 >= max_tries;
try
{
session->sendRequest(request);
auto & body_stream = session->receiveResponse(response);
return parser.parse(response, &body_stream);
}
catch (const Poco::Exception &) // TODO: make retries smarter
{
if (last_attempt)
throw;
sleepForMilliseconds(milliseconds_to_wait);
milliseconds_to_wait = std::min(milliseconds_to_wait * 2, retry_max_backoff_ms);
}
}
UNREACHABLE();
}
const Poco::URI & getURI() const { return uri; }
private:
const ConnectionTimeouts timeouts;
const size_t max_tries;
const size_t retry_initial_backoff_ms;
const size_t retry_max_backoff_ms;
const Poco::URI uri;
TResponseParser parser;
};
template <typename TResponseParser>
class HTTPBasicAuthClient : private HTTPAuthClient<TResponseParser>
{
public:
using HTTPAuthClient<TResponseParser>::HTTPAuthClient;
using Result = HTTPAuthClient<TResponseParser>::Result;
Result authenticate(const String & user_name, const String & password) const
{
Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()};
Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password};
basic_credentials.authenticate(request);
return this->authenticateRequest(request);
}
};
}

View File

@ -488,7 +488,7 @@ bool IAccessStorage::updateImpl(const UUID & id, const UpdateFunc &, bool throw_
}
UUID IAccessStorage::authenticate(
AuthResult IAccessStorage::authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -499,7 +499,7 @@ UUID IAccessStorage::authenticate(
}
std::optional<UUID> IAccessStorage::authenticate(
std::optional<AuthResult> IAccessStorage::authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -511,7 +511,7 @@ std::optional<UUID> IAccessStorage::authenticate(
}
std::optional<UUID> IAccessStorage::authenticateImpl(
std::optional<AuthResult> IAccessStorage::authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -523,6 +523,7 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
{
if (auto user = tryRead<User>(*id))
{
AuthResult auth_result { .user_id = *id };
if (!isAddressAllowed(*user, address))
throwAddressNotAllowed(address);
@ -531,10 +532,10 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password))
throwAuthenticationTypeNotAllowed(auth_type);
if (!areCredentialsValid(*user, credentials, external_authenticators))
if (!areCredentialsValid(*user, credentials, external_authenticators, auth_result.settings))
throwInvalidCredentials();
return id;
return auth_result;
}
}
@ -548,7 +549,8 @@ std::optional<UUID> IAccessStorage::authenticateImpl(
bool IAccessStorage::areCredentialsValid(
const User & user,
const Credentials & credentials,
const ExternalAuthenticators & external_authenticators) const
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings) const
{
if (!credentials.isReady())
return false;
@ -564,7 +566,7 @@ bool IAccessStorage::areCredentialsValid(
return false;
}
return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators);
return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators, settings);
}

View File

@ -5,10 +5,12 @@
#include <Core/UUID.h>
#include <Parsers/IParser.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Common/SettingsChanges.h>
#include <atomic>
#include <functional>
#include <optional>
#include <vector>
#include <atomic>
namespace Poco { class Logger; }
@ -23,6 +25,14 @@ enum class AuthenticationType;
class BackupEntriesCollector;
class RestorerFromBackup;
/// Result of authentication
struct AuthResult
{
UUID user_id;
/// Session settings received from authentication server (if any)
SettingsChanges settings;
};
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
/// The implementations of this class MUST be thread-safe.
class IAccessStorage : public boost::noncopyable
@ -171,8 +181,19 @@ public:
/// Finds a user, check the provided credentials and returns the ID of the user if they are valid.
/// Throws an exception if no such user or credentials are invalid.
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool allow_no_password, bool allow_plaintext_password) const;
std::optional<UUID> authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
AuthResult authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool allow_no_password,
bool allow_plaintext_password) const;
std::optional<AuthResult> authenticate(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
bool allow_no_password,
bool allow_plaintext_password) const;
/// Returns true if this storage can be stored to or restored from a backup.
virtual bool isBackupAllowed() const { return false; }
@ -190,8 +211,18 @@ protected:
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const;
virtual bool areCredentialsValid(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const;
virtual std::optional<AuthResult> authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
bool allow_no_password,
bool allow_plaintext_password) const;
virtual bool areCredentialsValid(
const User & user,
const Credentials & credentials,
const ExternalAuthenticators & external_authenticators,
SettingsChanges & settings) const;
virtual bool isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const;
static UUID generateRandomID();
Poco::Logger * getLogger() const;

View File

@ -450,7 +450,7 @@ std::optional<std::pair<String, AccessEntityType>> LDAPAccessStorage::readNameWi
}
std::optional<UUID> LDAPAccessStorage::authenticateImpl(
std::optional<AuthResult> LDAPAccessStorage::authenticateImpl(
const Credentials & credentials,
const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
@ -503,7 +503,9 @@ std::optional<UUID> LDAPAccessStorage::authenticateImpl(
updateAssignedRolesNoLock(*id, user->getName(), external_roles);
}
return id;
if (id)
return AuthResult{ .user_id = *id };
return std::nullopt;
}
}

View File

@ -48,7 +48,7 @@ private: // IAccessStorage implementations.
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const override;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
virtual std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
virtual std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
void setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix);
void processRoleChange(const UUID & id, const AccessEntityPtr & entity);

View File

@ -415,7 +415,7 @@ bool MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & updat
}
std::optional<UUID>
std::optional<AuthResult>
MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address,
const ExternalAuthenticators & external_authenticators,
bool throw_if_user_not_exists,
@ -426,14 +426,14 @@ MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const P
{
const auto & storage = (*storages)[i];
bool is_last_storage = (i == storages->size() - 1);
auto id = storage->authenticate(credentials, address, external_authenticators,
auto auth_result = storage->authenticate(credentials, address, external_authenticators,
(throw_if_user_not_exists && is_last_storage),
allow_no_password, allow_plaintext_password);
if (id)
if (auth_result)
{
std::lock_guard lock{mutex};
ids_cache.set(*id, storage);
return id;
ids_cache.set(auth_result->user_id, storage);
return auth_result;
}
}

View File

@ -67,7 +67,7 @@ protected:
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
std::optional<UUID> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
private:
using Storages = std::vector<StoragePtr>;

View File

@ -0,0 +1,45 @@
#include "SettingsAuthResponseParser.h"
#include <Access/resolveSetting.h>
#include <IO/HTTPCommon.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Poco/Timespan.h>
namespace DB
{
SettingsAuthResponseParser::Result
SettingsAuthResponseParser::parse(const Poco::Net::HTTPResponse & response, std::istream * body_stream) const
{
Result result;
if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
return result;
result.is_ok = true;
if (!body_stream)
return result;
Poco::JSON::Parser parser;
Poco::JSON::Object::Ptr parsed_body;
try
{
Poco::Dynamic::Var json = parser.parse(*body_stream);
Poco::JSON::Object::Ptr obj = json.extract<Poco::JSON::Object::Ptr>();
Poco::JSON::Object::Ptr settings_obj = obj->getObject(settings_key);
if (settings_obj)
for (const auto & [key, value] : *settings_obj)
result.settings.emplace_back(key, settingStringToValueUtil(key, value));
}
catch (...)
{
LOG_INFO(&Poco::Logger::get("HTTPAuthentication"), "Failed to parse settings from authentication response. Skip it.");
}
return result;
}
}

View File

@ -0,0 +1,28 @@
#pragma once
#include <Common/SettingsChanges.h>
#include <istream>
namespace Poco::Net
{
class HTTPResponse;
}
namespace DB
{
/// Class for parsing authentication response containing session settings
class SettingsAuthResponseParser
{
static constexpr auto settings_key = "settings";
public:
struct Result
{
bool is_ok = false;
SettingsChanges settings;
};
Result parse(const Poco::Net::HTTPResponse & response, std::istream * body_stream) const;
};
}

View File

@ -135,18 +135,22 @@ namespace
const auto ssh_keys_config = user_config + ".ssh_keys";
bool has_ssh_keys = config.has(ssh_keys_config);
size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex + has_ldap + has_kerberos + has_certificates + has_ssh_keys;
const auto http_auth_config = user_config + ".http_authentication";
bool has_http_auth = config.has(http_auth_config);
size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex
+ has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth;
if (num_password_fields > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', "
"'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys' "
"are used to specify authentication info for user {}. "
"'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', "
"'http_authentication' are used to specify authentication info for user {}. "
"Must be only one of them.", user_name);
if (num_password_fields < 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' "
"or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos "
"or 'ssl_certificates' or 'ssh_keys' must be specified for user {}.", user_name);
"or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' must be specified for user {}.", user_name);
if (has_password_plaintext)
{
@ -248,6 +252,13 @@ namespace
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL");
#endif
}
else if (has_http_auth)
{
user->auth_data = AuthenticationData{AuthenticationType::HTTP};
user->auth_data.setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server"));
auto scheme = config.getString(http_auth_config + ".scheme");
user->auth_data.setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme));
}
auto auth_type = user->auth_data.getType();
if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) ||

View File

@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
struct Settings;
namespace
{
AggregateFunctionPtr
createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0])
|| isDateTime64(argument_types[0])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument",
name);
if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1])
|| isDateTime64(argument_types[1])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument",
name);
return std::make_shared<AggregateFunctionLargestTriangleThreeBuckets>(argument_types, parameters);
}
}
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory)
{
factory.registerFunction(AggregateFunctionLargestTriangleThreeBuckets::name, createAggregateFunctionLargestTriangleThreeBuckets);
factory.registerAlias("lttb", AggregateFunctionLargestTriangleThreeBuckets::name);
}
}

View File

@ -0,0 +1,327 @@
#pragma once
#include <iostream>
#include <limits>
#include <numeric>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <Common/PODArray_fwd.h>
#include <Common/assert_cast.h>
#include <boost/math/distributions/normal.hpp>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float64>
{
void add(const Float64 xval, const Float64 yval, Arena * arena)
{
this->addX(xval, arena);
this->addY(yval, arena);
}
void sort(Arena * arena)
{
// sort the this->x and this->y in ascending order of this->x using index
std::vector<size_t> index(this->x.size());
std::iota(index.begin(), index.end(), 0);
::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
SampleX temp_x{};
SampleY temp_y{};
for (size_t i = 0; i < this->x.size(); ++i)
{
temp_x.push_back(this->x[index[i]], arena);
temp_y.push_back(this->y[index[i]], arena);
}
for (size_t i = 0; i < this->x.size(); ++i)
{
this->x[i] = temp_x[i];
this->y[i] = temp_y[i];
}
}
PODArray<std::pair<Float64, Float64>> getResult(size_t total_buckets, Arena * arena)
{
// Sort the data
this->sort(arena);
PODArray<std::pair<Float64, Float64>> result;
// Handle special cases for small data list
if (this->x.size() <= total_buckets)
{
for (size_t i = 0; i < this->x.size(); ++i)
{
result.emplace_back(std::make_pair(this->x[i], this->y[i]));
}
return result;
}
// Handle special cases for 0 or 1 or 2 buckets
if (total_buckets == 0)
return result;
if (total_buckets == 1)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
return result;
}
if (total_buckets == 2)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
// Find the size of each bucket
size_t single_bucket_size = this->x.size() / total_buckets;
// Include the first data point
result.emplace_back(std::make_pair(this->x[0], this->y[0]));
for (size_t i = 1; i < total_buckets - 1; ++i) // Skip the first and last bucket
{
size_t start_index = i * single_bucket_size;
size_t end_index = (i + 1) * single_bucket_size;
// Compute the average point in the next bucket
Float64 avg_x = 0;
Float64 avg_y = 0;
for (size_t j = end_index; j < (i + 2) * single_bucket_size; ++j)
{
avg_x += this->x[j];
avg_y += this->y[j];
}
avg_x /= single_bucket_size;
avg_y /= single_bucket_size;
// Find the point in the current bucket that forms the largest triangle
size_t max_index = start_index;
Float64 max_area = 0.0;
for (size_t j = start_index; j < end_index; ++j)
{
Float64 area = std::abs(
0.5
* (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y
- this->x[j] * result.back().second - avg_x * this->y[j]));
if (area > max_area)
{
max_area = area;
max_index = j;
}
}
// Include the selected point
result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index]));
}
// Include the last data point
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
};
class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>
{
private:
UInt64 total_buckets{0};
TypeIndex x_type;
TypeIndex y_type;
public:
explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>({arguments}, {}, createResultType(arguments))
{
if (params.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter", getName());
if (params[0].getType() != Field::Types::UInt64)
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName());
total_buckets = params[0].get<UInt64>();
this->x_type = WhichDataType(arguments[0]).idx;
this->y_type = WhichDataType(arguments[1]).idx;
}
static constexpr auto name = "largestTriangleThreeBuckets";
String getName() const override { return name; }
bool allocatesMemoryInArena() const override { return true; }
static DataTypePtr createResultType(const DataTypes & arguments)
{
TypeIndex x_type = arguments[0]->getTypeId();
TypeIndex y_type = arguments[1]->getTypeId();
UInt32 x_scale = 0;
UInt32 y_scale = 0;
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[0].get()))
{
x_scale = datetime64_type->getScale();
}
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[1].get()))
{
y_scale = datetime64_type->getScale();
}
DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)};
auto tuple = std::make_shared<DataTypeTuple>(std::move(types));
return std::make_shared<DataTypeArray>(tuple);
}
static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale)
{
DataTypePtr data_type;
switch (type_index)
{
case TypeIndex::Date:
data_type = std::make_shared<DataTypeDate>();
break;
case TypeIndex::Date32:
data_type = std::make_shared<DataTypeDate32>();
break;
case TypeIndex::DateTime:
data_type = std::make_shared<DataTypeDateTime>();
break;
case TypeIndex::DateTime64:
data_type = std::make_shared<DataTypeDateTime64>(scale);
break;
default:
data_type = std::make_shared<DataTypeNumber<Float64>>();
}
return data_type;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
this->data(place).add(x, y, arena);
}
Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return static_cast<const ColumnDate &>(*column).getData()[row_num];
case TypeIndex::Date32:
return static_cast<const ColumnDate32 &>(*column).getData()[row_num];
case TypeIndex::DateTime:
return static_cast<const ColumnDateTime &>(*column).getData()[row_num];
case TypeIndex::DateTime64:
return static_cast<const ColumnDateTime64 &>(*column).getData()[row_num];
default:
return column->getFloat64(row_num);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & a = this->data(place);
const auto & b = this->data(rhs);
a.merge(b, arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
{
auto res = this->data(place).getResult(total_buckets, arena);
auto & col = assert_cast<ColumnArray &>(to);
auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
auto column_x_adder_func = getColumnAdderFunc(x_type);
auto column_y_adder_func = getColumnAdderFunc(y_type);
for (size_t i = 0; i < res.size(); ++i)
{
auto & column_tuple = assert_cast<ColumnTuple &>(col.getData());
column_x_adder_func(column_tuple.getColumn(0), res[i].first);
column_y_adder_func(column_tuple.getColumn(1), res[i].second);
}
col_offsets.getData().push_back(col.getData().size());
}
std::function<void(IColumn &, Float64)> getColumnAdderFunc(TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate &>(column);
col.getData().push_back(static_cast<UInt16>(value));
};
case TypeIndex::Date32:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate32 &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime64:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime64 &>(column);
col.getData().push_back(static_cast<UInt64>(value));
};
default:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnFloat64 &>(column);
col.getData().push_back(value);
};
}
}
};
}

View File

@ -129,7 +129,10 @@ public:
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
if constexpr (std::endian::native == std::endian::little)
writePODBinary(value[i].second, buf);
else
writePODBinary(std::byteswap(value[i].second), buf);
}
}

View File

@ -82,6 +82,7 @@ void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -176,6 +177,7 @@ void registerAggregateFunctions()
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerAggregateFunctionLargestTriangleThreeBuckets(factory);
registerWindowFunctions(factory);
}

View File

@ -107,6 +107,7 @@ namespace ErrorCodes
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
extern const int USER_SESSION_LIMIT_EXCEEDED;
extern const int NOT_IMPLEMENTED;
}
}
@ -1432,7 +1433,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
}
/// Process the query that requires transferring data blocks to the server.
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
{
const auto & settings = global_context->getSettingsRef();
@ -1878,10 +1879,20 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
if (is_async_insert_with_inlined_data)
{
bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && !std_in.eof();
bool have_external_data = have_data_in_stdin || insert->infile;
if (have_external_data)
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Processing async inserts with both inlined and external data (from stdin or infile) is not supported");
}
/// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)
if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert_with_inlined_data)
{
if (input_function && insert->format.empty())
throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()");

View File

@ -95,12 +95,18 @@ void LocalConnection::sendQuery(
else
query_context = session.makeQueryContext();
query_context->setCurrentQueryId(query_id);
if (send_progress)
{
query_context->setProgressCallback([this] (const Progress & value) { this->updateProgress(value); });
query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); });
}
if (!current_database.empty())
/// Switch the database to the desired one (set by the USE query)
/// but don't attempt to do it if we are already in that database.
/// (there is a rare case when it matters - if we deleted the current database,
// we can still do some queries, but we cannot switch to the same database)
if (!current_database.empty() && current_database != query_context->getCurrentDatabase())
query_context->setCurrentDatabase(current_database);
query_context->addQueryParameters(query_parameters);
@ -125,7 +131,7 @@ void LocalConnection::sendQuery(
try
{
state->io = executeQuery(state->query, query_context, false, state->stage);
state->io = executeQuery(state->query, query_context, false, state->stage).second;
if (state->io.pipeline.pushing())
{

View File

@ -1,7 +1,6 @@
#include <Client/MultiplexedConnections.h>
#include <Common/thread_local_rng.h>
#include <Common/logger_useful.h>
#include <Core/Protocol.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/Operators.h>
@ -24,14 +23,6 @@ namespace ErrorCodes
}
#define MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION \
mutex_last_locked_by.store((getThreadId() << 32) | __LINE__); \
memcpy(mutex_memory_dump.data(), &cancel_mutex, mutex_memory_dump.size()); \
mutex_locked += 1; \
SCOPE_EXIT({ mutex_locked -= 1; });
/// When you remove this macro, please also remove the clang-tidy suppressions at the beginning + end of this file.
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
{
@ -86,7 +77,6 @@ MultiplexedConnections::MultiplexedConnections(
void MultiplexedConnections::sendScalarsData(Scalars & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send scalars data: query not yet sent.");
@ -102,7 +92,6 @@ void MultiplexedConnections::sendScalarsData(Scalars & data)
void MultiplexedConnections::sendExternalTablesData(std::vector<ExternalTablesData> & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send external tables data: query not yet sent.");
@ -131,7 +120,6 @@ void MultiplexedConnections::sendQuery(
bool with_pending_data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query already sent.");
@ -189,7 +177,6 @@ void MultiplexedConnections::sendQuery(
void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send uuids after query is sent.");
@ -206,7 +193,6 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuid
void MultiplexedConnections::sendReadTaskResponse(const String & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendReadTaskResponse(response);
@ -216,7 +202,6 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response)
void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendMergeTreeReadTaskResponse(response);
@ -226,29 +211,13 @@ void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadRes
Packet MultiplexedConnections::receivePacket()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
Packet packet = receivePacketUnlocked({});
return packet;
}
void MultiplexedConnections::disconnect()
{
/// We've seen this lock mysteriously get stuck forever, without any other thread seeming to
/// hold the mutex. This is temporary code to print some extra information next time it happens.
/// std::lock_guard lock(cancel_mutex);
if (!cancel_mutex.try_lock_for(std::chrono::hours(1)))
{
UInt64 last_locked = mutex_last_locked_by.load();
std::array<UInt8, sizeof(std::timed_mutex)> new_memory_dump;
memcpy(new_memory_dump.data(), &cancel_mutex, new_memory_dump.size());
LOG_ERROR(&Poco::Logger::get("MultiplexedConnections"), "Deadlock in MultiplexedConnections::disconnect()! Mutex was last (instrumentedly) locked by thread {} on line {}, lock balance: {}, mutex memory when last locked: {}, mutex memory now: {}", last_locked >> 32, last_locked & 0xffffffff, mutex_locked.load(), hexString(mutex_memory_dump.data(), mutex_memory_dump.size()), hexString(new_memory_dump.data(), new_memory_dump.size()));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Deadlock in MultiplexedConnections::disconnect()");
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wthread-safety-analysis"
std::lock_guard lock(cancel_mutex, std::adopt_lock);
#pragma clang diagnostic pop
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
std::lock_guard lock(cancel_mutex);
for (ReplicaState & state : replica_states)
{
@ -264,7 +233,6 @@ void MultiplexedConnections::disconnect()
void MultiplexedConnections::sendCancel()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query || cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled.");
@ -282,7 +250,6 @@ void MultiplexedConnections::sendCancel()
Packet MultiplexedConnections::drain()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot drain connections: cancel first.");
@ -323,7 +290,6 @@ Packet MultiplexedConnections::drain()
std::string MultiplexedConnections::dumpAddresses() const
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
return dumpAddressesUnlocked();
}

View File

@ -106,14 +106,7 @@ private:
std::optional<ReplicaInfo> replica_info;
/// A mutex for the sendCancel function to execute safely in separate thread.
mutable std::timed_mutex cancel_mutex;
/// Temporary instrumentation to debug a weird deadlock on cancel_mutex.
/// TODO: Once the investigation is done, get rid of these, and of INSTRUMENTED_LOCK_MUTEX, and
/// change cancel_mutex to std::mutex.
mutable std::atomic<UInt64> mutex_last_locked_by{0};
mutable std::atomic<Int64> mutex_locked{0};
mutable std::array<UInt8, sizeof(std::timed_mutex)> mutex_memory_dump;
mutable std::mutex cancel_mutex;
friend struct RemoteQueryExecutorRoutine;
};

View File

@ -25,25 +25,12 @@ namespace
* getenv is safe to use here because ClickHouse code does not make any call to `setenv` or `putenv`
* aside from tests and a very early call during startup: https://github.com/ClickHouse/ClickHouse/blob/master/src/Daemon/BaseDaemon.cpp#L791
* */
if (protocol == DB::ProxyConfiguration::Protocol::HTTP)
switch (protocol)
{
return std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
else if (protocol == DB::ProxyConfiguration::Protocol::HTTPS)
{
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
else
{
if (const char * http_proxy_host = std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE)) // NOLINT(concurrency-mt-unsafe)
{
return http_proxy_host;
}
else
{
case ProxyConfiguration::Protocol::HTTP:
return std::getenv(PROXY_HTTP_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
case ProxyConfiguration::Protocol::HTTPS:
return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe)
}
}
}
}

View File

@ -530,6 +530,13 @@ The server successfully detected this situation and will download merged part fr
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
\
M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\
M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\
M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\
M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\
M(S3QueuePullMicroseconds, "Time spent to read file data")\
M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
\
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \
@ -589,9 +596,14 @@ Timer::Timer(Counters & counters_, Event timer_event_, Event counter_event, Reso
counters.increment(counter_event);
}
UInt64 Timer::get()
{
return watch.elapsedNanoseconds() / static_cast<UInt64>(resolution);
}
void Timer::end()
{
counters.increment(timer_event, watch.elapsedNanoseconds() / static_cast<UInt64>(resolution));
counters.increment(timer_event, get());
watch.reset();
}

View File

@ -41,6 +41,7 @@ namespace ProfileEvents
~Timer() { end(); }
void cancel() { watch.reset(); }
void end();
UInt64 get();
private:
Counters & counters;

View File

@ -1,17 +1,23 @@
#pragma once
#include <string>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
struct ProxyConfiguration
{
enum class Protocol
{
HTTP,
HTTPS,
ANY
HTTPS
};
static auto protocolFromString(const std::string & str)
@ -24,10 +30,8 @@ struct ProxyConfiguration
{
return Protocol::HTTPS;
}
else
{
return Protocol::ANY;
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown proxy protocol: {}", str);
}
static auto protocolToString(Protocol protocol)
@ -38,8 +42,6 @@ struct ProxyConfiguration
return "http";
case Protocol::HTTPS:
return "https";
case Protocol::ANY:
return "any";
}
}

View File

@ -20,12 +20,13 @@ namespace
std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
auto endpoint = Poco::URI(configuration.getString(config_prefix + ".endpoint"));
auto proxy_scheme = configuration.getString(config_prefix + ".proxy_scheme");
auto resolver_prefix = config_prefix + ".resolver";
auto endpoint = Poco::URI(configuration.getString(resolver_prefix + ".endpoint"));
auto proxy_scheme = configuration.getString(resolver_prefix + ".proxy_scheme");
if (proxy_scheme != "http" && proxy_scheme != "https")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only HTTP/HTTPS schemas allowed in proxy resolver config: {}", proxy_scheme);
auto proxy_port = configuration.getUInt(config_prefix + ".proxy_port");
auto cache_ttl = configuration.getUInt(config_prefix + ".proxy_cache_time", 10);
auto proxy_port = configuration.getUInt(resolver_prefix + ".proxy_port");
auto cache_ttl = configuration.getUInt(resolver_prefix + ".proxy_cache_time", 10);
LOG_DEBUG(&Poco::Logger::get("ProxyConfigurationResolverProvider"), "Configured remote proxy resolver: {}, Scheme: {}, Port: {}",
endpoint.toString(), proxy_scheme, proxy_port);
@ -33,31 +34,6 @@ namespace
return std::make_shared<RemoteProxyConfigurationResolver>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}
std::shared_ptr<ProxyConfigurationResolver> getRemoteResolver(
ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
std::vector<String> keys;
configuration.keys(config_prefix, keys);
std::vector<Poco::URI> uris;
for (const auto & key : keys)
{
if (startsWith(key, "resolver"))
{
auto prefix_with_key = config_prefix + "." + key;
auto proxy_scheme_config_string = prefix_with_key + ".proxy_scheme";
auto config_protocol = configuration.getString(proxy_scheme_config_string);
if (ProxyConfiguration::Protocol::ANY == protocol || config_protocol == ProxyConfiguration::protocolToString(protocol))
{
return getRemoteResolver(prefix_with_key, configuration);
}
}
}
return nullptr;
}
auto extractURIList(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
std::vector<String> keys;
@ -84,34 +60,7 @@ namespace
return uris;
}
std::shared_ptr<ProxyConfigurationResolver> getListResolverNewSyntax(
ProxyConfiguration::Protocol protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
std::vector<Poco::URI> uris;
bool include_http_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTP == protocol;
if (include_http_uris && configuration.has(config_prefix + ".http"))
{
auto http_uris = extractURIList(config_prefix + ".http", configuration);
uris.insert(uris.end(), http_uris.begin(), http_uris.end());
}
bool include_https_uris = ProxyConfiguration::Protocol::ANY == protocol || ProxyConfiguration::Protocol::HTTPS == protocol;
if (include_https_uris && configuration.has(config_prefix + ".https"))
{
auto https_uris = extractURIList(config_prefix + ".https", configuration);
uris.insert(uris.end(), https_uris.begin(), https_uris.end());
}
return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
}
std::shared_ptr<ProxyConfigurationResolver> getListResolverOldSyntax(
std::shared_ptr<ProxyConfigurationResolver> getListResolver(
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
@ -121,29 +70,58 @@ namespace
return uris.empty() ? nullptr : std::make_shared<ProxyListConfigurationResolver>(uris);
}
std::shared_ptr<ProxyConfigurationResolver> getListResolver(
ProxyConfiguration::Protocol protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration
bool hasRemoteResolver(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
return configuration.has(config_prefix + ".resolver");
}
bool hasListResolver(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration)
{
return configuration.has(config_prefix + ".uri");
}
/*
* New syntax requires protocol prefix "<http> or <https>"
* */
std::optional<std::string> getProtocolPrefix(
ProxyConfiguration::Protocol request_protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
std::vector<String> keys;
configuration.keys(config_prefix, keys);
auto protocol_prefix = config_prefix + "." + ProxyConfiguration::protocolToString(request_protocol);
if (!configuration.has(protocol_prefix))
{
return std::nullopt;
}
bool new_setting_syntax = std::find_if(
keys.begin(),
keys.end(),
[](const String & key)
{
return startsWith(key, "http") || startsWith(key, "https");
}) != keys.end();
return protocol_prefix;
}
return new_setting_syntax ? getListResolverNewSyntax(protocol, config_prefix, configuration)
: getListResolverOldSyntax(config_prefix, configuration);
template <bool new_syntax>
std::optional<std::string> calculatePrefixBasedOnSettingsSyntax(
ProxyConfiguration::Protocol request_protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
if (!configuration.has(config_prefix))
{
return std::nullopt;
}
if constexpr (new_syntax)
{
return getProtocolPrefix(request_protocol, config_prefix, configuration);
}
return config_prefix;
}
}
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::get(Protocol protocol, const Poco::Util::AbstractConfiguration & configuration)
{
if (auto resolver = getFromSettings(protocol, "", configuration))
if (auto resolver = getFromSettings(protocol, "proxy", configuration))
{
return resolver;
}
@ -151,34 +129,36 @@ std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::
return std::make_shared<EnvironmentProxyConfigurationResolver>(protocol);
}
template <bool is_new_syntax>
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromSettings(
Protocol protocol,
Protocol request_protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
{
auto proxy_prefix = config_prefix.empty() ? "proxy" : config_prefix + ".proxy";
auto prefix_opt = calculatePrefixBasedOnSettingsSyntax<is_new_syntax>(request_protocol, config_prefix, configuration);
if (configuration.has(proxy_prefix))
if (!prefix_opt)
{
std::vector<String> config_keys;
configuration.keys(proxy_prefix, config_keys);
return nullptr;
}
if (auto remote_resolver = getRemoteResolver(protocol, proxy_prefix, configuration))
{
return remote_resolver;
}
auto prefix = *prefix_opt;
if (auto list_resolver = getListResolver(protocol, proxy_prefix, configuration))
{
return list_resolver;
}
if (hasRemoteResolver(prefix, configuration))
{
return getRemoteResolver(prefix, configuration);
}
else if (hasListResolver(prefix, configuration))
{
return getListResolver(prefix, configuration);
}
return nullptr;
}
std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
Protocol request_protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
)
@ -187,7 +167,7 @@ std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::
* First try to get it from settings only using the combination of config_prefix and configuration.
* This logic exists for backward compatibility with old S3 storage specific proxy configuration.
* */
if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(Protocol::ANY, config_prefix, configuration))
if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings<false>(request_protocol, config_prefix + ".proxy", configuration))
{
return resolver;
}
@ -196,7 +176,7 @@ std::shared_ptr<ProxyConfigurationResolver> ProxyConfigurationResolverProvider::
* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings.
* Falls back to Environment resolver if no configuration is found.
* */
return ProxyConfigurationResolverProvider::get(Protocol::ANY, configuration);
return ProxyConfigurationResolverProvider::get(request_protocol, configuration);
}
}

View File

@ -27,11 +27,13 @@ public:
* If no configuration is found, returns nullptr.
* */
static std::shared_ptr<ProxyConfigurationResolver> getFromOldSettingsFormat(
Protocol request_protocol,
const String & config_prefix,
const Poco::Util::AbstractConfiguration & configuration
);
private:
template <bool is_new_syntax = true>
static std::shared_ptr<ProxyConfigurationResolver> getFromSettings(
Protocol protocol,
const String & config_prefix,

View File

@ -10,6 +10,7 @@
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <Interpreters/ProcessorsProfileLog.h>
#include <Interpreters/ZooKeeperLog.h>

View File

@ -27,6 +27,7 @@
M(ZooKeeperLogElement) \
M(ProcessorProfileLogElement) \
M(TextLogElement) \
M(S3QueueLogElement) \
M(FilesystemCacheLogElement) \
M(FilesystemReadPrefetchesLogElement) \
M(AsynchronousInsertLogElement) \

View File

@ -544,6 +544,15 @@ public:
/// If expired, you can only destroy the object. All other methods will throw exception.
virtual bool isExpired() const = 0;
/// Get the current connected node idx.
virtual Int8 getConnectedNodeIdx() const = 0;
/// Get the current connected host and port.
virtual String getConnectedHostPort() const = 0;
/// Get the xid of current connection.
virtual int32_t getConnectionXid() const = 0;
/// Useful to check owner of ephemeral node.
virtual int64_t getSessionID() const = 0;

View File

@ -40,6 +40,9 @@ public:
bool isExpired() const override { return expired; }
bool hasReachedDeadline() const override { return false; }
Int8 getConnectedNodeIdx() const override { return 0; }
String getConnectedHostPort() const override { return "TestKeeper:0000"; }
int32_t getConnectionXid() const override { return 0; }
int64_t getSessionID() const override { return 0; }

View File

@ -89,7 +89,7 @@ void ZooKeeper::init(ZooKeeperArgs args_)
const Poco::Net::SocketAddress host_socket_addr{host_string};
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, secure});
nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, host.original_index, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
{
@ -113,12 +113,7 @@ void ZooKeeper::init(ZooKeeperArgs args_)
throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes");
}
impl = std::make_unique<Coordination::ZooKeeper>(nodes, args, zk_log, [this](size_t node_idx, const Coordination::ZooKeeper::Node & node)
{
connected_zk_host = node.address.host().toString();
connected_zk_port = node.address.port();
connected_zk_index = node_idx;
});
impl = std::make_unique<Coordination::ZooKeeper>(nodes, args, zk_log);
if (args.chroot.empty())
LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ","));
@ -179,6 +174,7 @@ std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
{
ShuffleHost shuffle_host;
shuffle_host.host = args.hosts[i];
shuffle_host.original_index = static_cast<UInt8>(i);
if (get_priority)
shuffle_host.priority = get_priority(i);
shuffle_host.randomize();
@ -1312,6 +1308,20 @@ void ZooKeeper::setServerCompletelyStarted()
zk->setServerCompletelyStarted();
}
Int8 ZooKeeper::getConnectedHostIdx() const
{
return impl->getConnectedNodeIdx();
}
String ZooKeeper::getConnectedHostPort() const
{
return impl->getConnectedHostPort();
}
int32_t ZooKeeper::getConnectionXid() const
{
return impl->getConnectionXid();
}
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
{

View File

@ -51,6 +51,7 @@ constexpr size_t MULTI_BATCH_SIZE = 100;
struct ShuffleHost
{
String host;
UInt8 original_index = 0;
Priority priority;
UInt64 random = 0;
@ -551,10 +552,9 @@ public:
void setServerCompletelyStarted();
String getConnectedZooKeeperHost() const { return connected_zk_host; }
UInt16 getConnectedZooKeeperPort() const { return connected_zk_port; }
size_t getConnectedZooKeeperIndex() const { return connected_zk_index; }
UInt64 getConnectedTime() const { return connected_time; }
Int8 getConnectedHostIdx() const;
String getConnectedHostPort() const;
int32_t getConnectionXid() const;
const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return impl->getKeeperFeatureFlags(); }
@ -621,11 +621,6 @@ private:
ZooKeeperArgs args;
String connected_zk_host;
UInt16 connected_zk_port;
size_t connected_zk_index;
UInt64 connected_time = timeInSeconds(std::chrono::system_clock::now());
std::mutex mutex;
Poco::Logger * log = nullptr;
@ -644,11 +639,18 @@ class EphemeralNodeHolder
public:
using Ptr = std::shared_ptr<EphemeralNodeHolder>;
EphemeralNodeHolder(const std::string & path_, ZooKeeper & zookeeper_, bool create, bool sequential, const std::string & data)
EphemeralNodeHolder(const std::string & path_, ZooKeeper & zookeeper_, bool create, bool try_create, bool sequential, const std::string & data)
: path(path_), zookeeper(zookeeper_)
{
if (create)
{
path = zookeeper.create(path, data, sequential ? CreateMode::EphemeralSequential : CreateMode::Ephemeral);
need_remove = created = true;
}
else if (try_create)
{
need_remove = created = Coordination::Error::ZOK == zookeeper.tryCreate(path, data, sequential ? CreateMode::EphemeralSequential : CreateMode::Ephemeral);
}
}
std::string getPath() const
@ -656,19 +658,32 @@ public:
return path;
}
bool isCreated() const
{
return created;
}
static Ptr create(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, data);
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, false, data);
}
static Ptr tryCreate(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
auto node = std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, true, false, data);
if (node->isCreated())
return node;
return nullptr;
}
static Ptr createSequential(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, true, data);
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, true, data);
}
static Ptr existing(const std::string & path, ZooKeeper & zookeeper)
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, "");
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, false, "");
}
void setAlreadyRemoved()
@ -702,6 +717,7 @@ private:
ZooKeeper & zookeeper;
CurrentMetrics::Increment metric_increment{CurrentMetrics::EphemeralNode};
bool need_remove = true;
bool created = false;
};
using EphemeralNodeHolderPtr = EphemeralNodeHolder::Ptr;

View File

@ -313,8 +313,8 @@ ZooKeeper::~ZooKeeper()
ZooKeeper::ZooKeeper(
const Nodes & nodes,
const zkutil::ZooKeeperArgs & args_,
std::shared_ptr<ZooKeeperLog> zk_log_, std::optional<ConnectedCallback> && connected_callback_)
: args(args_), connected_callback(std::move(connected_callback_))
std::shared_ptr<ZooKeeperLog> zk_log_)
: args(args_)
{
log = &Poco::Logger::get("ZooKeeperClient");
std::atomic_store(&zk_log, std::move(zk_log_));
@ -445,9 +445,7 @@ void ZooKeeper::connect(
throw;
}
connected = true;
if (connected_callback.has_value())
(*connected_callback)(i, node);
original_index = static_cast<Int8>(node.original_index);
if (i != 0)
{
@ -912,6 +910,9 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea
LOG_INFO(log, "Finalizing session {}. finalization_started: {}, queue_finished: {}, reason: '{}'",
session_id, already_started, requests_queue.isFinished(), reason);
/// Reset the original index.
original_index = -1;
auto expire_session_if_not_expired = [&]
{
/// No new requests will appear in queue after finish()

View File

@ -103,11 +103,11 @@ public:
struct Node
{
Poco::Net::SocketAddress address;
UInt8 original_index;
bool secure;
};
using Nodes = std::vector<Node>;
using ConnectedCallback = std::function<void(size_t, const Node&)>;
/** Connection to nodes is performed in order. If you want, shuffle them manually.
* Operation timeout couldn't be greater than session timeout.
@ -116,8 +116,7 @@ public:
ZooKeeper(
const Nodes & nodes,
const zkutil::ZooKeeperArgs & args_,
std::shared_ptr<ZooKeeperLog> zk_log_,
std::optional<ConnectedCallback> && connected_callback_ = {});
std::shared_ptr<ZooKeeperLog> zk_log_);
~ZooKeeper() override;
@ -125,6 +124,10 @@ public:
/// If expired, you can only destroy the object. All other methods will throw exception.
bool isExpired() const override { return requests_queue.isFinished(); }
Int8 getConnectedNodeIdx() const override { return original_index; }
String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; }
int32_t getConnectionXid() const override { return next_xid.load(); }
/// A ZooKeeper session can have an optional deadline set on it.
/// After it has been reached, the session needs to be finalized.
bool hasReachedDeadline() const override;
@ -219,7 +222,7 @@ private:
ACLs default_acls;
zkutil::ZooKeeperArgs args;
std::optional<ConnectedCallback> connected_callback = {};
Int8 original_index = -1;
/// Fault injection
void maybeInjectSendFault();

View File

@ -30,14 +30,15 @@ try
splitInto<','>(hosts_strings, hosts_arg);
ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size());
for (auto & host_string : hosts_strings)
for (size_t i = 0; i < hosts_strings.size(); ++i)
{
std::string host_string = hosts_strings[i];
bool secure = bool(startsWith(host_string, "secure://"));
if (secure)
host_string.erase(0, strlen("secure://"));
nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string},secure});
nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, static_cast<UInt8>(i) , secure});
}

View File

@ -120,4 +120,78 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListBoth)
ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort());
}
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTP)
{
/*
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
* the remote resolver was OR was not picked based on the configuration, is to use the
* environment resolver. Since the environment resolver is always returned as a fallback,
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
* succeeds and returns an environment proxy configuration.
* */
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.https", "");
config->setString("proxy.https.resolver", "");
config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname");
// even tho proxy protocol / scheme is http, it should not be picked (prior to this PR, it would be picked)
config->setString("proxy.https.resolver.proxy_scheme", "http");
config->setString("proxy.https.resolver.proxy_port", "80");
config->setString("proxy.https.resolver.proxy_cache_time", "10");
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve();
/*
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
* there is no `http` specification for remote resolver
* */
ASSERT_EQ(http_proxy_configuration.host, http_env_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, http_env_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme()));
}
TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTPS)
{
/*
* Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver,
* it is hard to verify the remote resolver was actually picked. One hackish way to assert
* the remote resolver was OR was not picked based on the configuration, is to use the
* environment resolver. Since the environment resolver is always returned as a fallback,
* we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve`
* succeeds and returns an environment proxy configuration.
* */
EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server);
ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration());
config->setString("proxy", "");
config->setString("proxy.http", "");
config->setString("proxy.http.resolver", "");
config->setString("proxy.http.resolver.endpoint", "http://resolver:8080/hostname");
// even tho proxy protocol / scheme is https, it should not be picked (prior to this PR, it would be picked)
config->setString("proxy.http.resolver.proxy_scheme", "https");
config->setString("proxy.http.resolver.proxy_port", "80");
config->setString("proxy.http.resolver.proxy_cache_time", "10");
context->setConfig(config);
auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve();
/*
* Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because
* there is no `http` specification for remote resolver
* */
ASSERT_EQ(http_proxy_configuration.host, https_env_proxy_server.getHost());
ASSERT_EQ(http_proxy_configuration.port, https_env_proxy_server.getPort());
ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme()));
}
// remote resolver is tricky to be tested in unit tests

View File

@ -57,40 +57,3 @@ TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv)
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}
TEST(EnvironmentProxyConfigurationResolver, TestANYHTTP)
{
EnvironmentProxySetter setter(http_proxy_server, {});
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, http_proxy_server.getHost());
ASSERT_EQ(configuration.port, http_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestANYHTTPS)
{
EnvironmentProxySetter setter({}, https_proxy_server);
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, https_proxy_server.getHost());
ASSERT_EQ(configuration.port, https_proxy_server.getPort());
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_proxy_server.getScheme()));
}
TEST(EnvironmentProxyConfigurationResolver, TestANYNoEnv)
{
DB::EnvironmentProxyConfigurationResolver resolver(DB::ProxyConfiguration::Protocol::ANY);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, "");
ASSERT_EQ(configuration.protocol, DB::ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.port, 0u);
}

View File

@ -103,11 +103,12 @@ class IColumn;
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. Only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
@ -174,7 +175,7 @@ class IColumn;
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_granules_to_enable, 0, "If the number of marks to read is less than the value of this setting - parallel replicas will be disabled", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
\
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
@ -205,7 +206,9 @@ class IColumn;
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
\
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
M(Bool, use_mysql_types_in_show_columns, false, "Show MySQL types in SHOW COLUMNS and system.columns", 0) \
M(Bool, use_mysql_types_in_show_columns, false, "Show native MySQL types in SHOW [FULL] COLUMNS", 0) \
M(Bool, mysql_map_string_to_text_in_show_columns, false, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \
M(Bool, mysql_map_fixed_string_to_text_in_show_columns, false, "If enabled, FixedString type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \
\
M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
\
@ -865,6 +868,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -75,7 +75,7 @@ static BlockIO tryToExecuteQuery(const String & query_to_execute, ContextMutable
if (!database.empty())
query_context->setCurrentDatabase(database);
return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, true);
return executeQuery("/*" + comment + "*/ " + query_to_execute, query_context, true).second;
}
catch (...)
{

View File

@ -168,8 +168,7 @@ QueryPipeline ClickHouseDictionarySource::createStreamForQuery(const String & qu
if (configuration.is_local)
{
pipeline = executeQuery(query, context_copy, true).pipeline;
pipeline = executeQuery(query, context_copy, true).second.pipeline;
pipeline.convertStructureTo(empty_sample_block.getColumnsWithTypeAndName());
}
else
@ -191,7 +190,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
if (configuration.is_local)
{
return readInvalidateQuery(executeQuery(request, context_copy, true).pipeline);
return readInvalidateQuery(executeQuery(request, context_copy, true).second.pipeline);
}
else
{

View File

@ -71,7 +71,11 @@ std::unique_ptr<S3::Client> getClient(
/*
* Override proxy configuration for backwards compatibility with old configuration format.
* */
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(config_prefix, config);
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(uri.uri.getScheme()),
config_prefix,
config
);
if (proxy_config)
{
client_configuration.per_request_configuration

View File

@ -410,7 +410,7 @@ std::unique_ptr<ReadBuffer> FormatFactory::wrapReadBufferIfNeeded(
static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr context)
{
auto element_id = context->getProcessListElement();
auto element_id = context->getProcessListElementSafe();
if (element_id)
{
/// While preparing the query there might have been progress (for example in subscalar subqueries) so add it here

View File

@ -324,6 +324,14 @@ namespace
auto retry_timeout = timeouts.connection_timeout.totalMilliseconds();
auto session = pool_ptr->second->get(retry_timeout);
const auto & session_data = session->sessionData();
if (session_data.empty() || !Poco::AnyCast<HTTPSessionReuseTag>(&session_data))
{
/// Reset session if it is not reusable. See comment for HTTPSessionReuseTag.
session->reset();
}
session->attachSessionData({});
setTimeouts(*session, timeouts);
return session;

View File

@ -74,8 +74,17 @@ void resetSessionIfNeeded(bool read_all_range_successfully, std::optional<Aws::S
}
else if (auto session = getSession(*read_result); !session.isNull())
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
if (!session->getProxyHost().empty())
{
/// Reset proxified sessions because proxy can change for every request. See ProxyConfigurationResolver.
resetSession(*read_result);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
}
else
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
}
}
}
}

View File

@ -276,7 +276,7 @@ void PocoHTTPClient::makeRequestInternal(
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.
const auto request_configuration = per_request_configuration();
if (http_connection_pool_size && request_configuration.host.empty())
if (http_connection_pool_size)
makeRequestInternalImpl<true>(request, request_configuration, response, readLimiter, writeLimiter);
else
makeRequestInternalImpl<false>(request, request_configuration, response, readLimiter, writeLimiter);

View File

@ -23,7 +23,7 @@ InterpreterShowAccessEntitiesQuery::InterpreterShowAccessEntitiesQuery(const AST
BlockIO InterpreterShowAccessEntitiesQuery::execute()
{
return executeQuery(getRewrittenQuery(), getContext(), true);
return executeQuery(getRewrittenQuery(), getContext(), true).second;
}

View File

@ -12,7 +12,7 @@ InterpreterShowPrivilegesQuery::InterpreterShowPrivilegesQuery(const ASTPtr & qu
BlockIO InterpreterShowPrivilegesQuery::execute()
{
return executeQuery("SELECT * FROM system.privileges", context, true);
return executeQuery("SELECT * FROM system.privileges", context, true).second;
}
}

View File

@ -24,6 +24,13 @@ NamesAndTypesList AsynchronousInsertLogElement::getNamesAndTypes()
{"FlushError", static_cast<Int8>(Status::FlushError)},
});
auto type_data_kind = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
{
{"Parsed", static_cast<Int8>(DataKind::Parsed)},
{"Preprocessed", static_cast<Int8>(DataKind::Preprocessed)},
});
return
{
{"event_date", std::make_shared<DataTypeDate>()},
@ -39,6 +46,7 @@ NamesAndTypesList AsynchronousInsertLogElement::getNamesAndTypes()
{"rows", std::make_shared<DataTypeUInt64>()},
{"exception", std::make_shared<DataTypeString>()},
{"status", type_status},
{"data_kind", type_data_kind},
{"flush_time", std::make_shared<DataTypeDateTime>()},
{"flush_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
@ -64,6 +72,7 @@ void AsynchronousInsertLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(rows);
columns[i++]->insert(exception);
columns[i++]->insert(status);
columns[i++]->insert(data_kind);
columns[i++]->insert(flush_time);
columns[i++]->insert(flush_time_microseconds);

View File

@ -1,6 +1,6 @@
#pragma once
#include "Common/Exception.h"
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Interpreters/SystemLog.h>
#include <Core/NamesAndTypes.h>
#include <Core/NamesAndAliases.h>
@ -31,6 +31,9 @@ struct AsynchronousInsertLogElement
String exception;
Status status{};
using DataKind = AsynchronousInsertQueue::DataKind;
DataKind data_kind{};
time_t flush_time{};
Decimal64 flush_time_microseconds{};
String flush_query_id;

View File

@ -58,26 +58,53 @@ namespace ErrorCodes
extern const int UNKNOWN_EXCEPTION;
extern const int UNKNOWN_FORMAT;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
AsynchronousInsertQueue::InsertQuery::InsertQuery(const ASTPtr & query_, const Settings & settings_, const std::optional<UUID> & user_id_, const std::vector<UUID> & current_roles_)
static const NameSet settings_to_skip
{
/// We don't consider this setting because it is only for deduplication,
/// which means we can put two inserts with different tokens in the same block safely.
"insert_deduplication_token",
"log_comment",
};
AsynchronousInsertQueue::InsertQuery::InsertQuery(
const ASTPtr & query_,
const std::optional<UUID> & user_id_,
const std::vector<UUID> & current_roles_,
const Settings & settings_,
DataKind data_kind_)
: query(query_->clone())
, query_str(queryToString(query))
, settings(settings_)
, user_id(user_id_)
, current_roles(current_roles_)
, hash(calculateHash())
, settings(settings_)
, data_kind(data_kind_)
{
}
SipHash siphash;
AsynchronousInsertQueue::InsertQuery::InsertQuery(const InsertQuery & other)
: query(other.query->clone())
, query_str(other.query_str)
, settings(other.settings)
, user_id(other.user_id)
, current_roles(other.current_roles)
, hash(other.hash)
{
siphash.update(data_kind);
query->updateTreeHash(siphash);
if (user_id)
{
siphash.update(*user_id);
for (const auto & current_role : current_roles)
siphash.update(current_role);
}
for (const auto & setting : settings.allChanged())
{
if (settings_to_skip.contains(setting.getName()))
continue;
setting_changes.emplace_back(setting.getName(), setting.getValue());
siphash.update(setting.getName());
applyVisitor(FieldVisitorHash(siphash), setting.getValue());
}
hash = siphash.get128();
}
AsynchronousInsertQueue::InsertQuery &
@ -90,46 +117,29 @@ AsynchronousInsertQueue::InsertQuery::operator=(const InsertQuery & other)
user_id = other.user_id;
current_roles = other.current_roles;
settings = other.settings;
data_kind = other.data_kind;
hash = other.hash;
setting_changes = other.setting_changes;
}
return *this;
}
UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const
{
SipHash siphash;
query->updateTreeHash(siphash);
if (user_id)
{
siphash.update(*user_id);
for (const auto & current_role : current_roles)
siphash.update(current_role);
}
for (const auto & setting : settings.allChanged())
{
/// We don't consider this setting because it is only for deduplication,
/// which means we can put two inserts with different tokens in the same block safely.
if (setting.getName() == "insert_deduplication_token")
continue;
siphash.update(setting.getName());
applyVisitor(FieldVisitorHash(siphash), setting.getValue());
}
return siphash.get128();
}
bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other) const
{
return query_str == other.query_str && user_id == other.user_id && current_roles == other.current_roles && settings == other.settings;
return toTupleCmp() == other.toTupleCmp();
}
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, const String & async_dedup_token_, MemoryTracker * user_memory_tracker_)
: bytes(std::move(bytes_))
AsynchronousInsertQueue::InsertData::Entry::Entry(
DataChunk && chunk_,
String && query_id_,
const String & async_dedup_token_,
const String & format_,
MemoryTracker * user_memory_tracker_)
: chunk(std::move(chunk_))
, query_id(std::move(query_id_))
, async_dedup_token(async_dedup_token_)
, format(format_)
, user_memory_tracker(user_memory_tracker_)
, create_time(std::chrono::system_clock::now())
{
@ -146,7 +156,7 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
// Each entry in the list may correspond to a different user,
// so we need to switch current thread's MemoryTracker.
MemoryTrackerSwitcher switcher(user_memory_tracker);
bytes = "";
chunk = {};
}
if (exception_)
@ -216,15 +226,12 @@ void AsynchronousInsertQueue::scheduleDataProcessingJob(const InsertQuery & key,
});
}
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context)
{
query = query->clone();
const auto & settings = query_context->getSettingsRef();
auto & insert_query = query->as<ASTInsertQuery &>();
insert_query.async_insert_flush = true;
InterpreterInsertQuery interpreter(query, query_context, settings.insert_allow_materialized_columns);
InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
auto table = interpreter.getTable(insert_query);
auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr());
@ -235,6 +242,13 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
/// InterpreterInsertQuery::getTable() -> ITableFunction::execute().
if (insert_query.table_id)
query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames());
}
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushQueryWithInlinedData(ASTPtr query, ContextPtr query_context)
{
query = query->clone();
preprocessInsertQuery(query, query_context);
String bytes;
{
@ -245,7 +259,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
auto read_buf = getReadBufferFromASTInsertQuery(query);
LimitReadBuffer limit_buf(
*read_buf, settings.async_insert_max_data_size,
*read_buf, query_context->getSettingsRef().async_insert_max_data_size,
/*throw_exception=*/ false, /*exact_limit=*/ {});
WriteBufferFromString write_buf(bytes);
@ -270,9 +284,35 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
}
}
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), settings.insert_deduplication_token, CurrentThread::getUserMemoryTracker());
return pushDataChunk(std::move(query), std::move(bytes), std::move(query_context));
}
InsertQuery key{query, settings, query_context->getUserID(), query_context->getCurrentRoles()};
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushQueryWithBlock(ASTPtr query, Block block, ContextPtr query_context)
{
query = query->clone();
preprocessInsertQuery(query, query_context);
return pushDataChunk(std::move(query), std::move(block), std::move(query_context));
}
AsynchronousInsertQueue::PushResult
AsynchronousInsertQueue::pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr query_context)
{
const auto & settings = query_context->getSettingsRef();
auto & insert_query = query->as<ASTInsertQuery &>();
auto data_kind = chunk.getDataKind();
auto entry = std::make_shared<InsertData::Entry>(
std::move(chunk), query_context->getCurrentQueryId(),
settings.insert_deduplication_token, insert_query.format,
CurrentThread::getUserMemoryTracker());
/// If data is parsed on client we don't care of format which is written
/// in INSERT query. Replace it to put all such queries into one bucket in queue.
if (data_kind == DataKind::Preprocessed)
insert_query.format = "Native";
InsertQuery key{query, query_context->getUserID(), query_context->getCurrentRoles(), settings, data_kind};
InsertDataPtr data_to_process;
std::future<void> insert_future;
@ -292,7 +332,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
auto queue_it = it->second;
auto & data = queue_it->second.data;
size_t entry_data_size = entry->bytes.size();
size_t entry_data_size = entry->chunk.byteSize();
assert(data);
data->size_in_bytes += entry_data_size;
@ -456,6 +496,13 @@ catch (...)
tryLogCurrentException("AsynchronousInsertQueue", "Failed to add elements to AsynchronousInsertLog");
}
String serializeQuery(const IAST & query, size_t max_length)
{
return query.hasSecretParts()
? query.formatForLogging(max_length)
: wipeSensitiveDataAndCutToLength(serializeAST(query), max_length);
}
}
// static
@ -469,6 +516,7 @@ try
const auto * log = &Poco::Logger::get("AsynchronousInsertQueue");
const auto & insert_query = assert_cast<const ASTInsertQuery &>(*key.query);
auto insert_context = Context::createCopy(global_context);
bool internal = false; // To enable logging this query
bool async_insert = true;
@ -478,6 +526,10 @@ try
/// 'resetParser' doesn't work for parallel parsing.
key.settings.set("input_format_parallel_parsing", false);
/// It maybe insert into distributed table.
/// It doesn't make sense to make insert into destination tables asynchronous.
key.settings.set("async_insert", false);
insert_context->makeQueryContext();
/// Access rights must be checked for the user who executed the initial INSERT query.
@ -491,6 +543,7 @@ try
auto insert_query_id = insert_context->getCurrentQueryId();
auto query_start_time = std::chrono::system_clock::now();
Stopwatch start_watch{CLOCK_MONOTONIC};
insert_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY);
insert_context->setInitialQueryStartTime(query_start_time);
@ -499,36 +552,45 @@ try
DB::CurrentThread::QueryScope query_scope_holder(insert_context);
size_t log_queries_cut_to_length = insert_context->getSettingsRef().log_queries_cut_to_length;
String query_for_logging = insert_query.hasSecretParts()
? insert_query.formatForLogging(log_queries_cut_to_length)
: wipeSensitiveDataAndCutToLength(serializeAST(insert_query), log_queries_cut_to_length);
auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);
/// We add it to the process list so
/// a) it appears in system.processes
/// b) can be cancelled if we want to
/// c) has an associated process list element where runtime metrics are stored
auto process_list_entry
= insert_context->getProcessList().insert(query_for_logging, key.query.get(), insert_context, start_watch.getStart());
auto process_list_entry = insert_context->getProcessList().insert(
query_for_logging,
key.query.get(),
insert_context,
start_watch.getStart());
auto query_status = process_list_entry->getQueryStatus();
insert_context->setProcessListElement(std::move(query_status));
String query_database{};
String query_table{};
String query_database;
String query_table;
if (insert_query.table_id)
{
query_database = insert_query.table_id.getDatabaseName();
query_table = insert_query.table_id.getTableName();
insert_context->setInsertionTable(insert_query.table_id);
}
std::unique_ptr<DB::IInterpreter> interpreter;
QueryPipeline pipeline;
QueryLogElement query_log_elem;
auto async_insert_log = global_context->getAsynchronousInsertLog();
std::vector<AsynchronousInsertLogElement> log_elements;
if (async_insert_log)
log_elements.reserve(data->entries.size());
try
{
interpreter = std::make_unique<InterpreterInsertQuery>(
key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
pipeline = interpreter->execute().pipeline;
chassert(pipeline.pushing());
@ -550,92 +612,39 @@ try
throw;
}
auto header = pipeline.getHeader();
auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr);
size_t total_rows = 0;
InsertData::EntryPtr current_entry;
String current_exception;
auto on_error = [&](const MutableColumns & result_columns, Exception & e)
auto add_entry_to_log = [&](
const auto & entry, const auto & entry_query_for_logging,
const auto & exception, size_t num_rows, size_t num_bytes)
{
current_exception = e.displayText();
LOG_ERROR(log, "Failed parsing for query '{}' with query id {}. {}",
key.query_str, current_entry->query_id, current_exception);
if (!async_insert_log)
return;
for (const auto & column : result_columns)
if (column->size() > total_rows)
column->popBack(column->size() - total_rows);
AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.query_for_logging = entry_query_for_logging;
elem.database = query_database;
elem.table = query_table;
elem.format = entry->format;
elem.query_id = entry->query_id;
elem.bytes = num_bytes;
elem.rows = num_rows;
elem.exception = exception;
elem.data_kind = entry->chunk.getDataKind();
current_entry->finish(std::current_exception());
return 0;
};
std::shared_ptr<ISimpleTransform> adding_defaults_transform;
if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields && insert_query.table_id)
{
StoragePtr storage = DatabaseCatalog::instance().getTable(insert_query.table_id, insert_context);
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
if (columns.hasDefaults())
adding_defaults_transform = std::make_shared<AddingDefaultsTransform>(header, columns, *format, insert_context);
}
auto insert_log = global_context->getAsynchronousInsertLog();
std::vector<AsynchronousInsertLogElement> log_elements;
if (insert_log)
log_elements.reserve(data->entries.size());
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> last_buffer;
auto chunk_info = std::make_shared<AsyncInsertInfo>();
for (const auto & entry : data->entries)
{
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
current_entry = entry;
auto bytes_size = entry->bytes.size();
size_t num_rows = executor.execute(*buffer);
total_rows += num_rows;
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
/// Keep buffer, because it still can be used
/// in destructor, while resetting buffer at next iteration.
last_buffer = std::move(buffer);
if (insert_log)
/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
AsynchronousInsertLogElement elem;
elem.event_time = timeInSeconds(entry->create_time);
elem.event_time_microseconds = timeInMicroseconds(entry->create_time);
elem.query_for_logging = query_for_logging;
elem.database = query_database;
elem.table = query_table;
elem.format = insert_query.format;
elem.query_id = entry->query_id;
elem.bytes = bytes_size;
elem.rows = num_rows;
elem.exception = current_exception;
current_exception.clear();
/// If there was a parsing error,
/// the entry won't be flushed anyway,
/// so add the log element immediately.
if (!elem.exception.empty())
{
elem.status = AsynchronousInsertLogElement::ParsingError;
insert_log->add(std::move(elem));
}
else
{
log_elements.push_back(elem);
}
elem.status = AsynchronousInsertLogElement::ParsingError;
async_insert_log->add(std::move(elem));
}
}
format->addBuffer(std::move(last_buffer));
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, total_rows);
else
{
log_elements.push_back(elem);
}
};
auto finish_entries = [&]
{
@ -648,11 +657,21 @@ try
if (!log_elements.empty())
{
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*insert_log, std::move(log_elements), flush_time, insert_query_id, "");
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, "");
}
};
if (total_rows == 0)
Chunk chunk;
auto header = pipeline.getHeader();
if (key.data_kind == DataKind::Parsed)
chunk = processEntriesWithParsing(key, data->entries, header, insert_context, log, add_entry_to_log);
else
chunk = processPreprocessedEntries(key, data->entries, header, insert_context, add_entry_to_log);
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, chunk.getNumRows());
if (chunk.getNumRows() == 0)
{
finish_entries();
return;
@ -660,9 +679,8 @@ try
try
{
auto chunk = Chunk(executor.getResultColumns(), total_rows);
chunk.setChunkInfo(std::move(chunk_info));
size_t total_bytes = chunk.bytes();
size_t num_rows = chunk.getNumRows();
size_t num_bytes = chunk.bytes();
auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk));
pipeline.complete(Pipe(std::move(source)));
@ -670,8 +688,7 @@ try
CompletedPipelineExecutor completed_executor(pipeline);
completed_executor.execute();
LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'",
total_rows, total_bytes, key.query_str);
LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", num_rows, num_bytes, key.query_str);
bool pulling_pipeline = false;
logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal);
@ -684,7 +701,7 @@ try
{
auto exception = getCurrentExceptionMessage(false);
auto flush_time = std::chrono::system_clock::now();
appendElementsToLogSafe(*insert_log, std::move(log_elements), flush_time, insert_query_id, exception);
appendElementsToLogSafe(*async_insert_log, std::move(log_elements), flush_time, insert_query_id, exception);
}
throw;
}
@ -708,6 +725,133 @@ catch (...)
finishWithException(key.query, data->entries, Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"));
}
template <typename LogFunc>
Chunk AsynchronousInsertQueue::processEntriesWithParsing(
const InsertQuery & key,
const std::list<InsertData::EntryPtr> & entries,
const Block & header,
const ContextPtr & insert_context,
const Poco::Logger * logger,
LogFunc && add_to_async_insert_log)
{
size_t total_rows = 0;
InsertData::EntryPtr current_entry;
String current_exception;
const auto & insert_query = assert_cast<const ASTInsertQuery &>(*key.query);
auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr);
std::shared_ptr<ISimpleTransform> adding_defaults_transform;
if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields && insert_query.table_id)
{
StoragePtr storage = DatabaseCatalog::instance().getTable(insert_query.table_id, insert_context);
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
if (columns.hasDefaults())
adding_defaults_transform = std::make_shared<AddingDefaultsTransform>(header, columns, *format, insert_context);
}
auto on_error = [&](const MutableColumns & result_columns, Exception & e)
{
current_exception = e.displayText();
LOG_ERROR(logger, "Failed parsing for query '{}' with query id {}. {}",
key.query_str, current_entry->query_id, current_exception);
for (const auto & column : result_columns)
if (column->size() > total_rows)
column->popBack(column->size() - total_rows);
current_entry->finish(std::current_exception());
return 0;
};
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> last_buffer;
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto query_for_logging = serializeQuery(*key.query, insert_context->getSettingsRef().log_queries_cut_to_length);
for (const auto & entry : entries)
{
current_entry = entry;
const auto * bytes = entry->chunk.asString();
if (!bytes)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected entry with data kind Parsed. Got: {}", entry->chunk.getDataKind());
auto buffer = std::make_unique<ReadBufferFromString>(*bytes);
size_t num_bytes = bytes->size();
size_t num_rows = executor.execute(*buffer);
/// Keep buffer, because it still can be used
/// in destructor, while resetting buffer at next iteration.
last_buffer = std::move(buffer);
total_rows += num_rows;
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes);
current_exception.clear();
}
format->addBuffer(std::move(last_buffer));
Chunk chunk(executor.getResultColumns(), total_rows);
chunk.setChunkInfo(std::move(chunk_info));
return chunk;
}
template <typename LogFunc>
Chunk AsynchronousInsertQueue::processPreprocessedEntries(
const InsertQuery & key,
const std::list<InsertData::EntryPtr> & entries,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log)
{
size_t total_rows = 0;
auto chunk_info = std::make_shared<AsyncInsertInfo>();
auto result_columns = header.cloneEmptyColumns();
std::unordered_map<String, String> format_to_query;
auto get_query_by_format = [&](const String & format) -> const String &
{
auto [it, inserted] = format_to_query.try_emplace(format);
if (!inserted)
return it->second;
auto query = key.query->clone();
assert_cast<ASTInsertQuery &>(*query).format = format;
it->second = serializeQuery(*query, insert_context->getSettingsRef().log_queries_cut_to_length);
return it->second;
};
for (const auto & entry : entries)
{
const auto * block = entry->chunk.asBlock();
if (!block)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected entry with data kind Preprocessed. Got: {}", entry->chunk.getDataKind());
auto columns = block->getColumns();
for (size_t i = 0, s = columns.size(); i < s; ++i)
result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size());
total_rows += block->rows();
chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
const auto & query_for_logging = get_query_by_format(entry->format);
add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes());
}
Chunk chunk(std::move(result_columns), total_rows);
chunk.setChunkInfo(std::move(chunk_info));
return chunk;
}
template <typename E>
void AsynchronousInsertQueue::finishWithException(
const ASTPtr & query, const std::list<InsertData::EntryPtr> & entries, const E & exception)

View File

@ -1,13 +1,16 @@
#pragma once
#include <Core/Block.h>
#include <Core/Settings.h>
#include <Parsers/IAST_fwd.h>
#include <Poco/Logger.h>
#include <Common/CurrentThread.h>
#include <Common/MemoryTrackerSwitcher.h>
#include <Common/ThreadPool.h>
#include <Processors/Chunk.h>
#include <future>
#include <variant>
namespace DB
{
@ -38,11 +41,23 @@ public:
/// Read buffer that contains extracted
/// from query data in case of too much data.
std::unique_ptr<ReadBuffer> insert_data_buffer;
/// Block that contains received by Native
/// protocol data in case of too much data.
Block insert_block;
};
enum class DataKind
{
Parsed = 0,
Preprocessed = 1,
};
/// Force flush the whole queue.
void flushAll();
PushResult push(ASTPtr query, ContextPtr query_context);
PushResult pushQueryWithInlinedData(ASTPtr query, ContextPtr query_context);
PushResult pushQueryWithBlock(ASTPtr query, Block block, ContextPtr query_context);
size_t getPoolSize() const { return pool_size; }
private:
@ -52,18 +67,55 @@ private:
public:
ASTPtr query;
String query_str;
Settings settings;
std::optional<UUID> user_id;
std::vector<UUID> current_roles;
Settings settings;
DataKind data_kind;
UInt128 hash;
InsertQuery(const ASTPtr & query_, const Settings & settings_, const std::optional<UUID> & user_id_, const std::vector<UUID> & current_roles_);
InsertQuery(const InsertQuery & other);
InsertQuery(
const ASTPtr & query_,
const std::optional<UUID> & user_id_,
const std::vector<UUID> & current_roles_,
const Settings & settings_,
DataKind data_kind_);
InsertQuery(const InsertQuery & other) { *this = other; }
InsertQuery & operator=(const InsertQuery & other);
bool operator==(const InsertQuery & other) const;
private:
UInt128 calculateHash() const;
auto toTupleCmp() const { return std::tie(data_kind, query_str, user_id, current_roles, setting_changes); }
std::vector<SettingChange> setting_changes;
};
struct DataChunk : public std::variant<String, Block>
{
using std::variant<String, Block>::variant;
size_t byteSize() const
{
return std::visit([]<typename T>(const T & arg)
{
if constexpr (std::is_same_v<T, Block>)
return arg.bytes();
else
return arg.size();
}, *this);
}
DataKind getDataKind() const
{
if (std::holds_alternative<Block>(*this))
return DataKind::Preprocessed;
else
return DataKind::Parsed;
}
const String * asString() const { return std::get_if<String>(this); }
const Block * asBlock() const { return std::get_if<Block>(this); }
};
struct InsertData
@ -71,13 +123,19 @@ private:
struct Entry
{
public:
String bytes;
DataChunk chunk;
const String query_id;
const String async_dedup_token;
const String format;
MemoryTracker * const user_memory_tracker;
const std::chrono::time_point<std::chrono::system_clock> create_time;
Entry(String && bytes_, String && query_id_, const String & async_dedup_token, MemoryTracker * user_memory_tracker_);
Entry(
DataChunk && chunk_,
String && query_id_,
const String & async_dedup_token_,
const String & format_,
MemoryTracker * user_memory_tracker_);
void finish(std::exception_ptr exception_ = nullptr);
std::future<void> getFuture() { return promise.get_future(); }
@ -158,11 +216,31 @@ private:
Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue");
PushResult pushDataChunk(ASTPtr query, DataChunk chunk, ContextPtr query_context);
void preprocessInsertQuery(const ASTPtr & query, const ContextPtr & query_context);
void processBatchDeadlines(size_t shard_num);
void scheduleDataProcessingJob(const InsertQuery & key, InsertDataPtr data, ContextPtr global_context);
static void processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context);
template <typename LogFunc>
static Chunk processEntriesWithParsing(
const InsertQuery & key,
const std::list<InsertData::EntryPtr> & entries,
const Block & header,
const ContextPtr & insert_context,
const Poco::Logger * logger,
LogFunc && add_to_async_insert_log);
template <typename LogFunc>
static Chunk processPreprocessedEntries(
const InsertQuery & key,
const std::list<InsertData::EntryPtr> & entries,
const Block & header,
const ContextPtr & insert_context,
LogFunc && add_to_async_insert_log);
template <typename E>
static void finishWithException(const ASTPtr & query, const std::list<InsertData::EntryPtr> & entries, const E & exception);

View File

@ -3588,6 +3588,15 @@ std::shared_ptr<FilesystemCacheLog> Context::getFilesystemCacheLog() const
return shared->system_logs->filesystem_cache_log;
}
std::shared_ptr<S3QueueLog> Context::getS3QueueLog() const
{
auto lock = getGlobalSharedLock();
if (!shared->system_logs)
return {};
return shared->system_logs->s3_queue_log;
}
std::shared_ptr<FilesystemReadPrefetchesLog> Context::getFilesystemReadPrefetchesLog() const
{
auto lock = getGlobalSharedLock();

View File

@ -105,6 +105,7 @@ class TransactionsInfoLog;
class ProcessorsProfileLog;
class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class S3QueueLog;
class AsynchronousInsertLog;
class BackupLog;
class IAsynchronousReader;
@ -1041,6 +1042,7 @@ public:
std::shared_ptr<TransactionsInfoLog> getTransactionsInfoLog() const;
std::shared_ptr<ProcessorsProfileLog> getProcessorsProfileLog() const;
std::shared_ptr<FilesystemCacheLog> getFilesystemCacheLog() const;
std::shared_ptr<S3QueueLog> getS3QueueLog() const;
std::shared_ptr<FilesystemReadPrefetchesLog> getFilesystemReadPrefetchesLog() const;
std::shared_ptr<AsynchronousInsertLog> getAsynchronousInsertLog() const;
std::shared_ptr<BackupLog> getBackupLog() const;

View File

@ -420,7 +420,7 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S
if (where_expression)
select_query += " WHERE " + queryToString(where_expression);
auto io = executeQuery(select_query, getContext(), true);
auto io = executeQuery(select_query, getContext(), true).second;
PullingPipelineExecutor executor(io.pipeline);
Block res;
while (!res && executor.pull(res));

View File

@ -103,6 +103,9 @@ namespace ProfileEvents
namespace DB
{
static UInt64 getLimitUIntValue(const ASTPtr & node, const ContextPtr & context, const std::string & expr);
static std::pair<UInt64, UInt64> getLimitLengthAndOffset(const ASTSelectQuery & query, const ContextPtr & context);
namespace ErrorCodes
{
extern const int TOO_DEEP_SUBQUERIES;
@ -810,18 +813,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
need_analyze_again = true;
}
if (can_analyze_again
&& settings.max_parallel_replicas > 1
&& settings.allow_experimental_parallel_reading_from_replicas > 0
&& settings.parallel_replicas_custom_key.value.empty()
&& getTrivialCount(0).has_value())
{
/// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze
context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
context->setSetting("max_parallel_replicas", UInt64{0});
need_analyze_again = true;
LOG_TRACE(log, "Disabling parallel replicas to be able to use a trivial count optimization");
}
if (can_analyze_again)
need_analyze_again |= adjustParallelReplicasAfterAnalysis();
if (need_analyze_again)
{
@ -871,6 +864,70 @@ InterpreterSelectQuery::InterpreterSelectQuery(
sanitizeBlock(result_header, true);
}
bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis()
{
const Settings & settings = context->getSettingsRef();
ASTSelectQuery & query = getSelectQuery();
/// While only_analyze we don't know anything about parts, so any decision about how many parallel replicas to use would be wrong
if (!storage || options.only_analyze || !context->canUseParallelReplicasOnInitiator())
return false;
if (getTrivialCount(0).has_value())
{
/// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze
context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
context->setSetting("max_parallel_replicas", UInt64{0});
LOG_DEBUG(log, "Disabling parallel replicas to be able to use a trivial count optimization");
return true;
}
auto storage_merge_tree = std::dynamic_pointer_cast<MergeTreeData>(storage);
if (!storage_merge_tree || settings.parallel_replicas_min_number_of_rows_per_replica == 0)
return false;
auto query_info_copy = query_info;
auto analysis_copy = analysis_result;
/// There is a couple of instances where there might be a lower limit on the rows to be read
/// * The max_rows_to_read setting
/// * A LIMIT in a simple query (see maxBlockSizeByLimit())
UInt64 max_rows = maxBlockSizeByLimit();
if (settings.max_rows_to_read)
max_rows = max_rows ? std::min(max_rows, settings.max_rows_to_read.value) : settings.max_rows_to_read;
query_info_copy.limit = max_rows;
/// Apply filters to prewhere and add them to the query_info so we can filter out parts efficiently during row estimation
applyFiltersToPrewhereInAnalysis(analysis_copy);
if (analysis_copy.prewhere_info)
{
query_info_copy.prewhere_info = analysis_copy.prewhere_info;
if (query.prewhere() && !query.where())
query_info_copy.prewhere_info->need_filter = true;
}
ActionDAGNodes added_filter_nodes = MergeTreeData::getFiltersForPrimaryKeyAnalysis(*this);
UInt64 rows_to_read = storage_merge_tree->estimateNumberOfRowsToRead(context, storage_snapshot, query_info_copy, added_filter_nodes);
/// Note that we treat an estimation of 0 rows as a real estimation
size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
LOG_TRACE(log, "Estimated {} rows to read. It is enough work for {} parallel replicas", rows_to_read, number_of_replicas_to_use);
if (number_of_replicas_to_use <= 1)
{
context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
context->setSetting("max_parallel_replicas", UInt64{0});
LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read");
return true;
}
else if (number_of_replicas_to_use < settings.max_parallel_replicas)
{
context->setSetting("max_parallel_replicas", number_of_replicas_to_use);
LOG_DEBUG(log, "Reducing the number of replicas to use to {}", number_of_replicas_to_use);
/// No need to reanalyze
}
return false;
}
void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan)
{
executeImpl(query_plan, std::move(input_pipe));
@ -2046,43 +2103,48 @@ void InterpreterSelectQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons
}
}
bool InterpreterSelectQuery::shouldMoveToPrewhere()
bool InterpreterSelectQuery::shouldMoveToPrewhere() const
{
const Settings & settings = context->getSettingsRef();
const ASTSelectQuery & query = getSelectQuery();
const ASTSelectQuery & query = query_ptr->as<const ASTSelectQuery &>();
return settings.optimize_move_to_prewhere && (!query.final() || settings.optimize_move_to_prewhere_if_final);
}
void InterpreterSelectQuery::addPrewhereAliasActions()
/// Note that this is const and accepts the analysis ref to be able to use it to do analysis for parallel replicas
/// without affecting the final analysis multiple times
void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysisResult & analysis) const
{
auto & expressions = analysis_result;
if (expressions.filter_info)
if (!analysis.filter_info)
return;
if (!analysis.prewhere_info)
{
if (!expressions.prewhere_info)
const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere();
if (does_storage_support_prewhere && shouldMoveToPrewhere())
{
const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere();
if (does_storage_support_prewhere && shouldMoveToPrewhere())
{
/// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
expressions.prewhere_info = std::make_shared<PrewhereInfo>(
std::move(expressions.filter_info->actions),
std::move(expressions.filter_info->column_name));
expressions.prewhere_info->prewhere_actions->projectInput(false);
expressions.prewhere_info->remove_prewhere_column = expressions.filter_info->do_remove_column;
expressions.prewhere_info->need_filter = true;
expressions.filter_info = nullptr;
}
}
else
{
/// Add row level security actions to prewhere.
expressions.prewhere_info->row_level_filter = std::move(expressions.filter_info->actions);
expressions.prewhere_info->row_level_column_name = std::move(expressions.filter_info->column_name);
expressions.prewhere_info->row_level_filter->projectInput(false);
expressions.filter_info = nullptr;
/// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
analysis.prewhere_info
= std::make_shared<PrewhereInfo>(std::move(analysis.filter_info->actions), std::move(analysis.filter_info->column_name));
analysis.prewhere_info->prewhere_actions->projectInput(false);
analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column;
analysis.prewhere_info->need_filter = true;
analysis.filter_info = nullptr;
}
}
else
{
/// Add row level security actions to prewhere.
analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions);
analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name);
analysis.prewhere_info->row_level_filter->projectInput(false);
analysis.filter_info = nullptr;
}
}
void InterpreterSelectQuery::addPrewhereAliasActions()
{
applyFiltersToPrewhereInAnalysis(analysis_result);
auto & prewhere_info = analysis_result.prewhere_info;
auto & columns_to_remove_after_prewhere = analysis_result.columns_to_remove_after_prewhere;
@ -2241,7 +2303,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
}
}
/// Based on the query analysis, check if optimizing the count trivial count to use totalRows is possible
/// Based on the query analysis, check if using a trivial count (storage or partition metadata) is possible
std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_parallel_replicas)
{
const Settings & settings = context->getSettingsRef();
@ -2281,6 +2343,36 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
}
}
/** Optimization - if not specified DISTINCT, WHERE, GROUP, HAVING, ORDER, JOIN, LIMIT BY, WITH TIES
* but LIMIT is specified, and limit + offset < max_block_size,
* then as the block size we will use limit + offset (not to read more from the table than requested),
* and also set the number of threads to 1.
*/
UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const
{
const auto & query = query_ptr->as<const ASTSelectQuery &>();
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
if (!query.distinct
&& !query.limit_with_ties
&& !query.prewhere()
&& !query.where()
&& query_info.filter_asts.empty()
&& !query.groupBy()
&& !query.having()
&& !query.orderBy()
&& !query.limitBy()
&& !query.join()
&& !query_analyzer->hasAggregation()
&& !query_analyzer->hasWindow()
&& query.limitLength()
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
return limit_length + limit_offset;
return 0;
}
void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
{
auto & query = getSelectQuery();
@ -2351,39 +2443,18 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
}
UInt64 max_block_size = settings.max_block_size;
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
auto local_limits = getStorageLimits(*context, options);
/** Optimization - if not specified DISTINCT, WHERE, GROUP, HAVING, ORDER, JOIN, LIMIT BY, WITH TIES
* but LIMIT is specified, and limit + offset < max_block_size,
* then as the block size we will use limit + offset (not to read more from the table than requested),
* and also set the number of threads to 1.
*/
if (!query.distinct
&& !query.limit_with_ties
&& !query.prewhere()
&& !query.where()
&& query_info.filter_asts.empty()
&& !query.groupBy()
&& !query.having()
&& !query.orderBy()
&& !query.limitBy()
&& !query.join()
&& !query_analyzer->hasAggregation()
&& !query_analyzer->hasWindow()
&& query.limitLength()
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
if (UInt64 max_block_limited = maxBlockSizeByLimit())
{
if (limit_length + limit_offset < max_block_size)
if (max_block_limited < max_block_size)
{
max_block_size = std::max<UInt64>(1, limit_length + limit_offset);
max_block_size = std::max<UInt64>(1, max_block_limited);
max_threads_execute_query = max_streams = 1;
}
if (limit_length + limit_offset < local_limits.local_limits.size_limits.max_rows)
if (max_block_limited < local_limits.local_limits.size_limits.max_rows)
{
query_info.limit = limit_length + limit_offset;
query_info.limit = max_block_limited;
}
}

Some files were not shown because too many files have changed in this diff Show More