Merge remote-tracking branch 'blessed/master' into parallel_replicas_row_estimation

Raúl Marín 2023-10-19 15:20:50 +00:00
commit 4a53943926
138 changed files with 4733 additions and 2195 deletions

View File

@ -77,6 +77,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -185,6 +186,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -227,6 +229,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -399,6 +402,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -448,6 +452,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -487,6 +492,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
@ -73,6 +74,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -24,6 +24,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0
filter: tree:0
- name: Jepsen Test
run: |
sudo rm -fr "$TEMP_PATH"
@ -53,6 +54,7 @@ jobs:
# with:
# clear-repository: true
# fetch-depth: 0
# filter: tree:0
# - name: Jepsen Test
# run: |
# sudo rm -fr "$TEMP_PATH"

View File

@ -61,6 +61,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -200,6 +201,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -242,6 +244,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -283,6 +286,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -581,6 +585,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -630,6 +635,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -672,6 +678,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -714,6 +721,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -763,6 +771,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -805,6 +814,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -847,6 +857,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -889,6 +900,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -931,6 +943,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -963,6 +976,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -54,6 +54,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -90,6 +91,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
filter: tree:0
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1

View File

@ -18,6 +18,7 @@ on: # yamllint disable-line rule:truthy
- 'docs/**'
- 'utils/check-style/aspell-ignore/**'
- 'tests/ci/docs_check.py'
- '.github/workflows/docs_check.yml'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -94,6 +95,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -266,6 +268,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # for performance artifact
filter: tree:0
submodules: true
- name: Build
run: |
@ -350,6 +353,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # for performance artifact
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -1021,6 +1025,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -49,6 +49,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # otherwise we will have no version info
filter: tree:0
ref: ${{ env.GITHUB_TAG }}
- name: Check docker clickhouse/clickhouse-server building
run: |

View File

@ -53,6 +53,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
filter: tree:0
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:
@ -161,6 +162,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -203,6 +205,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # For a proper version and performance artifacts
filter: tree:0
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
@ -456,6 +459,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -505,6 +509,7 @@ jobs:
clear-repository: true
submodules: true
fetch-depth: 0 # otherwise we will have no info about contributors
filter: tree:0
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
@ -544,6 +549,7 @@ jobs:
with:
clear-repository: true
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
filter: tree:0
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"

View File

@ -38,6 +38,7 @@ jobs:
with:
ref: master
fetch-depth: 0
filter: tree:0
- name: Update versions, docker version, changelog, security
env:
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}

contrib/grpc vendored

@ -1 +1 @@
Subproject commit 3f975ecab377cd5f739af780566596128f17bb74
Subproject commit c52656e2bfcda3450bd6a7c247d2d9eeb8498524

View File

@ -24,6 +24,12 @@ else ()
set (SNAPPY_HAVE_SSSE3 0)
endif ()
if (ARCH_AMD64 AND ENABLE_SSE42)
set (SNAPPY_HAVE_X86_CRC32 1)
else ()
set (SNAPPY_HAVE_X86_CRC32 0)
endif ()
configure_file(
"${SOURCE_DIR}/cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h")

View File

@ -2,8 +2,8 @@
# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically,
# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
@ -31,7 +31,9 @@ RUN arch=${TARGETARCH:-amd64} \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.9.1.1854"
ARG PACKAGES="clickhouse-keeper"
@ -46,16 +48,14 @@ ARG PACKAGES="clickhouse-keeper"
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -172,10 +172,15 @@ then
# This is why we add this repository snapshot from CI to the performance test
# package.
mkdir "$PERF_OUTPUT"/ch
git -C "$PERF_OUTPUT"/ch init --bare
git -C "$PERF_OUTPUT"/ch remote add origin /build
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin HEAD:pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:master
# Copy .git only, but skip modules, using tar
tar c -C /build/ --exclude='.git/modules/**' .git | tar x -C "$PERF_OUTPUT"/ch
# Create branch pr and origin/master to have them for the following performance comparison
git -C "$PERF_OUTPUT"/ch branch pr
git -C "$PERF_OUTPUT"/ch fetch --no-tags --depth 50 origin master:origin/master
# Clean up the remote so it does not become stale
git -C "$PERF_OUTPUT"/ch remote | xargs -n1 git -C "$PERF_OUTPUT"/ch remote remove
# And clean all tags
git -C "$PERF_OUTPUT"/ch tag | xargs git -C "$PERF_OUTPUT"/ch tag -d
git -C "$PERF_OUTPUT"/ch reset --soft pr
git -C "$PERF_OUTPUT"/ch log -5
(

View File

@ -23,7 +23,6 @@ COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
@ -45,16 +44,14 @@ ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
( \
cd /tmp \
&& echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz.sha512" \
&& sed 's:/output/:/tmp/:' < "${package}-${VERSION}-${arch}.tgz.sha512" | sha512sum -c \
&& tar xvzf "${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / \
) \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \

View File

@ -5,6 +5,13 @@ ARG DEBIAN_FRONTEND=noninteractive
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case the entrypoint
# can't do chown, and the owners of mounted volumes should be configured externally.
# We do that in advance, at the beginning of the Dockerfile, before any packages are
# installed, to prevent some unrelated software from picking those uid / gid.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
@ -35,13 +42,6 @@ ARG deb_location_url=""
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
ARG TARGETARCH
# install from a web location with deb packages

View File

@ -337,8 +337,8 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ rg --text -o "Found error:.*" fuzzer.log \
|| rg --text -ao "Exception:.*" fuzzer.log \
{ rg -ao "Found error:.*" fuzzer.log \
|| rg -ao "Exception:.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi

View File

@ -61,11 +61,13 @@ function configure()
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
}
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
# Randomize all Keeper feature flags
randomize_config_boolean_value filtered_list
randomize_config_boolean_value multi_read
randomize_config_boolean_value check_not_exists
randomize_config_boolean_value create_if_not_exists
fi
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

View File

@ -67,6 +67,48 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
#### Running style check locally:
```sh
mkdir -p /tmp/test_output
# running all checks
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
# run specified check script (e.g.: ./check-mypy)
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
# find all style check scripts under the directory:
cd ./utils/check-style
# Check duplicate includes
./check-duplicate-includes.sh
# Check C++ formatting
./check-style
# Check python formatting with black
./check-black
# Check python type hinting with mypy
./check-mypy
# Check code with codespell
./check-typos
# Check docs spelling
./check-doc-aspell
# Check whitespaces
./check-whitespaces
# Check github actions workflows
./check-workflows
# Check submodules
./check-submodules
# Check shell scripts with shellcheck
./shellcheck-run.sh
```
## Fast Test
Normally this is the first check that is run for a PR. It builds ClickHouse and
@ -75,6 +117,15 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
#### Running Fast Test locally:
```sh
mkdir -p /tmp/test_output
mkdir -p /tmp/fasttest-workspace
cd ClickHouse
# this docker command performs a minimal ClickHouse build and runs Fast Tests against it
docker run --rm --cap-add=SYS_PTRACE -u $(id -u ${USER}):$(id -g ${USER}) --network=host -e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output -e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE -e stage=clone_submodules --volume=/tmp/fasttest-workspace:/fasttest-workspace --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output clickhouse/fasttest
```
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -122,6 +173,13 @@ Builds ClickHouse in various configurations for use in further steps. You have t
## Special Build Check
Performs static analysis and code style checks using `clang-tidy`. The report is similar to the [build check](#build-check). Fix the errors found in the build log.
#### Running clang-tidy locally:
There is a convenience `packager` script that runs the clang-tidy build in Docker:
```sh
mkdir build_tidy
./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-17 --debug-build --clang-tidy
```
## Functional Stateless Tests
Runs [stateless functional tests](tests.md#functional-tests) for ClickHouse

View File

@ -197,6 +197,11 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
```
### `materialized_postgresql_use_unique_replication_consumer_identifier` {#materialized_postgresql_use_unique_replication_consumer_identifier}
Use a unique replication consumer identifier for replication. Default: `0`.
If set to `1`, allows setting up several `MaterializedPostgreSQL` tables that point to the same `PostgreSQL` table.
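As a hedged sketch (the connection parameters, database names, and table list below are placeholders, not taken from this change), the setting can be passed together with the other `MaterializedPostgreSQL` settings so that two databases replicate the same table:

``` sql
-- Assumption for illustration: replicating the single PostgreSQL table 'orders'
-- into two ClickHouse databases on the same server.
SET allow_experimental_database_materialized_postgresql = 1;

CREATE DATABASE pg_replica_1
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'postgres_db', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'orders',
         materialized_postgresql_use_unique_replication_consumer_identifier = 1;

CREATE DATABASE pg_replica_2
ENGINE = MaterializedPostgreSQL('postgres-host:5432', 'postgres_db', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'orders',
         materialized_postgresql_use_unique_replication_consumer_identifier = 1;
```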
## Notes {#notes}
### Failover of the logical replication slot {#logical-replication-slot-failover}

View File

@ -24,12 +24,15 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[after_processing = 'keep',]
[keeper_path = '',]
[s3queue_loading_retries = 0,]
[s3queue_processing_threads_num = 1,]
[s3queue_enable_logging_to_s3queue_log = 0,]
[s3queue_polling_min_timeout_ms = 1000,]
[s3queue_polling_max_timeout_ms = 10000,]
[s3queue_polling_backoff_ms = 0,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_tracked_file_ttl_sec = 0,]
[s3queue_polling_size = 50,]
[s3queue_tracked_files_limit = 1000,]
[s3queue_cleanup_interval_min_ms = 10000,]
[s3queue_cleanup_interval_max_ms = 30000,]
```
**Engine parameters**
@ -46,7 +49,7 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'ordered';
mode = 'unordered';
```
Using named collections:
@ -109,6 +112,18 @@ Possible values:
Default value: `0`.
### s3queue_processing_threads_num {#processing_threads_num}
Number of threads used to perform processing. Applies only to `Unordered` mode.
Default value: `1`.
### s3queue_enable_logging_to_s3queue_log {#enable_logging_to_s3queue_log}
Enable logging to `system.s3queue_log`.
Default value: `0`.
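For illustration only (the bucket URL and table name are placeholders, not from this change), the two settings above can be combined in an `Unordered` mode table following the engine syntax shown earlier:

``` sql
CREATE TABLE s3queue_unordered (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/data/*', 'CSV')
SETTINGS
    mode = 'unordered',
    s3queue_processing_threads_num = 4,
    s3queue_enable_logging_to_s3queue_log = 1;
```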
### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}
Minimal timeout before next polling (in milliseconds).
@ -161,18 +176,17 @@ Possible values:
Default value: `0`.
### s3queue_polling_size {#polling_size}
### s3queue_cleanup_interval_min_ms {#cleanup_interval_min_ms}
Maximum files to fetch from S3 with SELECT or in background task.
Engine takes files for processing from S3 in batches.
We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path.
For 'Ordered' mode. Defines the minimum boundary of the reschedule interval for the background task that maintains the tracked file TTL and the maximum size of the tracked file set.
Possible values:
Default value: `10000`.
- Positive integer.
### s3queue_cleanup_interval_max_ms {#cleanup_interval_max_ms}
Default value: `50`.
For 'Ordered' mode. Defines the maximum boundary of the reschedule interval for the background task that maintains the tracked file TTL and the maximum size of the tracked file set.
Default value: `30000`.
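A hedged sketch of an `Ordered` mode table that uses the cleanup interval settings (the URL, table name, and values are illustrative):

``` sql
CREATE TABLE s3queue_ordered (name String, value UInt32)
ENGINE = S3Queue('https://example-bucket.s3.amazonaws.com/data/*', 'CSV')
SETTINGS
    mode = 'ordered',
    s3queue_tracked_files_limit = 1000,
    s3queue_tracked_file_ttl_sec = 3600,
    s3queue_cleanup_interval_min_ms = 10000,
    s3queue_cleanup_interval_max_ms = 30000;
```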
## S3-related Settings {#s3-settings}
@ -227,6 +241,118 @@ For more information about virtual columns see [here](../../../engines/table-eng
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
:::note
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
:::
## Limitations {#limitations}
1. Duplicated rows can appear as a result of:
- an exception occurs during parsing in the middle of file processing and retries are enabled via `s3queue_loading_retries`;
- `S3Queue` is configured on multiple servers pointing to the same path in ZooKeeper and the Keeper session expires before one server manages to commit a processed file, so another server may take over processing of a file that was already partially or fully processed by the first server;
- abnormal server termination.
2. If `S3Queue` is configured on multiple servers pointing to the same path in ZooKeeper and `Ordered` mode is used, `s3queue_loading_retries` will not work. This will be fixed soon.
## Introspection {#introspection}
For introspection, use the stateless `system.s3queue` table and the persistent `system.s3queue_log` table.
1. `system.s3queue`. This table is not persistent and shows the in-memory state of `S3Queue`: which files are currently being processed, and which files have been processed or have failed.
``` sql
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue
(
`database` String,
`table` String,
`file_name` String,
`rows_processed` UInt64,
`status` String,
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64),
`exception` String
)
ENGINE = SystemS3Queue
COMMENT 'SYSTEM TABLE is built on the fly.' │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Example:
``` sql
SELECT *
FROM system.s3queue
Row 1:
──────
zookeeper_path: /clickhouse/s3queue/25ea5621-ae8c-40c7-96d0-cec959c5ab88/3b3f66a1-9866-4c2e-ba78-b6bfa154207e
file_name: wikistat/original/pageviews-20150501-030000.gz
rows_processed: 5068534
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:31
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5068534,'SelectedBytes':198132283,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':2480,'S3QueueSetFileProcessedMicroseconds':9985,'S3QueuePullMicroseconds':273776,'LogTest':17}
exception:
```
2. `system.s3queue_log`. Persistent table. Has the same information as `system.s3queue`, but for `processed` and `failed` files.
The table has the following structure:
``` sql
SHOW CREATE TABLE system.s3queue_log
Query id: 0ad619c3-0f2a-4ee4-8b40-c73d86e04314
┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE system.s3queue_log
(
`event_date` Date,
`event_time` DateTime,
`table_uuid` String,
`file_name` String,
`rows_processed` UInt64,
`status` Enum8('Processed' = 0, 'Failed' = 1),
`processing_start_time` Nullable(DateTime),
`processing_end_time` Nullable(DateTime),
`ProfileEvents` Map(String, UInt64),
`exception` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY (event_date, event_time)
SETTINGS index_granularity = 8192 │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
To use `system.s3queue_log`, define its configuration in the server config file:
``` xml
<s3queue_log>
<database>system</database>
<table>s3queue_log</table>
</s3queue_log>
```
Example:
``` sql
SELECT *
FROM system.s3queue_log
Row 1:
──────
event_date: 2023-10-13
event_time: 2023-10-13 13:10:12
table_uuid:
file_name: wikistat/original/pageviews-20150501-020000.gz
rows_processed: 5112621
status: Processed
processing_start_time: 2023-10-13 13:09:48
processing_end_time: 2023-10-13 13:10:12
ProfileEvents: {'ZooKeeperTransactions':3,'ZooKeeperGet':2,'ZooKeeperMulti':1,'SelectedRows':5112621,'SelectedBytes':198577687,'ContextLock':1,'S3QueueSetFileProcessingMicroseconds':1934,'S3QueueSetFileProcessedMicroseconds':17063,'S3QueuePullMicroseconds':5841972,'LogTest':17}
exception:
```

View File

@ -58,6 +58,12 @@ where `N` specifies the tokenizer:
- `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces,
- `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)"
The maximum number of rows per postings list can be specified as the second parameter. This parameter can be used to control postings list sizes and avoid generating huge postings list files. The following variants exist (see the sketch after this list):
- `inverted(ngrams, max_rows_per_postings_list)`: Use the given max_rows_per_postings_list (assuming it is not 0)
- `inverted(ngrams, 0)`: No limitation of the maximum rows per postings list
- `inverted(ngrams)`: Use the default maximum of 64K rows per postings list
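A minimal sketch, assuming a hypothetical table `tab` with a `String` column `str`, of passing the postings list limit as the second index parameter:

``` sql
SET allow_experimental_inverted_index = true;

CREATE TABLE tab
(
    key UInt64,
    str String,
    -- ngrams(2) tokenizer, at most 100000 rows per postings list
    INDEX inv_idx(str) TYPE inverted(2, 100000) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY key;
```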
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:
``` sql

View File

@ -3310,6 +3310,28 @@ Possible values:
Default value: `0`.
## mysql_map_string_to_text_in_show_columns {#mysql_map_string_to_text_in_show_columns}
When enabled, the ClickHouse [String](../../sql-reference/data-types/string.md) data type is displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Only takes effect when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns}
When enabled, the ClickHouse [FixedString](../../sql-reference/data-types/fixedstring.md) data type is displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
Only takes effect when [use_mysql_types_in_show_columns](#use_mysql_types_in_show_columns) is enabled.
- 0 - Use `BLOB`.
- 1 - Use `TEXT`.
Default value: `0`.
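A hedged usage sketch (the table name is a placeholder); both settings only take effect together with `use_mysql_types_in_show_columns`:

``` sql
SET use_mysql_types_in_show_columns = 1;
SET mysql_map_string_to_text_in_show_columns = 1;
SET mysql_map_fixed_string_to_text_in_show_columns = 1;

-- String and FixedString columns are now reported as TEXT instead of BLOB.
SHOW COLUMNS FROM my_table;
```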
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.

View File

@ -103,4 +103,5 @@ ClickHouse-specific aggregate functions:
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)

View File

@ -0,0 +1,67 @@
---
slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets
sidebar_position: 312
sidebar_label: largestTriangleThreeBuckets
---
# largestTriangleThreeBuckets
Applies the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm to the input data.
The algorithm is used for downsampling time series data for visualization. It is designed to operate on series sorted by x coordinate.
It works by dividing the sorted series into buckets and then finding the largest triangle in each bucket. The number of buckets is equal to the number of points in the resulting series.
The function sorts the data by `x` and then applies the downsampling algorithm to the sorted data.
**Syntax**
``` sql
largestTriangleThreeBuckets(n)(x, y)
```
Alias: `lttb`.
**Arguments**
- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
**Parameters**
- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
[Array](../../../sql-reference/data-types/array.md) of [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
**Example**
Input table:
``` text
┌─────x───────┬───────y──────┐
│ 1.000000000 │ 10.000000000 │
│ 2.000000000 │ 20.000000000 │
│ 3.000000000 │ 15.000000000 │
│ 8.000000000 │ 60.000000000 │
│ 9.000000000 │ 55.000000000 │
│ 10.00000000 │ 70.000000000 │
│ 4.000000000 │ 30.000000000 │
│ 5.000000000 │ 40.000000000 │
│ 6.000000000 │ 35.000000000 │
│ 7.000000000 │ 50.000000000 │
└─────────────┴──────────────┘
```
Query:
``` sql
SELECT largestTriangleThreeBuckets(4)(x, y) FROM largestTriangleThreeBuckets_test;
```
Result:
``` text
┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐
│ [(1,10),(3,15),(5,40),(10,70)] │
└───────────────────────────────────────────────────────┘
```

View File

@ -112,7 +112,8 @@ EOF
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
fi
sha512sum "$TARBALL" > "$TARBALL".sha512
# Cut the $OUTPUT_DIR/ from the sha512sum output to make it universal
sha512sum "$TARBALL" | sed "s|$OUTPUT_DIR/||" > "$TARBALL".sha512
rm -r "$PKG_PATH"
}

View File

@ -391,7 +391,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
auto code = zookeeper->tryMulti(ops, responses);
if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
return std::make_shared<zkutil::EphemeralNodeHolder>(current_worker_path, *zookeeper, false, false, description);
return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper);
if (code == Coordination::Error::ZBADVERSION)
{

View File

@ -145,6 +145,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// `tables.next()` call is mandatory to drain the iterator before next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name);
}
@ -153,6 +157,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
if (tables.next())
{
catalog_name = tables.table_catalog();
/// `tables.next()` call is mandatory to drain the iterator before next operation and avoid "Invalid cursor state"
if (tables.next())
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Driver returned more than one table for '{}': '{}' and '{}'",
table_name, catalog_name, tables.table_schema());
LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name);
}

View File

@ -91,16 +91,17 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function<T(nanodb
}
catch (const nanodbc::database_error & e)
{
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}",
e.what(), e.state(), e.native());
/// SQLState, connection related errors start with 08 (main: 08S01), cursor invalid state is 24000.
/// Invalid cursor state is a retriable error.
/// Invalid transaction state 25000. Truncate to 2 letters on purpose.
/// https://docs.microsoft.com/ru-ru/sql/odbc/reference/appendixes/appendix-a-odbc-error-codes?view=sql-server-ver15
if (e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25"))
bool is_retriable = e.state().starts_with("08") || e.state().starts_with("24") || e.state().starts_with("25");
LOG_ERROR(
&Poco::Logger::get("ODBCConnection"),
"ODBC query failed with error: {}, state: {}, native code: {}{}",
e.what(), e.state(), e.native(), is_retriable ? ", will retry" : "");
if (is_retriable)
{
connection_holder->updateConnection();
return query_func(connection_holder->get());

View File

@ -821,77 +821,85 @@ function insertChart(i) {
let move_text = document.createTextNode('✥');
move.appendChild(move_text);
let is_dragging = false;
move.addEventListener('mousedown', e => {
const idx = getCurrentIndex();
is_dragging = true;
let drag_state = {
is_dragging: false,
idx: null,
offset_x: null,
offset_y: null,
displace_idx: null,
displace_chart: null
};
function dragStop(e) {
drag_state.is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (drag_state.displace_idx !== null) {
const elem = queries[drag_state.idx];
queries.splice(drag_state.idx, 1);
queries.splice(drag_state.displace_idx, 0, elem);
drag_state.displace_chart.className = 'chart';
drawAll();
}
}
function dragMove(e) {
if (!drag_state.is_dragging) return;
let x = e.clientX - drag_state.offset_x;
let y = e.clientY - drag_state.offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
drag_state.displace_idx = null;
drag_state.displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == drag_state.idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
drag_state.displace_idx = current_idx;
drag_state.displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
function dragStart(e) {
if (e.button !== 0) return; /// left button only
move.setPointerCapture(e.pointerId);
drag_state.is_dragging = true;
drag_state.idx = getCurrentIndex();
chart.className = 'chart chart-moving';
let offset_x = e.clientX;
let offset_y = e.clientY;
drag_state.offset_x = e.clientX;
drag_state.offset_y = e.clientY;
}
let displace_idx = null;
let displace_chart = null;
function mouseup(e) {
is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (displace_idx !== null) {
const elem = queries[idx];
queries.splice(idx, 1);
queries.splice(displace_idx, 0, elem);
displace_chart.className = 'chart';
drawAll();
}
}
function mousemove(e) {
if (!is_dragging) {
document.body.removeEventListener('mousemove', mousemove);
document.body.removeEventListener('mouseup', mouseup);
return;
}
let x = e.clientX - offset_x;
let y = e.clientY - offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
displace_idx = null;
displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
displace_idx = current_idx;
displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
document.body.addEventListener('mouseup', mouseup);
document.body.addEventListener('mousemove', mousemove);
});
/// Read https://www.redblobgames.com/making-of/draggable/
move.addEventListener('pointerdown', dragStart);
move.addEventListener('pointermove', dragMove);
move.addEventListener('pointerup', dragStop);
move.addEventListener('pointercancel', dragStop);
move.addEventListener('touchstart', (e) => e.preventDefault());
let maximize = document.createElement('a');
let maximize_text = document.createTextNode('🗖');

View File

@ -0,0 +1,52 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
struct Settings;
namespace
{
AggregateFunctionPtr
createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0])
|| isDateTime64(argument_types[0])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument",
name);
if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1])
|| isDateTime64(argument_types[1])))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED,
"Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument",
name);
return std::make_shared<AggregateFunctionLargestTriangleThreeBuckets>(argument_types, parameters);
}
}
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory)
{
factory.registerFunction(AggregateFunctionLargestTriangleThreeBuckets::name, createAggregateFunctionLargestTriangleThreeBuckets);
factory.registerAlias("lttb", AggregateFunctionLargestTriangleThreeBuckets::name);
}
}

View File

@ -0,0 +1,327 @@
#pragma once
#include <iostream>
#include <limits>
#include <numeric>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsDateTime.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <Common/PODArray_fwd.h>
#include <Common/assert_cast.h>
#include <boost/math/distributions/normal.hpp>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float64>
{
void add(const Float64 xval, const Float64 yval, Arena * arena)
{
this->addX(xval, arena);
this->addY(yval, arena);
}
void sort(Arena * arena)
{
// sort the this->x and this->y in ascending order of this->x using index
std::vector<size_t> index(this->x.size());
std::iota(index.begin(), index.end(), 0);
::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
SampleX temp_x{};
SampleY temp_y{};
for (size_t i = 0; i < this->x.size(); ++i)
{
temp_x.push_back(this->x[index[i]], arena);
temp_y.push_back(this->y[index[i]], arena);
}
for (size_t i = 0; i < this->x.size(); ++i)
{
this->x[i] = temp_x[i];
this->y[i] = temp_y[i];
}
}
PODArray<std::pair<Float64, Float64>> getResult(size_t total_buckets, Arena * arena)
{
// Sort the data
this->sort(arena);
PODArray<std::pair<Float64, Float64>> result;
// Handle special cases for small data list
if (this->x.size() <= total_buckets)
{
for (size_t i = 0; i < this->x.size(); ++i)
{
result.emplace_back(std::make_pair(this->x[i], this->y[i]));
}
return result;
}
// Handle special cases for 0 or 1 or 2 buckets
if (total_buckets == 0)
return result;
if (total_buckets == 1)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
return result;
}
if (total_buckets == 2)
{
result.emplace_back(std::make_pair(this->x.front(), this->y.front()));
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
// Find the size of each bucket
size_t single_bucket_size = this->x.size() / total_buckets;
// Include the first data point
result.emplace_back(std::make_pair(this->x[0], this->y[0]));
for (size_t i = 1; i < total_buckets - 1; ++i) // Skip the first and last bucket
{
size_t start_index = i * single_bucket_size;
size_t end_index = (i + 1) * single_bucket_size;
// Compute the average point in the next bucket
Float64 avg_x = 0;
Float64 avg_y = 0;
for (size_t j = end_index; j < (i + 2) * single_bucket_size; ++j)
{
avg_x += this->x[j];
avg_y += this->y[j];
}
avg_x /= single_bucket_size;
avg_y /= single_bucket_size;
// Find the point in the current bucket that forms the largest triangle
size_t max_index = start_index;
Float64 max_area = 0.0;
for (size_t j = start_index; j < end_index; ++j)
{
Float64 area = std::abs(
0.5
* (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y
- this->x[j] * result.back().second - avg_x * this->y[j]));
if (area > max_area)
{
max_area = area;
max_index = j;
}
}
// Include the selected point
result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index]));
}
// Include the last data point
result.emplace_back(std::make_pair(this->x.back(), this->y.back()));
return result;
}
};
class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>
{
private:
UInt64 total_buckets{0};
TypeIndex x_type;
TypeIndex y_type;
public:
explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<LargestTriangleThreeBucketsData, AggregateFunctionLargestTriangleThreeBuckets>({arguments}, {}, createResultType(arguments))
{
if (params.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one parameter", getName());
if (params[0].getType() != Field::Types::UInt64)
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the first parameter to be a UInt64", getName());
total_buckets = params[0].get<UInt64>();
this->x_type = WhichDataType(arguments[0]).idx;
this->y_type = WhichDataType(arguments[1]).idx;
}
static constexpr auto name = "largestTriangleThreeBuckets";
String getName() const override { return name; }
bool allocatesMemoryInArena() const override { return true; }
static DataTypePtr createResultType(const DataTypes & arguments)
{
TypeIndex x_type = arguments[0]->getTypeId();
TypeIndex y_type = arguments[1]->getTypeId();
UInt32 x_scale = 0;
UInt32 y_scale = 0;
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[0].get()))
{
x_scale = datetime64_type->getScale();
}
if (const auto * datetime64_type = typeid_cast<const DataTypeDateTime64 *>(arguments[1].get()))
{
y_scale = datetime64_type->getScale();
}
DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)};
auto tuple = std::make_shared<DataTypeTuple>(std::move(types));
return std::make_shared<DataTypeArray>(tuple);
}
static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale)
{
DataTypePtr data_type;
switch (type_index)
{
case TypeIndex::Date:
data_type = std::make_shared<DataTypeDate>();
break;
case TypeIndex::Date32:
data_type = std::make_shared<DataTypeDate32>();
break;
case TypeIndex::DateTime:
data_type = std::make_shared<DataTypeDateTime>();
break;
case TypeIndex::DateTime64:
data_type = std::make_shared<DataTypeDateTime64>(scale);
break;
default:
data_type = std::make_shared<DataTypeNumber<Float64>>();
}
return data_type;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type);
Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type);
this->data(place).add(x, y, arena);
}
Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return static_cast<const ColumnDate &>(*column).getData()[row_num];
case TypeIndex::Date32:
return static_cast<const ColumnDate32 &>(*column).getData()[row_num];
case TypeIndex::DateTime:
return static_cast<const ColumnDateTime &>(*column).getData()[row_num];
case TypeIndex::DateTime64:
return static_cast<const ColumnDateTime64 &>(*column).getData()[row_num];
default:
return column->getFloat64(row_num);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & a = this->data(place);
const auto & b = this->data(rhs);
a.merge(b, arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override
{
auto res = this->data(place).getResult(total_buckets, arena);
auto & col = assert_cast<ColumnArray &>(to);
auto & col_offsets = assert_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
auto column_x_adder_func = getColumnAdderFunc(x_type);
auto column_y_adder_func = getColumnAdderFunc(y_type);
for (size_t i = 0; i < res.size(); ++i)
{
auto & column_tuple = assert_cast<ColumnTuple &>(col.getData());
column_x_adder_func(column_tuple.getColumn(0), res[i].first);
column_y_adder_func(column_tuple.getColumn(1), res[i].second);
}
col_offsets.getData().push_back(col.getData().size());
}
std::function<void(IColumn &, Float64)> getColumnAdderFunc(TypeIndex type_index) const
{
switch (type_index)
{
case TypeIndex::Date:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate &>(column);
col.getData().push_back(static_cast<UInt16>(value));
};
case TypeIndex::Date32:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDate32 &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime &>(column);
col.getData().push_back(static_cast<UInt32>(value));
};
case TypeIndex::DateTime64:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnDateTime64 &>(column);
col.getData().push_back(static_cast<UInt64>(value));
};
default:
return [](IColumn & column, Float64 value)
{
auto & col = assert_cast<ColumnFloat64 &>(column);
col.getData().push_back(value);
};
}
}
};
}

View File

@ -129,7 +129,10 @@ public:
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
if constexpr (std::endian::native == std::endian::little)
writePODBinary(value[i].second, buf);
else
writePODBinary(std::byteswap(value[i].second), buf);
}
}

View File

@ -82,6 +82,7 @@ void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -176,6 +177,7 @@ void registerAggregateFunctions()
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionFlameGraph(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerAggregateFunctionLargestTriangleThreeBuckets(factory);
registerWindowFunctions(factory);
}

View File

@ -1,7 +1,6 @@
#include <Client/MultiplexedConnections.h>
#include <Common/thread_local_rng.h>
#include <Common/logger_useful.h>
#include <Core/Protocol.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/Operators.h>
@ -24,14 +23,6 @@ namespace ErrorCodes
}
#define MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION \
mutex_last_locked_by.store((getThreadId() << 32) | __LINE__); \
memcpy(mutex_memory_dump.data(), &cancel_mutex, mutex_memory_dump.size()); \
mutex_locked += 1; \
SCOPE_EXIT({ mutex_locked -= 1; });
/// When you remove this macro, please also remove the clang-tidy suppressions at the beginning + end of this file.
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
{
@ -86,7 +77,6 @@ MultiplexedConnections::MultiplexedConnections(
void MultiplexedConnections::sendScalarsData(Scalars & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send scalars data: query not yet sent.");
@ -102,7 +92,6 @@ void MultiplexedConnections::sendScalarsData(Scalars & data)
void MultiplexedConnections::sendExternalTablesData(std::vector<ExternalTablesData> & data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send external tables data: query not yet sent.");
@ -131,7 +120,6 @@ void MultiplexedConnections::sendQuery(
bool with_pending_data)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query already sent.");
@ -189,7 +177,6 @@ void MultiplexedConnections::sendQuery(
void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (sent_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot send uuids after query is sent.");
@ -206,7 +193,6 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuid
void MultiplexedConnections::sendReadTaskResponse(const String & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendReadTaskResponse(response);
@ -216,7 +202,6 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response)
void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (cancelled)
return;
current_connection->sendMergeTreeReadTaskResponse(response);
@ -226,29 +211,13 @@ void MultiplexedConnections::sendMergeTreeReadTaskResponse(const ParallelReadRes
Packet MultiplexedConnections::receivePacket()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
Packet packet = receivePacketUnlocked({});
return packet;
}
void MultiplexedConnections::disconnect()
{
/// We've seen this lock mysteriously get stuck forever, without any other thread seeming to
/// hold the mutex. This is temporary code to print some extra information next time it happens.
/// std::lock_guard lock(cancel_mutex);
if (!cancel_mutex.try_lock_for(std::chrono::hours(1)))
{
UInt64 last_locked = mutex_last_locked_by.load();
std::array<UInt8, sizeof(std::timed_mutex)> new_memory_dump;
memcpy(new_memory_dump.data(), &cancel_mutex, new_memory_dump.size());
LOG_ERROR(&Poco::Logger::get("MultiplexedConnections"), "Deadlock in MultiplexedConnections::disconnect()! Mutex was last (instrumentedly) locked by thread {} on line {}, lock balance: {}, mutex memory when last locked: {}, mutex memory now: {}", last_locked >> 32, last_locked & 0xffffffff, mutex_locked.load(), hexString(mutex_memory_dump.data(), mutex_memory_dump.size()), hexString(new_memory_dump.data(), new_memory_dump.size()));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Deadlock in MultiplexedConnections::disconnect()");
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wthread-safety-analysis"
std::lock_guard lock(cancel_mutex, std::adopt_lock);
#pragma clang diagnostic pop
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
std::lock_guard lock(cancel_mutex);
for (ReplicaState & state : replica_states)
{
@ -264,7 +233,6 @@ void MultiplexedConnections::disconnect()
void MultiplexedConnections::sendCancel()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!sent_query || cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled.");
@ -282,7 +250,6 @@ void MultiplexedConnections::sendCancel()
Packet MultiplexedConnections::drain()
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
if (!cancelled)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot drain connections: cancel first.");
@ -323,7 +290,6 @@ Packet MultiplexedConnections::drain()
std::string MultiplexedConnections::dumpAddresses() const
{
std::lock_guard lock(cancel_mutex);
MUTEX_LOCK_TEMPORARY_DEBUG_INSTRUMENTATION
return dumpAddressesUnlocked();
}

View File

@ -106,14 +106,7 @@ private:
std::optional<ReplicaInfo> replica_info;
/// A mutex for the sendCancel function to execute safely in separate thread.
mutable std::timed_mutex cancel_mutex;
/// Temporary instrumentation to debug a weird deadlock on cancel_mutex.
/// TODO: Once the investigation is done, get rid of these, and of INSTRUMENTED_LOCK_MUTEX, and
/// change cancel_mutex to std::mutex.
mutable std::atomic<UInt64> mutex_last_locked_by{0};
mutable std::atomic<Int64> mutex_locked{0};
mutable std::array<UInt8, sizeof(std::timed_mutex)> mutex_memory_dump;
mutable std::mutex cancel_mutex;
friend struct RemoteQueryExecutorRoutine;
};

View File

@ -530,6 +530,13 @@ The server successfully detected this situation and will download merged part fr
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
\
M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\
M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\
M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\
M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to clean up tracked files by max set size or TTL")\
M(S3QueuePullMicroseconds, "Time spent to read file data")\
M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
\
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \
@ -589,9 +596,14 @@ Timer::Timer(Counters & counters_, Event timer_event_, Event counter_event, Reso
counters.increment(counter_event);
}
UInt64 Timer::get()
{
return watch.elapsedNanoseconds() / static_cast<UInt64>(resolution);
}
void Timer::end()
{
counters.increment(timer_event, watch.elapsedNanoseconds() / static_cast<UInt64>(resolution));
counters.increment(timer_event, get());
watch.reset();
}
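/// A minimal usage sketch of the new get() accessor (not part of this diff; the function and
/// `budget` are hypothetical). It relies only on the interface visible above: get() peeks at the
/// elapsed time, cancel() resets the watch, and the Timer destructor calls end() in the caller's scope.
static void discardMeasurementIfOverBudget(ProfileEvents::Timer & timer, UInt64 budget)
{
    if (timer.get() > budget)   /// peek at the elapsed time without incrementing any counter
        timer.cancel();         /// reset the watch so the eventual end() records (almost) nothing
}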

View File

@ -41,6 +41,7 @@ namespace ProfileEvents
~Timer() { end(); }
void cancel() { watch.reset(); }
void end();
UInt64 get();
private:
Counters & counters;

View File

@ -10,6 +10,7 @@
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <Interpreters/ProcessorsProfileLog.h>
#include <Interpreters/ZooKeeperLog.h>

View File

@ -27,6 +27,7 @@
M(ZooKeeperLogElement) \
M(ProcessorProfileLogElement) \
M(TextLogElement) \
M(S3QueueLogElement) \
M(FilesystemCacheLogElement) \
M(FilesystemReadPrefetchesLogElement) \
M(AsynchronousInsertLogElement) \

View File

@ -644,11 +644,18 @@ class EphemeralNodeHolder
public:
using Ptr = std::shared_ptr<EphemeralNodeHolder>;
EphemeralNodeHolder(const std::string & path_, ZooKeeper & zookeeper_, bool create, bool sequential, const std::string & data)
EphemeralNodeHolder(const std::string & path_, ZooKeeper & zookeeper_, bool create, bool try_create, bool sequential, const std::string & data)
: path(path_), zookeeper(zookeeper_)
{
if (create)
{
path = zookeeper.create(path, data, sequential ? CreateMode::EphemeralSequential : CreateMode::Ephemeral);
need_remove = created = true;
}
else if (try_create)
{
need_remove = created = Coordination::Error::ZOK == zookeeper.tryCreate(path, data, sequential ? CreateMode::EphemeralSequential : CreateMode::Ephemeral);
}
}
std::string getPath() const
@ -656,19 +663,32 @@ public:
return path;
}
bool isCreated() const
{
return created;
}
static Ptr create(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, data);
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, false, data);
}
static Ptr tryCreate(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
auto node = std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, true, false, data);
if (node->isCreated())
return node;
return nullptr;
}
static Ptr createSequential(const std::string & path, ZooKeeper & zookeeper, const std::string & data = "")
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, true, data);
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, true, false, true, data);
}
static Ptr existing(const std::string & path, ZooKeeper & zookeeper)
{
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, "");
return std::make_shared<EphemeralNodeHolder>(path, zookeeper, false, false, false, "");
}
void setAlreadyRemoved()
@ -702,6 +722,7 @@ private:
ZooKeeper & zookeeper;
CurrentMetrics::Increment metric_increment{CurrentMetrics::EphemeralNode};
bool need_remove = true;
bool created = false;
};
using EphemeralNodeHolderPtr = EphemeralNodeHolder::Ptr;
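/// A minimal usage sketch of the new tryCreate() factory (not part of this diff; the function,
/// `zk`, `path` and `payload` are hypothetical). Unlike create(), tryCreate() returns nullptr
/// instead of throwing when the ephemeral node cannot be created (e.g. it already exists).
inline void tryBecomeOwner(zkutil::ZooKeeper & zk, const std::string & path, const std::string & payload)
{
    if (auto holder = zkutil::EphemeralNodeHolder::tryCreate(path, zk, payload))
    {
        /// We own the ephemeral node; it is removed automatically when `holder` goes out of scope.
    }
    else
    {
        /// Another owner already created the node; a caller would typically retry later.
    }
}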

View File

@ -103,11 +103,12 @@ class IColumn;
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. Only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
@ -205,7 +206,9 @@ class IColumn;
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
\
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
M(Bool, use_mysql_types_in_show_columns, false, "Show MySQL types in SHOW COLUMNS and system.columns", 0) \
M(Bool, use_mysql_types_in_show_columns, false, "Show native MySQL types in SHOW [FULL] COLUMNS", 0) \
M(Bool, mysql_map_string_to_text_in_show_columns, false, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \
M(Bool, mysql_map_fixed_string_to_text_in_show_columns, false, "If enabled, FixedString type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Will only take effect if use_mysql_types_in_show_columns is enabled too", 0) \
\
M(UInt64, optimize_min_equality_disjunction_chain_length, 3, "The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization ", 0) \
\

View File

@ -52,10 +52,29 @@ DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
, remote_database_name(postgres_database_name)
, connection_info(connection_info_)
, settings(std::move(settings_))
, startup_task(getContext()->getSchedulePool().createTask("MaterializedPostgreSQLDatabaseStartup", [this]{ startSynchronization(); }))
, startup_task(getContext()->getSchedulePool().createTask("MaterializedPostgreSQLDatabaseStartup", [this]{ tryStartSynchronization(); }))
{
}
void DatabaseMaterializedPostgreSQL::tryStartSynchronization()
{
if (shutdown_called)
return;
try
{
startSynchronization();
LOG_INFO(log, "Successfully loaded tables from PostgreSQL and started replication");
}
catch (...)
{
LOG_ERROR(log, "Failed to start replication from PostgreSQL, "
"will retry. Error: {}", getCurrentExceptionMessage(true));
if (!shutdown_called)
startup_task->scheduleAfter(5000);
}
}
void DatabaseMaterializedPostgreSQL::startSynchronization()
{
@ -64,9 +83,10 @@ void DatabaseMaterializedPostgreSQL::startSynchronization()
return;
replication_handler = std::make_unique<PostgreSQLReplicationHandler>(
/* replication_identifier */ TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME
remote_database_name,
/* table_name */"",
TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME
toString(getUUID()),
connection_info,
getContext(),
is_attach,
@ -114,15 +134,7 @@ void DatabaseMaterializedPostgreSQL::startSynchronization()
LOG_TRACE(log, "Loaded {} tables. Starting synchronization", materialized_tables.size());
try
{
replication_handler->startup(/* delayed */false);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
throw;
}
replication_handler->startup(/* delayed */false);
}
@ -401,6 +413,7 @@ void DatabaseMaterializedPostgreSQL::detachTablePermanently(ContextPtr, const St
void DatabaseMaterializedPostgreSQL::shutdown()
{
shutdown_called = true;
startup_task->deactivate();
stopReplication();
DatabaseAtomic::shutdown();
@ -413,7 +426,6 @@ void DatabaseMaterializedPostgreSQL::stopReplication()
if (replication_handler)
replication_handler->shutdown();
shutdown_called = true;
/// Clear wrappers over nested, all access is not done to nested tables directly.
materialized_tables.clear();
}

View File

@ -73,6 +73,7 @@ protected:
ASTPtr getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const override;
private:
void tryStartSynchronization();
void startSynchronization();
ASTPtr createAlterSettingsQuery(const SettingChange & new_setting);

View File

@ -322,8 +322,19 @@ void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, Load
void DatabasePostgreSQL::removeOutdatedTables()
{
std::lock_guard lock{mutex};
auto connection_holder = pool->get();
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get(), configuration.schema);
std::set<std::string> actual_tables;
try
{
auto connection_holder = pool->get();
actual_tables = fetchPostgreSQLTablesList(connection_holder->get(), configuration.schema);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
cleaner_task->scheduleAfter(cleaner_reschedule_ms);
return;
}
if (cache_tables)
{

View File

@ -410,7 +410,7 @@ std::unique_ptr<ReadBuffer> FormatFactory::wrapReadBufferIfNeeded(
static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr context)
{
auto element_id = context->getProcessListElement();
auto element_id = context->getProcessListElementSafe();
if (element_id)
{
/// While preparing the query there might have been progress (for example in subscalar subqueries) so add it here

View File

@ -324,6 +324,14 @@ namespace
auto retry_timeout = timeouts.connection_timeout.totalMilliseconds();
auto session = pool_ptr->second->get(retry_timeout);
const auto & session_data = session->sessionData();
if (session_data.empty() || !Poco::AnyCast<HTTPSessionReuseTag>(&session_data))
{
/// Reset session if it is not reusable. See comment for HTTPSessionReuseTag.
session->reset();
}
session->attachSessionData({});
setTimeouts(*session, timeouts);
return session;

View File

@ -74,8 +74,17 @@ void resetSessionIfNeeded(bool read_all_range_successfully, std::optional<Aws::S
}
else if (auto session = getSession(*read_result); !session.isNull())
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
if (!session->getProxyHost().empty())
{
/// Reset proxied sessions because the proxy can change for every request. See ProxyConfigurationResolver.
resetSession(*read_result);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
}
else
{
DB::markSessionForReuse(session);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
}
}
}
}

View File

@ -276,7 +276,7 @@ void PocoHTTPClient::makeRequestInternal(
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.
const auto request_configuration = per_request_configuration();
if (http_connection_pool_size && request_configuration.host.empty())
if (http_connection_pool_size)
makeRequestInternalImpl<true>(request, request_configuration, response, readLimiter, writeLimiter);
else
makeRequestInternalImpl<false>(request, request_configuration, response, readLimiter, writeLimiter);

View File

@ -3588,6 +3588,15 @@ std::shared_ptr<FilesystemCacheLog> Context::getFilesystemCacheLog() const
return shared->system_logs->filesystem_cache_log;
}
std::shared_ptr<S3QueueLog> Context::getS3QueueLog() const
{
auto lock = getGlobalSharedLock();
if (!shared->system_logs)
return {};
return shared->system_logs->s3_queue_log;
}
std::shared_ptr<FilesystemReadPrefetchesLog> Context::getFilesystemReadPrefetchesLog() const
{
auto lock = getGlobalSharedLock();

View File

@ -105,6 +105,7 @@ class TransactionsInfoLog;
class ProcessorsProfileLog;
class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class S3QueueLog;
class AsynchronousInsertLog;
class BackupLog;
class IAsynchronousReader;
@ -1041,6 +1042,7 @@ public:
std::shared_ptr<TransactionsInfoLog> getTransactionsInfoLog() const;
std::shared_ptr<ProcessorsProfileLog> getProcessorsProfileLog() const;
std::shared_ptr<FilesystemCacheLog> getFilesystemCacheLog() const;
std::shared_ptr<S3QueueLog> getS3QueueLog() const;
std::shared_ptr<FilesystemReadPrefetchesLog> getFilesystemReadPrefetchesLog() const;
std::shared_ptr<AsynchronousInsertLog> getAsynchronousInsertLog() const;
std::shared_ptr<BackupLog> getBackupLog() const;

View File

@ -21,14 +21,15 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
GinFilterParameters::GinFilterParameters(size_t ngrams_, Float64 density_)
GinFilterParameters::GinFilterParameters(size_t ngrams_, UInt64 max_rows_per_postings_list_)
: ngrams(ngrams_)
, density(density_)
, max_rows_per_postings_list(max_rows_per_postings_list_)
{
if (max_rows_per_postings_list == UNLIMITED_ROWS_PER_POSTINGS_LIST)
max_rows_per_postings_list = std::numeric_limits<UInt64>::max();
if (ngrams > 8)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of inverted index filter cannot be greater than 8");
if (density <= 0 || density > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The density inverted index gin filter must be between 0 and 1");
}
GinFilter::GinFilter(const GinFilterParameters & params_)
@ -36,7 +37,7 @@ GinFilter::GinFilter(const GinFilterParameters & params_)
{
}
void GinFilter::add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const
void GinFilter::add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store) const
{
if (len > FST::MAX_TERM_LENGTH)
return;
@ -51,8 +52,7 @@ void GinFilter::add(const char * data, size_t len, UInt32 rowID, GinIndexStorePt
}
else
{
UInt64 size_limit = std::lround(limit * params.density);
auto builder = std::make_shared<GinIndexPostingsBuilder>(size_limit);
auto builder = std::make_shared<GinIndexPostingsBuilder>(params.max_rows_per_postings_list);
builder->add(rowID);
store->setPostingsBuilder(term, builder);

View File

@ -8,13 +8,16 @@ namespace DB
{
static inline constexpr auto INVERTED_INDEX_NAME = "inverted";
static inline constexpr UInt64 UNLIMITED_ROWS_PER_POSTINGS_LIST = 0;
static inline constexpr UInt64 MIN_ROWS_PER_POSTINGS_LIST = 8 * 1024;
static inline constexpr UInt64 DEFAULT_MAX_ROWS_PER_POSTINGS_LIST = 64 * 1024;
struct GinFilterParameters
{
GinFilterParameters(size_t ngrams_, Float64 density_);
GinFilterParameters(size_t ngrams_, UInt64 max_rows_per_postings_list_);
size_t ngrams;
Float64 density;
UInt64 max_rows_per_postings_list;
};
struct GinSegmentWithRowIdRange
@ -42,7 +45,7 @@ public:
/// Add term (located at 'data' with length 'len') and its row ID to the postings list builder
/// for building inverted index for the given store.
void add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const;
void add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store) const;
/// Accumulate (segmentID, RowIDStart, RowIDEnd) for building skipping index
void addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt32 rowIDEnd);

View File

@ -24,7 +24,10 @@ String InterpreterShowColumnsQuery::getRewrittenQuery()
{
const auto & query = query_ptr->as<ASTShowColumnsQuery &>();
const bool use_mysql_types = getContext()->getSettingsRef().use_mysql_types_in_show_columns;
const auto & settings = getContext()->getSettingsRef();
const bool use_mysql_types = settings.use_mysql_types_in_show_columns;
const bool remap_string_as_text = settings.mysql_map_string_to_text_in_show_columns;
const bool remap_fixed_string_as_text = settings.mysql_map_fixed_string_to_text_in_show_columns;
WriteBufferFromOwnString buf_database;
String resolved_database = getContext()->resolveDatabase(query.database);
@ -37,42 +40,51 @@ String InterpreterShowColumnsQuery::getRewrittenQuery()
String rewritten_query;
if (use_mysql_types)
{
/// Cheapskate SQL-based mapping from native types to MySQL types, see https://dev.mysql.com/doc/refman/8.0/en/data-types.html
/// Only used with setting 'use_mysql_types_in_show_columns = 1'
/// Known issues:
/// - Enums are translated to TEXT
rewritten_query += R"(
rewritten_query += fmt::format(
R"(
WITH map(
'Int8', 'TINYINT',
'Int16', 'SMALLINT',
'Int32', 'INTEGER',
'Int64', 'BIGINT',
'UInt8', 'TINYINT UNSIGNED',
'UInt16', 'SMALLINT UNSIGNED',
'UInt32', 'INTEGER UNSIGNED',
'UInt64', 'BIGINT UNSIGNED',
'Float32', 'FLOAT',
'Float64', 'DOUBLE',
'String', 'BLOB',
'UUID', 'CHAR',
'Bool', 'TINYINT',
'Date', 'DATE',
'Date32', 'DATE',
'DateTime', 'DATETIME',
'DateTime64', 'DATETIME',
'Map', 'JSON',
'Tuple', 'JSON',
'Object', 'JSON') AS native_to_mysql_mapping,
splitByRegexp('\(|\)', type_) AS split,
multiIf(startsWith(type_, 'LowCardinality(Nullable'), split[3],
startsWith(type_, 'LowCardinality'), split[2],
startsWith(type_, 'Nullable'), split[2],
split[1]) AS inner_type,
if (length(split) > 1, splitByString(', ', split[2]), []) AS decimal_scale_and_precision,
multiIf(inner_type = 'Decimal' AND toInt8(decimal_scale_and_precision[1]) <= 65 AND toInt8(decimal_scale_and_precision[2]) <= 30, concat('DECIMAL(', decimal_scale_and_precision[1], ', ', decimal_scale_and_precision[2], ')'),
mapContains(native_to_mysql_mapping, inner_type) = true, native_to_mysql_mapping[inner_type],
'TEXT') AS mysql_type
)";
'Int8', 'TINYINT',
'Int16', 'SMALLINT',
'Int32', 'INTEGER',
'Int64', 'BIGINT',
'UInt8', 'TINYINT UNSIGNED',
'UInt16', 'SMALLINT UNSIGNED',
'UInt32', 'INTEGER UNSIGNED',
'UInt64', 'BIGINT UNSIGNED',
'Float32', 'FLOAT',
'Float64', 'DOUBLE',
'UUID', 'CHAR',
'Bool', 'TINYINT',
'Date', 'DATE',
'Date32', 'DATE',
'DateTime', 'DATETIME',
'DateTime64', 'DATETIME',
'Map', 'JSON',
'Tuple', 'JSON',
'Object', 'JSON',
'String', '{}',
'FixedString', '{}') AS native_to_mysql_mapping,
)",
remap_string_as_text ? "TEXT" : "BLOB",
remap_fixed_string_as_text ? "TEXT" : "BLOB");
rewritten_query += R"(
splitByRegexp('\(|\)', type_) AS split,
multiIf(startsWith(type_, 'LowCardinality(Nullable'), split[3],
startsWith(type_, 'LowCardinality'), split[2],
startsWith(type_, 'Nullable'), split[2],
split[1]) AS inner_type,
if (length(split) > 1, splitByString(', ', split[2]), []) AS decimal_scale_and_precision,
multiIf(inner_type = 'Decimal' AND toInt8(decimal_scale_and_precision[1]) <= 65 AND toInt8(decimal_scale_and_precision[2]) <= 30, concat('DECIMAL(', decimal_scale_and_precision[1], ', ', decimal_scale_and_precision[2], ')'),
mapContains(native_to_mysql_mapping, inner_type) = true, native_to_mysql_mapping[inner_type],
'TEXT') AS mysql_type
)";
}
rewritten_query += R"(
SELECT

View File

@ -68,6 +68,10 @@ private:
void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
/// We do handle cycles via tracking current_asts,
/// but in case of a bug in that tricky logic we need to prevent stack overflow
checkStackSize();
auto & current_asts = data.current_asts;
String & current_alias = data.current_alias;

View File

@ -0,0 +1,62 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeMap.h>
#include <Interpreters/ProfileEventsExt.h>
#include <DataTypes/DataTypeEnum.h>
#include <Interpreters/S3QueueLog.h>
namespace DB
{
NamesAndTypesList S3QueueLogElement::getNamesAndTypes()
{
auto status_datatype = std::make_shared<DataTypeEnum8>(
DataTypeEnum8::Values
{
{"Processed", static_cast<Int8>(S3QueueLogElement::S3QueueStatus::Processed)},
{"Failed", static_cast<Int8>(S3QueueLogElement::S3QueueStatus::Failed)},
});
return {
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime>()},
{"table_uuid", std::make_shared<DataTypeString>()},
{"file_name", std::make_shared<DataTypeString>()},
{"rows_processed", std::make_shared<DataTypeUInt64>()},
{"status", status_datatype},
{"processing_start_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"processing_end_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
{"exception", std::make_shared<DataTypeString>()},
};
}
void S3QueueLogElement::appendToBlock(MutableColumns & columns) const
{
size_t i = 0;
columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType());
columns[i++]->insert(event_time);
columns[i++]->insert(table_uuid);
columns[i++]->insert(file_name);
columns[i++]->insert(rows_processed);
columns[i++]->insert(status);
if (processing_start_time)
columns[i++]->insert(processing_start_time);
else
columns[i++]->insertDefault();
if (processing_end_time)
columns[i++]->insert(processing_end_time);
else
columns[i++]->insertDefault();
ProfileEvents::dumpToMapColumn(counters_snapshot, columns[i++].get(), true);
columns[i++]->insert(exception);
}
}

View File

@ -0,0 +1,43 @@
#pragma once
#include <Common/ProfileEvents.h>
#include <Core/NamesAndAliases.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/SystemLog.h>
namespace DB
{
struct S3QueueLogElement
{
time_t event_time{};
std::string table_uuid;
std::string file_name;
size_t rows_processed = 0;
enum class S3QueueStatus
{
Processed,
Failed,
};
S3QueueStatus status;
ProfileEvents::Counters::Snapshot counters_snapshot;
time_t processing_start_time;
time_t processing_end_time;
std::string exception;
static std::string name() { return "S3QueueLog"; }
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
void appendToBlock(MutableColumns & columns) const;
static const char * getCustomColumnList() { return nullptr; }
};
class S3QueueLog : public SystemLog<S3QueueLogElement>
{
using SystemLog<S3QueueLogElement>::SystemLog;
};
}

View File

@ -19,6 +19,7 @@
#include <Interpreters/TransactionsInfoLog.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/ZooKeeperLog.h>
#include <Interpreters/BackupLog.h>
#include <Parsers/ASTCreateQuery.h>
@ -289,6 +290,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
processors_profile_log = createSystemLog<ProcessorsProfileLog>(global_context, "system", "processors_profile_log", config, "processors_profile_log");
asynchronous_insert_log = createSystemLog<AsynchronousInsertLog>(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log");
backup_log = createSystemLog<BackupLog>(global_context, "system", "backup_log", config, "backup_log");
s3_queue_log = createSystemLog<S3QueueLog>(global_context, "system", "s3queue_log", config, "s3queue_log");
if (query_log)
logs.emplace_back(query_log.get());
@ -329,6 +331,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
logs.emplace_back(asynchronous_insert_log.get());
if (backup_log)
logs.emplace_back(backup_log.get());
if (s3_queue_log)
logs.emplace_back(s3_queue_log.get());
try
{

View File

@ -50,6 +50,7 @@ class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class AsynchronousInsertLog;
class BackupLog;
class S3QueueLog;
/// System logs should be destroyed in destructor of the last Context and before tables,
/// because SystemLog destruction makes insert query while flushing data into underlying tables
@ -70,6 +71,7 @@ struct SystemLogs
std::shared_ptr<MetricLog> metric_log; /// Used to log all metrics.
std::shared_ptr<FilesystemCacheLog> filesystem_cache_log;
std::shared_ptr<FilesystemReadPrefetchesLog> filesystem_read_prefetches_log;
std::shared_ptr<S3QueueLog> s3_queue_log;
/// Metrics from system.asynchronous_metrics.
std::shared_ptr<AsynchronousMetricLog> asynchronous_metric_log;
/// OpenTelemetry trace spans.

View File

@ -988,7 +988,11 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
QueryCachePtr query_cache = context->getQueryCache();
const bool can_use_query_cache = query_cache != nullptr && settings.use_query_cache && !internal && (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>());
const bool can_use_query_cache = query_cache != nullptr
&& settings.use_query_cache
&& !internal
&& client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY
&& (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>());
QueryCache::Usage query_cache_usage = QueryCache::Usage::None;
if (!async_insert)
@ -1332,7 +1336,46 @@ void executeQuery(
BlockIO streams;
OutputFormatPtr output_format;
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
auto update_format_for_exception_if_needed = [&]()
{
if (!output_format)
{
try
{
String format_name = context->getDefaultFormat();
output_format = FormatFactory::instance().getOutputFormat(format_name, ostr, {}, context, output_format_settings);
if (output_format && output_format->supportsWritingException())
{
/// Force an update of the headers before we start writing
result_details.content_type = output_format->getContentType();
result_details.format = format_name;
set_result_details(result_details);
set_result_details = nullptr;
}
}
catch (const DB::Exception & e)
{
/// Ignore this exception and report the original one
LOG_WARNING(&Poco::Logger::get("executeQuery"), getExceptionMessageAndPattern(e, true));
}
}
};
try
{
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
}
catch (...)
{
if (handle_exception_in_output_format)
{
update_format_for_exception_if_needed();
if (output_format)
handle_exception_in_output_format(*output_format);
}
throw;
}
auto & pipeline = streams.pipeline;
std::unique_ptr<WriteBuffer> compressed_buffer;
@ -1426,8 +1469,12 @@ void executeQuery(
}
catch (...)
{
if (handle_exception_in_output_format && output_format)
handle_exception_in_output_format(*output_format);
if (handle_exception_in_output_format)
{
update_format_for_exception_if_needed();
if (output_format)
handle_exception_in_output_format(*output_format);
}
streams.onException();
throw;
}

View File

@ -36,8 +36,9 @@ String ASTPartition::getID(char delim) const
{
if (value)
return "Partition";
else
return "Partition_ID" + (delim + id->getID());
std::string id_string = id ? id->getID() : "";
return "Partition_ID" + (delim + id_string);
}
ASTPtr ASTPartition::clone() const

View File

@ -574,7 +574,7 @@ void RemoteQueryExecutor::processMergeTreeInitialReadAnnouncement(InitialAllRang
if (!extension || !extension->parallel_reading_coordinator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized");
extension->parallel_reading_coordinator->handleInitialAllRangesAnnouncement(announcement);
extension->parallel_reading_coordinator->handleInitialAllRangesAnnouncement(std::move(announcement));
}
void RemoteQueryExecutor::finish()

View File

@ -21,18 +21,46 @@ ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_,
add(name);
}
bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts)
ActiveDataPartSet::AddPartOutcome ActiveDataPartSet::tryAddPart(const MergeTreePartInfo & part_info, String * out_reason)
{
auto part_info = MergeTreePartInfo::fromPartName(name, format_version);
return add(part_info, name, out_replaced_parts);
return addImpl(part_info, part_info.getPartNameAndCheckFormat(format_version), nullptr, out_reason);
}
bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts)
{
String out_reason;
AddPartOutcome outcome = addImpl(part_info, name, out_replaced_parts, &out_reason);
if (outcome == AddPartOutcome::HasIntersectingPart)
{
chassert(!out_reason.empty());
throw Exception(ErrorCodes::LOGICAL_ERROR, fmt::runtime(out_reason));
}
return outcome == AddPartOutcome::Added;
}
bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts)
{
auto part_info = MergeTreePartInfo::fromPartName(name, format_version);
String out_reason;
AddPartOutcome outcome = addImpl(part_info, name, out_replaced_parts, &out_reason);
if (outcome == AddPartOutcome::HasIntersectingPart)
{
chassert(!out_reason.empty());
throw Exception(ErrorCodes::LOGICAL_ERROR, fmt::runtime(out_reason));
}
return outcome == AddPartOutcome::Added;
}
ActiveDataPartSet::AddPartOutcome ActiveDataPartSet::addImpl(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts, String * out_reason)
{
/// TODO make it exception safe (out_replaced_parts->push_back(...) may throw)
if (getContainingPartImpl(part_info) != part_info_to_name.end())
return false;
return AddPartOutcome::HasCovering;
/// Parts contained in `part` are located contiguously in `part_info_to_name`, overlapping with the place where the part itself would be inserted.
auto it = part_info_to_name.lower_bound(part_info);
@ -47,10 +75,15 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String &
if (!part_info.contains(it->first))
{
if (!part_info.isDisjoint(it->first))
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Part {} intersects previous part {}. "
"It is a bug or a result of manual intervention in the ZooKeeper data.",
part_info.getPartNameForLogs(), it->first.getPartNameForLogs());
{
if (out_reason != nullptr)
*out_reason = fmt::format(
"Part {} intersects previous part {}. "
"It is a bug or a result of manual intervention in the ZooKeeper data.",
part_info.getPartNameForLogs(),
it->first.getPartNameForLogs());
return AddPartOutcome::HasIntersectingPart;
}
++it;
break;
}
@ -73,18 +106,33 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String &
}
if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first))
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Part {} intersects part {}. It is a bug or a result of manual intervention "
"in the ZooKeeper data.", name, it->first.getPartNameForLogs());
{
if (out_reason != nullptr)
*out_reason = fmt::format(
"Part {} intersects part {}. It is a bug or a result of manual intervention "
"in the ZooKeeper data.",
name,
it->first.getPartNameForLogs());
return AddPartOutcome::HasIntersectingPart;
}
part_info_to_name.emplace(part_info, name);
return true;
return AddPartOutcome::Added;
}
bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts)
{
return add(part_info, part_info.getPartNameAndCheckFormat(format_version), out_replaced_parts);
String out_reason;
AddPartOutcome outcome = addImpl(part_info, part_info.getPartNameAndCheckFormat(format_version), out_replaced_parts, &out_reason);
if (outcome == AddPartOutcome::HasIntersectingPart)
{
chassert(!out_reason.empty());
throw Exception(ErrorCodes::LOGICAL_ERROR, fmt::runtime(out_reason));
}
return outcome == AddPartOutcome::Added;
}

View File

@ -22,6 +22,13 @@ using Strings = std::vector<String>;
class ActiveDataPartSet
{
public:
enum class AddPartOutcome
{
Added,
HasCovering,
HasIntersectingPart,
};
explicit ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {}
ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names);
@ -43,6 +50,8 @@ public:
bool add(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts = nullptr);
bool add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts = nullptr);
AddPartOutcome tryAddPart(const MergeTreePartInfo & part_info, String * out_reason = nullptr);
bool remove(const MergeTreePartInfo & part_info)
{
return part_info_to_name.erase(part_info) > 0;
@ -97,6 +106,8 @@ public:
MergeTreeDataFormatVersion getFormatVersion() const { return format_version; }
private:
AddPartOutcome addImpl(const MergeTreePartInfo & part_info, const String & name, Strings * out_replaced_parts = nullptr, String * out_reason = nullptr);
MergeTreeDataFormatVersion format_version;
std::map<MergeTreePartInfo, String> part_info_to_name;

View File

@ -20,6 +20,7 @@
#include <Common/ThreadFuzzer.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/Config/ConfigHelper.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Compression/CompressedReadBuffer.h>
#include <Core/QueryProcessingStage.h>
#include <DataTypes/DataTypeEnum.h>
@ -76,6 +77,7 @@
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreeDataPartWide.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/VirtualColumnUtils.h>
@ -96,6 +98,7 @@
#include <iomanip>
#include <limits>
#include <optional>
#include <ranges>
#include <set>
#include <thread>
#include <typeinfo>
@ -3915,25 +3918,17 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
return;
}
/// Let's restore some parts covered by the unexpected part to avoid partial data
if (restore_covered)
{
Strings restored;
bool error = false;
String error_parts;
Int64 pos = part->info.min_block;
Strings error_parts;
auto is_appropriate_state = [] (DataPartState state)
{
return state == DataPartState::Active || state == DataPartState::Outdated;
};
auto update_error = [&] (DataPartIteratorByInfo it)
{
error = true;
error_parts += (*it)->getNameWithState() + " ";
};
auto activate_part = [this, &restored_active_part](auto it)
{
/// It's not clear what to do if we try to activate part that was removed in transaction.
@ -3951,68 +3946,90 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT
restored_active_part = true;
};
auto it_middle = data_parts_by_info.lower_bound(part->info);
/// ActiveDataPartSet allows restoring the most top-level parts in place of the unexpected one.
/// This can be important in case of assigned merges: if the unexpected part is the result of some
/// finished but not committed merge, then we should restore (or at least try to restore)
/// the closest ancestors of the unexpected part to be able to execute it.
/// However, this is not guaranteed because outdated parts can intersect.
ActiveDataPartSet parts_for_replacement(format_version);
auto range = getDataPartsPartitionRange(part->info.partition_id);
DataPartsVector parts_candidates(range.begin(), range.end());
/// Restore the leftmost part covered by the part
if (it_middle != data_parts_by_info.begin())
/// In case of intersecting outdated parts we want to add bigger parts (with higher level) first
auto comparator = [] (const DataPartPtr left, const DataPartPtr right) -> bool
{
auto it = std::prev(it_middle);
if (part->contains(**it) && is_appropriate_state((*it)->getState()))
{
/// Maybe, we must consider part level somehow
if ((*it)->info.min_block != part->info.min_block)
update_error(it);
if ((*it)->getState() != DataPartState::Active)
activate_part(it);
pos = (*it)->info.max_block + 1;
restored.push_back((*it)->name);
}
else if ((*it)->info.partition_id == part->info.partition_id)
update_error(it);
if (left->info.level < right->info.level)
return true;
else if (left->info.level > right->info.level)
return false;
else
error = true;
return left->info.mutation < right->info.mutation;
};
std::sort(parts_candidates.begin(), parts_candidates.end(), comparator);
/// From larger to smaller parts
for (const auto & part_candidate_in_partition : parts_candidates | std::views::reverse)
{
if (part->info.contains(part_candidate_in_partition->info)
&& is_appropriate_state(part_candidate_in_partition->getState()))
{
String out_reason;
/// Outdated parts can intersect legally (because of DROP_PART); here it's okay, we
/// are trying to do our best to restore covered parts.
auto outcome = parts_for_replacement.tryAddPart(part_candidate_in_partition->info, &out_reason);
if (outcome == ActiveDataPartSet::AddPartOutcome::HasIntersectingPart)
{
error_parts.push_back(part->name);
LOG_ERROR(log, "Failed to restore part {}, because of intersection reason '{}'", part->name, out_reason);
}
}
}
if (parts_for_replacement.size() > 0)
{
std::vector<std::pair<uint64_t, uint64_t>> holes_list;
/// Most of the code below is just to write a pretty message
auto part_infos = parts_for_replacement.getPartInfos();
int64_t current_right_block = part_infos[0].min_block;
for (const auto & top_level_part_to_replace : part_infos)
{
auto data_part_it = data_parts_by_info.find(top_level_part_to_replace);
if (data_part_it == data_parts_by_info.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find part {} in own set", top_level_part_to_replace.getPartNameForLogs());
activate_part(data_part_it);
restored.push_back((*data_part_it)->name);
if (top_level_part_to_replace.min_block - current_right_block > 1)
holes_list.emplace_back(current_right_block, top_level_part_to_replace.min_block);
current_right_block = top_level_part_to_replace.max_block;
}
if (part->info.max_block != current_right_block)
holes_list.emplace_back(current_right_block, part->info.max_block);
for (const String & name : restored)
LOG_INFO(log, "Activated part {} in place of unexpected {}", name, part->name);
if (!error_parts.empty() || !holes_list.empty())
{
std::string error_parts_message, holes_list_message;
if (!error_parts.empty())
error_parts_message = fmt::format(" Parts failed to restore because of intersection: [{}]", fmt::join(error_parts, ", "));
if (!holes_list.empty())
{
if (!error_parts.empty())
holes_list_message = ".";
Strings holes_list_pairs;
for (const auto & [left_side, right_side] : holes_list)
holes_list_pairs.push_back(fmt::format("({}, {})", left_side + 1, right_side - 1));
holes_list_message += fmt::format(" Block ranges failed to restore: [{}]", fmt::join(holes_list_pairs, ", "));
}
LOG_WARNING(log, "The set of parts restored in place of {} looks incomplete. "
"SELECT queries may observe gaps in data until this replica is synchronized with other replicas.{}{}",
part->name, error_parts_message, holes_list_message);
}
}
else
error = true;
/// Restore "right" parts
for (auto it = it_middle; it != data_parts_by_info.end() && part->contains(**it); ++it)
{
if ((*it)->info.min_block < pos)
continue;
if (!is_appropriate_state((*it)->getState()))
{
update_error(it);
continue;
}
if ((*it)->info.min_block > pos)
update_error(it);
if ((*it)->getState() != DataPartState::Active)
activate_part(it);
pos = (*it)->info.max_block + 1;
restored.push_back((*it)->name);
}
if (pos != part->info.max_block + 1)
error = true;
for (const String & name : restored)
{
LOG_INFO(log, "Activated part {}", name);
}
if (error)
{
LOG_WARNING(log, "The set of parts restored in place of {} looks incomplete. "
"SELECT queries may observe gaps in data until this replica is synchronized with other replicas.{}",
part->name, (error_parts.empty() ? "" : " Suspicious parts: " + error_parts));
LOG_INFO(log, "Don't find any parts for replacement instead of unexpected {}", part->name);
}
}

View File

@ -109,14 +109,14 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorInverted::getGranuleAndReset()
return new_granule;
}
void MergeTreeIndexAggregatorInverted::addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit)
void MergeTreeIndexAggregatorInverted::addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter)
{
size_t cur = 0;
size_t token_start = 0;
size_t token_len = 0;
while (cur < length && token_extractor->nextInStringPadded(data, length, &cur, &token_start, &token_len))
gin_filter.add(data + token_start, token_len, rowID, store, limit);
gin_filter.add(data + token_start, token_len, rowID, store);
}
void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos, size_t limit)
@ -150,7 +150,7 @@ void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos,
for (size_t row_num = 0; row_num < elements_size; ++row_num)
{
auto ref = column_key.getDataAt(element_start_row + row_num);
addToGinFilter(row_id, ref.data, ref.size, granule->gin_filters[col], rows_read);
addToGinFilter(row_id, ref.data, ref.size, granule->gin_filters[col]);
store->incrementCurrentSizeBy(ref.size);
}
current_position += 1;
@ -165,7 +165,7 @@ void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos,
for (size_t i = 0; i < rows_read; ++i)
{
auto ref = column->getDataAt(current_position + i);
addToGinFilter(row_id, ref.data, ref.size, granule->gin_filters[col], rows_read);
addToGinFilter(row_id, ref.data, ref.size, granule->gin_filters[col]);
store->incrementCurrentSizeBy(ref.size);
row_id++;
if (store->needToWrite())
@ -735,8 +735,8 @@ MergeTreeIndexPtr invertedIndexCreator(
const IndexDescription & index)
{
size_t n = index.arguments.empty() ? 0 : index.arguments[0].get<size_t>();
Float64 density = index.arguments.size() < 2 ? 1.0 : index.arguments[1].get<Float64>();
GinFilterParameters params(n, density);
UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get<UInt64>();
GinFilterParameters params(n, max_rows);
/// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor
if (n > 0)
@ -780,13 +780,16 @@ void invertedIndexValidator(const IndexDescription & index, bool /*attach*/)
if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64)
throw Exception(ErrorCodes::INCORRECT_QUERY, "The first Inverted index argument must be positive integer.");
if (index.arguments.size() == 2 && (index.arguments[1].getType() != Field::Types::Float64 || index.arguments[1].get<Float64>() <= 0 || index.arguments[1].get<Float64>() > 1))
throw Exception(ErrorCodes::INCORRECT_QUERY, "The second Inverted index argument must be a float between 0 and 1.");
if (index.arguments.size() == 2)
{
if (index.arguments[1].getType() != Field::Types::UInt64)
throw Exception(ErrorCodes::INCORRECT_QUERY, "The second Inverted index argument must be UInt64");
if (index.arguments[1].get<UInt64>() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].get<UInt64>() < MIN_ROWS_PER_POSTINGS_LIST)
throw Exception(ErrorCodes::INCORRECT_QUERY, "The maximum rows per postings list must be no less than {}", MIN_ROWS_PER_POSTINGS_LIST);
}
/// Just validate
size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get<size_t>();
Float64 density = index.arguments.size() < 2 ? 1.0 : index.arguments[1].get<Float64>();
GinFilterParameters params(ngrams, density);
UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get<UInt64>();
GinFilterParameters params(ngrams, max_rows_per_postings_list);
}
}

View File

@ -48,7 +48,7 @@ struct MergeTreeIndexAggregatorInverted final : IMergeTreeIndexAggregator
void update(const Block & block, size_t * pos, size_t limit) override;
void addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit);
void addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter);
GinIndexStorePtr store;
Names index_columns;

View File

@ -134,7 +134,7 @@ public:
void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
void markReplicaAsUnavailable(size_t replica_number) override;
void updateReadingState(const InitialAllRangesAnnouncement & announcement);
void updateReadingState(InitialAllRangesAnnouncement announcement);
void finalizeReadingState();
size_t computeConsistentHash(const MergeTreePartInfo & info) const
@ -152,12 +152,12 @@ DefaultCoordinator::~DefaultCoordinator()
LOG_DEBUG(log, "Coordination done: {}", toString(stats));
}
void DefaultCoordinator::updateReadingState(const InitialAllRangesAnnouncement & announcement)
void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announcement)
{
PartRefs parts_diff;
/// To get rid of duplicates
for (const auto & part: announcement.description)
for (auto && part: announcement.description)
{
auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
[&part] (const Part & other) { return other.description.info.getPartNameV1() == part.info.getPartNameV1(); });
@ -176,12 +176,7 @@ void DefaultCoordinator::updateReadingState(const InitialAllRangesAnnouncement &
if (covering_or_the_same_it != all_parts_to_read.end())
continue;
auto new_part = Part{
.description = part,
.replicas = {announcement.replica_num}
};
auto [insert_it, _] = all_parts_to_read.insert(new_part);
auto [insert_it, _] = all_parts_to_read.emplace(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
parts_diff.push_back(insert_it);
}
@ -242,12 +237,14 @@ void DefaultCoordinator::finalizeReadingState()
void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
{
updateReadingState(announcement);
const auto replica_num = announcement.replica_num;
if (announcement.replica_num >= stats.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", announcement.replica_num, stats.size());
updateReadingState(std::move(announcement));
stats[announcement.replica_num].number_of_requests +=1;
if (replica_num >= stats.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size());
++stats[replica_num].number_of_requests;
++sent_initial_requests;
LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count);
@ -385,7 +382,7 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
LOG_TRACE(log, "Received an announcement {}", announcement.describe());
/// To get rid of duplicates
for (const auto & part: announcement.description)
for (auto && part: announcement.description)
{
auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
[&part] (const Part & other) { return other.description.info == part.info; });
@ -404,13 +401,8 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
if (covering_or_the_same_it != all_parts_to_read.end())
continue;
auto new_part = Part{
.description = part,
.replicas = {announcement.replica_num}
};
auto insert_it = all_parts_to_read.insert(new_part);
auto & ranges = insert_it.first->description.ranges;
auto [inserted_it, _] = all_parts_to_read.emplace(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
auto & ranges = inserted_it->description.ranges;
std::sort(ranges.begin(), ranges.end());
}
}
@ -517,7 +509,7 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init
}
return pimpl->handleInitialAllRangesAnnouncement(announcement);
return pimpl->handleInitialAllRangesAnnouncement(std::move(announcement));
}
ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request)

View File

@ -566,6 +566,7 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl
void MaterializedPostgreSQLConsumer::syncTables()
{
size_t synced_tables = 0;
while (!tables_to_sync.empty())
{
auto table_name = *tables_to_sync.begin();
@ -596,6 +597,7 @@ void MaterializedPostgreSQLConsumer::syncTables()
CompletedPipelineExecutor executor(io.pipeline);
executor.execute();
++synced_tables;
}
}
catch (...)
@ -608,7 +610,8 @@ void MaterializedPostgreSQLConsumer::syncTables()
tables_to_sync.erase(tables_to_sync.begin());
}
LOG_DEBUG(log, "Table sync end for {} tables, last lsn: {} = {}, (attempted lsn {})", tables_to_sync.size(), current_lsn, getLSNValue(current_lsn), getLSNValue(final_lsn));
LOG_DEBUG(log, "Table sync end for {} tables, last lsn: {} = {}, (attempted lsn {})",
synced_tables, current_lsn, getLSNValue(current_lsn), getLSNValue(final_lsn));
updateLsn();
}

View File

@ -24,6 +24,7 @@ namespace DB
M(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \
M(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \
M(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \
M(Bool, materialized_postgresql_use_unique_replication_consumer_identifier, false, "Should a unique consumer be registered for table replication", 0) \
DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS)

View File

@ -17,12 +17,14 @@
#include <Interpreters/Context.h>
#include <Databases/DatabaseOnDisk.h>
#include <boost/algorithm/string/trim.hpp>
#include <Poco/String.h>
namespace DB
{
static const auto CLEANUP_RESCHEDULE_MS = 600000 * 3; /// 30 min
static constexpr size_t replication_slot_name_max_size = 64;
namespace ErrorCodes
{
@ -56,10 +58,70 @@ private:
};
namespace
{
/// There can be several replication slots per publication, but only one publication per table/database replication.
/// The replication slot might be unique (contain a uuid) to allow having multiple replicas for the same PostgreSQL table/database.
String getPublicationName(const String & postgres_database, const String & postgres_table)
{
return fmt::format(
"{}_ch_publication",
postgres_table.empty() ? postgres_database : fmt::format("{}_{}", postgres_database, postgres_table));
}
void checkReplicationSlot(String name)
{
for (const auto & c : name)
{
const bool ok = (std::isalpha(c) && std::islower(c)) || std::isdigit(c) || c == '_';
if (!ok)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Replication slot can contain lower-case letters, numbers, and the underscore character. "
"Got: {}", name);
}
}
if (name.size() > replication_slot_name_max_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Too big replication slot size: {}", name);
}
String normalizeReplicationSlot(String name)
{
name = Poco::toLower(name);
for (auto & c : name)
if (c == '-')
c = '_';
return name;
}
String getReplicationSlotName(
const String & postgres_database,
const String & postgres_table,
const String & clickhouse_uuid,
const MaterializedPostgreSQLSettings & replication_settings)
{
String slot_name = replication_settings.materialized_postgresql_replication_slot;
if (slot_name.empty())
{
if (replication_settings.materialized_postgresql_use_unique_replication_consumer_identifier)
slot_name = clickhouse_uuid;
else
slot_name = postgres_table.empty() ? postgres_database : fmt::format("{}_{}_ch_replication_slot", postgres_database, postgres_table);
slot_name = normalizeReplicationSlot(slot_name);
}
return slot_name;
}
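/// For illustration (an assumption-based sketch, not part of the diff): with postgres database
/// "pg_db", postgres table "events", no user-provided slot name and
/// materialized_postgresql_use_unique_replication_consumer_identifier = 0, the helpers above yield
///     replication slot: "pg_db_events_ch_replication_slot"
///     publication:      "pg_db_events_ch_publication"
/// With materialized_postgresql_use_unique_replication_consumer_identifier = 1, the slot name is the
/// ClickHouse table/database UUID, lower-cased and with '-' replaced by '_', while the publication
/// name stays the same (one publication per replicated table/database).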
}
PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
const String & replication_identifier,
const String & postgres_database_,
const String & current_database_name_,
const String & postgres_table_,
const String & clickhouse_database_,
const String & clickhouse_uuid_,
const postgres::ConnectionInfo & connection_info_,
ContextPtr context_,
bool is_attach_,
@ -70,14 +132,18 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
, is_attach(is_attach_)
, postgres_database(postgres_database_)
, postgres_schema(replication_settings.materialized_postgresql_schema)
, current_database_name(current_database_name_)
, current_database_name(clickhouse_database_)
, connection_info(connection_info_)
, max_block_size(replication_settings.materialized_postgresql_max_block_size)
, is_materialized_postgresql_database(is_materialized_postgresql_database_)
, tables_list(replication_settings.materialized_postgresql_tables_list)
, schema_list(replication_settings.materialized_postgresql_schema_list)
, schema_as_a_part_of_table_name(!schema_list.empty() || replication_settings.materialized_postgresql_tables_list_with_schema)
, user_managed_slot(!replication_settings.materialized_postgresql_replication_slot.value.empty())
, user_provided_snapshot(replication_settings.materialized_postgresql_snapshot)
, replication_slot(getReplicationSlotName(postgres_database_, postgres_table_, clickhouse_uuid_, replication_settings))
, tmp_replication_slot(replication_slot + "_tmp")
, publication_name(getPublicationName(postgres_database_, postgres_table_))
, reschedule_backoff_min_ms(replication_settings.materialized_postgresql_backoff_min_ms)
, reschedule_backoff_max_ms(replication_settings.materialized_postgresql_backoff_max_ms)
, reschedule_backoff_factor(replication_settings.materialized_postgresql_backoff_factor)
@ -89,13 +155,9 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
if (!schema_list.empty() && !postgres_schema.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot have schema list and common schema at the same time");
replication_slot = replication_settings.materialized_postgresql_replication_slot;
if (replication_slot.empty())
{
user_managed_slot = false;
replication_slot = fmt::format("{}_ch_replication_slot", replication_identifier);
}
publication_name = fmt::format("{}_ch_publication", replication_identifier);
checkReplicationSlot(replication_slot);
LOG_INFO(log, "Using replication slot {} and publication {}", replication_slot, publication_name);
startup_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ checkConnectionAndStart(); });
consumer_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ consumerFunc(); });
@ -496,7 +558,7 @@ void PostgreSQLReplicationHandler::createPublicationIfNeeded(pqxx::nontransactio
throw Exception(ErrorCodes::LOGICAL_ERROR, "No table found to be replicated");
/// 'ONLY' means just a table, without descendants.
std::string query_str = fmt::format("CREATE PUBLICATION {} FOR TABLE ONLY {}", publication_name, tables_list);
std::string query_str = fmt::format("CREATE PUBLICATION {} FOR TABLE ONLY {}", doubleQuoteString(publication_name), tables_list);
try
{
tx.exec(query_str);
@ -519,7 +581,7 @@ bool PostgreSQLReplicationHandler::isReplicationSlotExist(pqxx::nontransaction &
{
String slot_name;
if (temporary)
slot_name = replication_slot + "_tmp";
slot_name = tmp_replication_slot;
else
slot_name = replication_slot;
@ -546,11 +608,11 @@ void PostgreSQLReplicationHandler::createReplicationSlot(
String query_str, slot_name;
if (temporary)
slot_name = replication_slot + "_tmp";
slot_name = tmp_replication_slot;
else
slot_name = replication_slot;
query_str = fmt::format("CREATE_REPLICATION_SLOT {} LOGICAL pgoutput EXPORT_SNAPSHOT", slot_name);
query_str = fmt::format("CREATE_REPLICATION_SLOT {} LOGICAL pgoutput EXPORT_SNAPSHOT", doubleQuoteString(slot_name));
try
{
@ -573,7 +635,7 @@ void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx
std::string slot_name;
if (temporary)
slot_name = replication_slot + "_tmp";
slot_name = tmp_replication_slot;
else
slot_name = replication_slot;

View File

@ -21,9 +21,10 @@ public:
using ConsumerPtr = std::shared_ptr<MaterializedPostgreSQLConsumer>;
PostgreSQLReplicationHandler(
const String & replication_identifier,
const String & postgres_database_,
const String & current_database_name_,
const String & postgres_table_,
const String & clickhouse_database_,
const String & clickhouse_uuid_,
const postgres::ConnectionInfo & connection_info_,
ContextPtr context_,
bool is_attach_,
@ -128,10 +129,11 @@ private:
/// This makes it possible to replicate tables from multiple schemas in the same MaterializedPostgreSQL database engine.
mutable bool schema_as_a_part_of_table_name = false;
bool user_managed_slot = true;
String user_provided_snapshot;
String replication_slot, publication_name;
const bool user_managed_slot;
const String user_provided_snapshot;
const String replication_slot;
const String tmp_replication_slot;
const String publication_name;
/// Replication consumer. Manages decoding of replication stream and syncing into tables.
ConsumerPtr consumer;

View File

@ -74,13 +74,13 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
setInMemoryMetadata(storage_metadata);
String replication_identifier = remote_database_name + "_" + remote_table_name_;
replication_settings->materialized_postgresql_tables_list = remote_table_name_;
replication_handler = std::make_unique<PostgreSQLReplicationHandler>(
replication_identifier,
remote_database_name,
remote_table_name_,
table_id_.database_name,
toString(table_id_.uuid),
connection_info,
getContext(),
is_attach,

View File

@ -26,12 +26,13 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink(
break;
++primary_key_pos;
}
serializations = getHeader().getSerializations();
}
void EmbeddedRocksDBSink::consume(Chunk chunk)
{
auto rows = chunk.getNumRows();
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
const auto & columns = chunk.getColumns();
WriteBufferFromOwnString wb_key;
WriteBufferFromOwnString wb_value;
@ -43,12 +44,9 @@ void EmbeddedRocksDBSink::consume(Chunk chunk)
wb_key.restart();
wb_value.restart();
size_t idx = 0;
for (const auto & elem : block)
{
elem.type->getDefaultSerialization()->serializeBinary(*elem.column, i, idx == primary_key_pos ? wb_key : wb_value, {});
++idx;
}
for (size_t idx = 0; idx < columns.size(); ++idx)
serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? wb_key : wb_value, {});
status = batch.Put(wb_key.str(), wb_value.str());
if (!status.ok())
throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString());

View File

@ -24,6 +24,7 @@ private:
StorageEmbeddedRocksDB & storage;
StorageMetadataPtr metadata_snapshot;
size_t primary_key_pos = 0;
Serializations serializations;
};
}

File diff suppressed because it is too large

View File

@ -1,127 +1,171 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include <filesystem>
#include <Core/Types.h>
#include <Core/SettingsEnums.h>
#include <Core/BackgroundSchedulePool.h>
#include <Common/ZooKeeper/ZooKeeper.h>
# include <Core/UUID.h>
# include <Interpreters/Context.h>
# include <Storages/StorageS3Settings.h>
# include <Common/ZooKeeper/ZooKeeper.h>
namespace fs = std::filesystem;
namespace Poco { class Logger; }
namespace DB
{
class StorageS3Queue;
struct S3QueueSettings;
class StorageS3Queue;
/**
* A class for managing S3Queue metadata in zookeeper, e.g.
* the following folders:
* - <path_to_metadata>/processing
* - <path_to_metadata>/processed
* - <path_to_metadata>/failed
*
* Depending on the S3Queue processing mode (ordered or unordered),
* metadata is stored differently in the /processed node.
*
* Implements caching of zookeeper metadata for faster responses.
* Cached part is located in LocalFileStatuses.
*
* In case of Unordered mode, if a files TTL is enabled or a maximum tracked files limit is set,
* a background cleanup thread is started, which is responsible for maintaining them.
*/
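/// A minimal usage sketch (illustrative only; the zookeeper path, the settings object and the file name below
/// are placeholders, and error handling is omitted). Only declarations from this class are used:
///
///     S3QueueFilesMetadata metadata(fs::path("/clickhouse/s3queue/my_table"), settings);
///     if (auto [holder, file_status] = metadata.trySetFileAsProcessing("data/file1.csv"); holder)
///     {
///         /// ... read and stream the file ...
///         metadata.setFileProcessed(holder);                     /// on success
///         /// metadata.setFileFailed(holder, exception_message); /// on failure
///     }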
class S3QueueFilesMetadata
{
public:
struct TrackedCollectionItem
class ProcessingNodeHolder;
using ProcessingNodeHolderPtr = std::shared_ptr<ProcessingNodeHolder>;
S3QueueFilesMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_);
~S3QueueFilesMetadata();
void setFileProcessed(ProcessingNodeHolderPtr holder);
void setFileFailed(ProcessingNodeHolderPtr holder, const std::string & exception_message);
struct FileStatus
{
TrackedCollectionItem() = default;
TrackedCollectionItem(const String & file_path_, UInt64 timestamp_, UInt64 retries_count_, const String & last_exception_)
: file_path(file_path_), timestamp(timestamp_), retries_count(retries_count_), last_exception(last_exception_) {}
String file_path;
UInt64 timestamp = 0;
UInt64 retries_count = 0;
String last_exception;
enum class State
{
Processing,
Processed,
Failed,
None
};
State state = State::None;
std::atomic<size_t> processed_rows = 0;
time_t processing_start_time = 0;
time_t processing_end_time = 0;
size_t retries = 0;
std::string last_exception;
ProfileEvents::Counters profile_counters;
std::mutex processing_lock;
std::mutex metadata_lock;
};
using FileStatusPtr = std::shared_ptr<FileStatus>;
using FileStatuses = std::unordered_map<std::string, FileStatusPtr>;
using S3FilesCollection = std::unordered_set<String>;
using TrackedFiles = std::deque<TrackedCollectionItem>;
/// Set the file as processing, if it is not already processed, failed or being processed.
std::pair<ProcessingNodeHolderPtr, FileStatusPtr> trySetFileAsProcessing(const std::string & path);
S3QueueFilesMetadata(const StorageS3Queue * storage_, const S3QueueSettings & settings_);
FileStatusPtr getFileStatus(const std::string & path);
void setFilesProcessing(const Strings & file_paths);
void setFileProcessed(const String & file_path);
bool setFileFailed(const String & file_path, const String & exception_message);
FileStatuses getFileStateses() const { return local_file_statuses.getAll(); }
S3FilesCollection getProcessedFailedAndProcessingFiles();
String getMaxProcessedFile();
std::shared_ptr<zkutil::EphemeralNodeHolder> acquireLock(zkutil::ZooKeeperPtr zookeeper);
bool checkSettings(const S3QueueSettings & settings) const;
struct S3QueueCollection
{
public:
virtual ~S3QueueCollection() = default;
virtual String toString() const;
S3FilesCollection getFileNames();
virtual void parse(const String & collection_str) = 0;
protected:
TrackedFiles files;
void read(ReadBuffer & in);
void write(WriteBuffer & out) const;
};
struct S3QueueProcessedCollection : public S3QueueCollection
{
public:
S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_);
void parse(const String & collection_str) override;
void add(const String & file_name);
private:
const UInt64 max_size;
const UInt64 max_age;
};
struct S3QueueFailedCollection : S3QueueCollection
{
public:
S3QueueFailedCollection(const UInt64 & max_retries_count_);
void parse(const String & collection_str) override;
bool add(const String & file_name, const String & exception_message);
S3FilesCollection getFileNames();
private:
UInt64 max_retries_count;
};
struct S3QueueProcessingCollection
{
public:
S3QueueProcessingCollection() = default;
void parse(const String & collection_str);
void add(const Strings & file_names);
void remove(const String & file_name);
String toString() const;
const S3FilesCollection & getFileNames() const { return files; }
private:
S3FilesCollection files;
};
void deactivateCleanupTask();
private:
const StorageS3Queue * storage;
const S3QueueMode mode;
const UInt64 max_set_size;
const UInt64 max_set_age_sec;
const UInt64 max_loading_retries;
const size_t min_cleanup_interval_ms;
const size_t max_cleanup_interval_ms;
const String zookeeper_processing_path;
const String zookeeper_processed_path;
const String zookeeper_failed_path;
const String zookeeper_lock_path;
const fs::path zookeeper_processing_path;
const fs::path zookeeper_processed_path;
const fs::path zookeeper_failed_path;
const fs::path zookeeper_cleanup_lock_path;
mutable std::mutex mutex;
Poco::Logger * log;
S3FilesCollection getFailedFiles();
S3FilesCollection getProcessingFiles();
S3FilesCollection getUnorderedProcessedFiles();
std::atomic_bool shutdown = false;
BackgroundSchedulePool::TaskHolder task;
void removeProcessingFile(const String & file_path);
std::string getNodeName(const std::string & path);
zkutil::ZooKeeperPtr getZooKeeper() const;
void setFileProcessedForOrderedMode(ProcessingNodeHolderPtr holder);
void setFileProcessedForUnorderedMode(ProcessingNodeHolderPtr holder);
enum class SetFileProcessingResult
{
Success,
ProcessingByOtherNode,
AlreadyProcessed,
AlreadyFailed,
};
std::pair<SetFileProcessingResult, ProcessingNodeHolderPtr> trySetFileAsProcessingForOrderedMode(const std::string & path);
std::pair<SetFileProcessingResult, ProcessingNodeHolderPtr> trySetFileAsProcessingForUnorderedMode(const std::string & path);
struct NodeMetadata
{
std::string file_path;
UInt64 last_processed_timestamp = 0;
std::string last_exception;
UInt64 retries = 0;
std::string processing_id; /// For ephemeral processing node.
std::string toString() const;
static NodeMetadata fromString(const std::string & metadata_str);
};
NodeMetadata createNodeMetadata(const std::string & path, const std::string & exception = "", size_t retries = 0);
void cleanupThreadFunc();
void cleanupThreadFuncImpl();
struct LocalFileStatuses
{
FileStatuses file_statuses;
mutable std::mutex mutex;
FileStatuses getAll() const;
FileStatusPtr get(const std::string & filename, bool create);
bool remove(const std::string & filename, bool if_exists);
std::unique_lock<std::mutex> lock() const;
};
LocalFileStatuses local_file_statuses;
};
class S3QueueFilesMetadata::ProcessingNodeHolder
{
friend class S3QueueFilesMetadata;
public:
ProcessingNodeHolder(
const std::string & processing_id_,
const std::string & path_,
const std::string & zk_node_path_,
zkutil::ZooKeeperPtr zk_client_);
~ProcessingNodeHolder();
private:
bool remove(Coordination::Requests * requests = nullptr, Coordination::Responses * responses = nullptr);
zkutil::ZooKeeperPtr zk_client;
std::string path;
std::string zk_node_path;
std::string processing_id;
bool removed = false;
Poco::Logger * log;
};
}
#endif

View File

@ -0,0 +1,70 @@
#include <Storages/S3Queue/S3QueueMetadataFactory.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
S3QueueMetadataFactory & S3QueueMetadataFactory::instance()
{
static S3QueueMetadataFactory ret;
return ret;
}
S3QueueMetadataFactory::FilesMetadataPtr
S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings)
{
std::lock_guard lock(mutex);
auto it = metadata_by_path.find(zookeeper_path);
if (it == metadata_by_path.end())
{
it = metadata_by_path.emplace(zookeeper_path, std::make_shared<S3QueueFilesMetadata>(fs::path(zookeeper_path), settings)).first;
}
else if (it->second.metadata->checkSettings(settings))
{
it->second.ref_count += 1;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata with the same `s3queue_zookeeper_path` "
"was already created but with different settings");
}
return it->second.metadata;
}
void S3QueueMetadataFactory::remove(const std::string & zookeeper_path)
{
std::lock_guard lock(mutex);
auto it = metadata_by_path.find(zookeeper_path);
if (it == metadata_by_path.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata with zookeeper path {} does not exist", zookeeper_path);
if (--it->second.ref_count == 0)
{
try
{
auto zk_client = Context::getGlobalContextInstance()->getZooKeeper();
zk_client->tryRemove(it->first);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
metadata_by_path.erase(it);
}
}
std::unordered_map<std::string, S3QueueMetadataFactory::FilesMetadataPtr> S3QueueMetadataFactory::getAll()
{
std::unordered_map<std::string, S3QueueMetadataFactory::FilesMetadataPtr> result;
for (const auto & [zk_path, metadata_and_ref_count] : metadata_by_path)
result.emplace(zk_path, metadata_and_ref_count.metadata);
return result;
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <boost/noncopyable.hpp>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/S3Queue/S3QueueFilesMetadata.h>
namespace DB
{
class S3QueueMetadataFactory final : private boost::noncopyable
{
public:
using FilesMetadataPtr = std::shared_ptr<S3QueueFilesMetadata>;
static S3QueueMetadataFactory & instance();
FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings);
void remove(const std::string & zookeeper_path);
std::unordered_map<std::string, FilesMetadataPtr> getAll();
private:
struct Metadata
{
explicit Metadata(std::shared_ptr<S3QueueFilesMetadata> metadata_) : metadata(metadata_), ref_count(1) {}
std::shared_ptr<S3QueueFilesMetadata> metadata;
size_t ref_count = 0;
};
using MetadataByPath = std::unordered_map<std::string, Metadata>;
MetadataByPath metadata_by_path;
std::mutex mutex;
};
}

View File

@ -1,8 +1,8 @@
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Common/Exception.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Common/Exception.h>
namespace DB

View File

@ -19,17 +19,16 @@ class ASTStorage;
0) \
M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \
M(String, keeper_path, "", "Zookeeper node path", 0) \
M(UInt64, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \
M(UInt64, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \
M(UInt64, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \
M(UInt64, s3queue_polling_backoff_ms, 0, "Polling backoff", 0) \
M(UInt64, s3queue_tracked_files_limit, 1000, "Max set size for tracking processed files in unordered mode in ZooKeeper", 0) \
M(UInt64, \
s3queue_tracked_file_ttl_sec, \
0, \
"Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", \
0) \
M(UInt64, s3queue_polling_size, 50, "Maximum files to fetch from S3 with SELECT", 0)
M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \
M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \
M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \
M(UInt32, s3queue_tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \
M(UInt32, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \
M(UInt32, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \
M(UInt32, s3queue_polling_backoff_ms, 1000, "Polling backoff", 0) \
M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \
M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \
M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \
#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \
S3QUEUE_RELATED_SETTINGS(M, ALIAS) \

View File

@ -1,59 +1,24 @@
#include <algorithm>
#include <Common/ProfileEvents.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include "IO/ParallelReadBuffer.h"
#include "Parsers/ASTCreateQuery.h"
#include "config.h"
#if USE_AWS_S3
# include <Common/isValidUTF8.h>
# include <Functions/FunctionsConversion.h>
# include <IO/S3/Requests.h>
# include <IO/S3Common.h>
# include <Interpreters/TreeRewriter.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/ASTInsertQuery.h>
# include <Storages/NamedCollectionsHelpers.h>
# include <Storages/PartitionedSink.h>
# include <Storages/S3Queue/S3QueueSource.h>
# include <Storages/StorageS3.h>
# include <Storages/StorageS3Settings.h>
# include <Storages/VirtualColumnUtils.h>
# include <Formats/FormatFactory.h>
# include <Processors/Formats/IInputFormat.h>
# include <Processors/Formats/IOutputFormat.h>
# include <Processors/Transforms/AddingDefaultsTransform.h>
# include <QueryPipeline/QueryPipelineBuilder.h>
# include <DataTypes/DataTypeString.h>
# include <Common/CurrentMetrics.h>
# include <Common/NamedCollections/NamedCollections.h>
# include <Common/parseGlobs.h>
# include <Processors/ISource.h>
# include <Processors/Sinks/SinkToStorage.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/logger_useful.h>
#include <Common/getRandomASCIIString.h>
#include <Storages/S3Queue/S3QueueSource.h>
#include <Storages/VirtualColumnUtils.h>
namespace CurrentMetrics
{
extern const Metric StorageS3Threads;
extern const Metric StorageS3ThreadsActive;
extern const Metric StorageS3Threads;
extern const Metric StorageS3ThreadsActive;
}
namespace ProfileEvents
{
extern const Event S3DeleteObjects;
extern const Event S3ListObjects;
extern const Event S3QueuePullMicroseconds;
}
namespace DB
@ -62,148 +27,83 @@ namespace DB
namespace ErrorCodes
{
extern const int S3_ERROR;
extern const int NOT_IMPLEMENTED;
}
StorageS3QueueSource::QueueGlobIterator::QueueGlobIterator(
const S3::Client & client_,
const S3::URI & globbed_uri_,
ASTPtr query,
const NamesAndTypesList & virtual_columns,
ContextPtr context,
UInt64 & max_poll_size_,
const S3Settings::RequestSettings & request_settings_)
: max_poll_size(max_poll_size_)
, glob_iterator(std::make_unique<StorageS3QueueSource::DisclosedGlobIterator>(
client_, globbed_uri_, query, virtual_columns, context, nullptr, request_settings_))
StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo(
const std::string & key_,
std::optional<S3::ObjectInfo> info_,
Metadata::ProcessingNodeHolderPtr processing_holder_,
FileStatusPtr file_status_)
: StorageS3Source::KeyWithInfo(key_, info_)
, processing_holder(processing_holder_)
, file_status(file_status_)
{
/// todo(kssenii): remove this loop, it should not be here
while (true)
{
KeyWithInfo val = glob_iterator->next();
if (val.key.empty())
break;
keys_buf.push_back(val);
}
}
Strings StorageS3QueueSource::QueueGlobIterator::filterProcessingFiles(
const S3QueueMode & engine_mode, std::unordered_set<String> & exclude_keys, const String & max_file)
StorageS3QueueSource::FileIterator::FileIterator(
std::shared_ptr<S3QueueFilesMetadata> metadata_,
std::unique_ptr<GlobIterator> glob_iterator_,
std::atomic<bool> & shutdown_called_)
: metadata(metadata_)
, glob_iterator(std::move(glob_iterator_))
, shutdown_called(shutdown_called_)
{
for (const KeyWithInfo & val : keys_buf)
{
auto full_path = val.key;
if (exclude_keys.find(full_path) != exclude_keys.end())
{
LOG_TEST(log, "File {} will be skipped, because it was found in exclude files list "
"(either already processed or failed to be processed)", val.key);
continue;
}
if ((engine_mode == S3QueueMode::ORDERED) && (full_path.compare(max_file) <= 0))
continue;
if ((processing_keys.size() < max_poll_size) || (engine_mode == S3QueueMode::ORDERED))
{
processing_keys.push_back(val);
}
else
{
break;
}
}
if (engine_mode == S3QueueMode::ORDERED)
{
std::sort(
processing_keys.begin(),
processing_keys.end(),
[](const KeyWithInfo & lhs, const KeyWithInfo & rhs) { return lhs.key.compare(rhs.key) < 0; });
if (processing_keys.size() > max_poll_size)
{
processing_keys.erase(processing_keys.begin() + max_poll_size, processing_keys.end());
}
}
Strings keys;
for (const auto & key_info : processing_keys)
keys.push_back(key_info.key);
processing_keys.push_back(KeyWithInfo());
processing_iterator = processing_keys.begin();
return keys;
}
StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next()
StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next()
{
std::lock_guard lock(mutex);
if (processing_iterator != processing_keys.end())
while (!shutdown_called)
{
return *processing_iterator++;
}
KeyWithInfoPtr val = glob_iterator->next();
return KeyWithInfo();
if (!val || shutdown_called)
return {};
if (auto [processing_holder, processing_file_status] = metadata->trySetFileAsProcessing(val->key);
processing_holder && !shutdown_called)
{
return std::make_shared<S3QueueKeyWithInfo>(val->key, val->info, processing_holder, processing_file_status);
}
}
return {};
}
size_t StorageS3QueueSource::QueueGlobIterator::estimatedKeysCount()
size_t StorageS3QueueSource::FileIterator::estimatedKeysCount()
{
return keys_buf.size();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimatedKeysCount is not implemented");
}
StorageS3QueueSource::StorageS3QueueSource(
const ReadFromFormatInfo & info,
const String & format_,
String name_,
ContextPtr context_,
std::optional<FormatSettings> format_settings_,
UInt64 max_block_size_,
const S3Settings::RequestSettings & request_settings_,
String compression_hint_,
const std::shared_ptr<const S3::Client> & client_,
const String & bucket_,
const String & version_id_,
const String & url_host_and_port,
std::shared_ptr<IIterator> file_iterator_,
const Block & header_,
std::unique_ptr<StorageS3Source> internal_source_,
std::shared_ptr<S3QueueFilesMetadata> files_metadata_,
const S3QueueAction & action_,
const size_t download_thread_num_)
: ISource(info.source_header)
RemoveFileFunc remove_file_func_,
const NamesAndTypesList & requested_virtual_columns_,
ContextPtr context_,
const std::atomic<bool> & shutdown_called_,
std::shared_ptr<S3QueueLog> s3_queue_log_,
const StorageID & storage_id_)
: ISource(header_)
, WithContext(context_)
, name(std::move(name_))
, bucket(bucket_)
, version_id(version_id_)
, format(format_)
, columns_desc(info.columns_description)
, request_settings(request_settings_)
, client(client_)
, files_metadata(files_metadata_)
, requested_virtual_columns(info.requested_virtual_columns)
, requested_columns(info.requested_columns)
, file_iterator(file_iterator_)
, action(action_)
, files_metadata(files_metadata_)
, internal_source(std::move(internal_source_))
, requested_virtual_columns(requested_virtual_columns_)
, shutdown_called(shutdown_called_)
, s3_queue_log(s3_queue_log_)
, storage_id(storage_id_)
, remove_file_func(remove_file_func_)
, log(&Poco::Logger::get("StorageS3QueueSource"))
{
internal_source = std::make_shared<StorageS3Source>(
info,
format_,
name_,
context_,
format_settings_,
max_block_size_,
request_settings_,
compression_hint_,
client_,
bucket_,
version_id_,
url_host_and_port,
file_iterator,
download_thread_num_,
false,
/* query_info */ std::nullopt);
reader = std::move(internal_source->reader);
if (reader)
{
reader_future = std::move(internal_source->reader_future);
}
}
StorageS3QueueSource::~StorageS3QueueSource()
@ -218,61 +118,87 @@ String StorageS3QueueSource::getName() const
Chunk StorageS3QueueSource::generate()
{
auto file_progress = getContext()->getFileProgressCallback();
while (true)
{
if (isCancelled() || !reader)
if (!reader)
break;
if (isCancelled())
{
if (reader)
reader->cancel();
reader->cancel();
break;
}
Chunk chunk;
bool success_in_pulling = false;
if (shutdown_called)
{
if (processed_rows_from_file)
{
/// We could delay the shutdown until the files which already started processing before the shutdown are finished.
/// But if the files are big and `s3queue_processing_threads_num` is not small, it can take a significant time.
/// Anyway we cannot do anything in case of SIGTERM, so the destination table must support deduplication,
/// and here we rely on it as well.
LOG_WARNING(
log, "Shutdown called, {} rows are already processed, but file is not fully processed",
processed_rows_from_file);
}
break;
}
const auto * key_with_info = dynamic_cast<const S3QueueKeyWithInfo *>(&reader.getKeyWithInfo());
auto file_status = key_with_info->file_status;
auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters);
SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); });
/// FIXME: if files are compressed, profile counters update does not work fully (s3 related counters are not saved). Why?
try
{
auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueuePullMicroseconds);
Chunk chunk;
if (reader->pull(chunk))
{
UInt64 num_rows = chunk.getNumRows();
auto file_path = reader.getPath();
LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getPath());
for (const auto & virtual_column : requested_virtual_columns)
{
if (virtual_column.name == "_path")
{
chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
}
else if (virtual_column.name == "_file")
{
size_t last_slash_pos = file_path.find_last_of('/');
auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1));
chunk.addColumn(column->convertToFullColumnIfConst());
}
}
success_in_pulling = true;
file_status->processed_rows += chunk.getNumRows();
processed_rows_from_file += chunk.getNumRows();
VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath());
return chunk;
}
}
catch (const Exception & e)
catch (...)
{
LOG_ERROR(log, "Exception in chunk pulling: {} ", e.displayText());
files_metadata->setFileFailed(reader.getFile(), e.message());
success_in_pulling = false;
}
if (success_in_pulling)
{
applyActionAfterProcessing(reader.getFile());
files_metadata->setFileProcessed(reader.getFile());
return chunk;
const auto message = getCurrentExceptionMessage(true);
LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getFile(), message);
files_metadata->setFileFailed(key_with_info->processing_holder, message);
appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false);
throw;
}
files_metadata->setFileProcessed(key_with_info->processing_holder);
applyActionAfterProcessing(reader.getFile());
assert(reader_future.valid());
appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, true);
file_status.reset();
processed_rows_from_file = 0;
if (shutdown_called)
{
LOG_INFO(log, "Shutdown was called, stopping sync");
break;
}
chassert(reader_future.valid());
reader = reader_future.get();
if (!reader)
break;
file_status = files_metadata->getFileStatus(reader.getFile());
/// Even if the task is finished, the thread may not be freed in the pool yet.
/// So wait until it is freed before scheduling a new task.
internal_source->create_reader_pool.wait();
@ -282,35 +208,42 @@ Chunk StorageS3QueueSource::generate()
return {};
}
void StorageS3QueueSource::applyActionAfterProcessing(const String & file_path)
void StorageS3QueueSource::applyActionAfterProcessing(const String & path)
{
switch (action)
{
case S3QueueAction::DELETE:
deleteProcessedObject(file_path);
{
assert(remove_file_func);
remove_file_func(path);
break;
}
case S3QueueAction::KEEP:
break;
}
}
void StorageS3QueueSource::deleteProcessedObject(const String & file_path)
void StorageS3QueueSource::appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed)
{
LOG_INFO(log, "Delete processed file {} from bucket {}", file_path, bucket);
if (!s3_queue_log)
return;
S3::DeleteObjectRequest request;
request.WithKey(file_path).WithBucket(bucket);
auto outcome = client->DeleteObject(request);
if (!outcome.IsSuccess())
S3QueueLogElement elem{};
{
const auto & err = outcome.GetError();
LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
}
else
{
LOG_TRACE(log, "Object with path {} was removed from S3", file_path);
std::lock_guard lock(file_status_.metadata_lock);
elem = S3QueueLogElement
{
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
.file_name = filename,
.rows_processed = processed_rows,
.status = processed ? S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed,
.counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(),
.processing_start_time = file_status_.processing_start_time,
.processing_end_time = file_status_.processing_end_time,
.exception = file_status_.last_exception,
};
}
s3_queue_log->add(std::move(elem));
}
}

View File

@ -2,125 +2,101 @@
#include "config.h"
#if USE_AWS_S3
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Processors/ISource.h>
#include <Storages/S3Queue/S3QueueFilesMetadata.h>
#include <Storages/StorageS3.h>
#include <Interpreters/S3QueueLog.h>
# include <Core/Types.h>
# include <Compression/CompressionInfo.h>
# include <Storages/IStorage.h>
# include <Storages/S3Queue/S3QueueFilesMetadata.h>
# include <Storages/StorageS3.h>
# include <Storages/StorageS3Settings.h>
# include <Storages/prepareReadingFromFormat.h>
# include <IO/CompressionMethod.h>
# include <IO/S3/getObjectInfo.h>
# include <Interpreters/Context.h>
# include <Interpreters/threadPoolCallbackRunner.h>
# include <Processors/Executors/PullingPipelineExecutor.h>
# include <Processors/ISource.h>
# include <Storages/Cache/SchemaCache.h>
# include <Storages/StorageConfiguration.h>
# include <Poco/URI.h>
# include <Common/ZooKeeper/ZooKeeper.h>
# include <Common/logger_useful.h>
namespace Poco { class Logger; }
namespace DB
{
class StorageS3QueueSource : public ISource, WithContext
{
public:
using IIterator = StorageS3Source::IIterator;
using DisclosedGlobIterator = StorageS3Source::DisclosedGlobIterator;
using KeysWithInfo = StorageS3Source::KeysWithInfo;
using KeyWithInfo = StorageS3Source::KeyWithInfo;
class QueueGlobIterator : public IIterator
using KeyWithInfoPtr = StorageS3Source::KeyWithInfoPtr;
using GlobIterator = StorageS3Source::DisclosedGlobIterator;
using ZooKeeperGetter = std::function<zkutil::ZooKeeperPtr()>;
using RemoveFileFunc = std::function<void(std::string)>;
using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr;
using Metadata = S3QueueFilesMetadata;
struct S3QueueKeyWithInfo : public StorageS3Source::KeyWithInfo
{
S3QueueKeyWithInfo(
const std::string & key_,
std::optional<S3::ObjectInfo> info_,
Metadata::ProcessingNodeHolderPtr processing_holder_,
FileStatusPtr file_status_);
Metadata::ProcessingNodeHolderPtr processing_holder;
FileStatusPtr file_status;
};
class FileIterator : public IIterator
{
public:
QueueGlobIterator(
const S3::Client & client_,
const S3::URI & globbed_uri_,
ASTPtr query,
const NamesAndTypesList & virtual_columns,
ContextPtr context,
UInt64 & max_poll_size_,
const S3Settings::RequestSettings & request_settings_ = {});
FileIterator(std::shared_ptr<S3QueueFilesMetadata> metadata_, std::unique_ptr<GlobIterator> glob_iterator_, std::atomic<bool> & shutdown_called_);
KeyWithInfo next() override;
Strings
filterProcessingFiles(const S3QueueMode & engine_mode, std::unordered_set<String> & exclude_keys, const String & max_file = "");
/// Note:
/// List results in S3 are always returned in UTF-8 binary order.
/// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html)
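/// For example, in this order "file_10" sorts before "file_2" (byte '1' < byte '2'),
/// which is what the string comparison of keys in Ordered mode assumes.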
KeyWithInfoPtr next() override;
size_t estimatedKeysCount() override;
private:
UInt64 max_poll_size;
KeysWithInfo keys_buf;
KeysWithInfo processing_keys;
mutable std::mutex mutex;
std::unique_ptr<DisclosedGlobIterator> glob_iterator;
std::vector<KeyWithInfo>::iterator processing_iterator;
Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSourceIterator");
const std::shared_ptr<S3QueueFilesMetadata> metadata;
const std::unique_ptr<GlobIterator> glob_iterator;
std::atomic<bool> & shutdown_called;
std::mutex mutex;
};
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
StorageS3QueueSource(
const ReadFromFormatInfo & info,
const String & format,
String name_,
ContextPtr context_,
std::optional<FormatSettings> format_settings_,
UInt64 max_block_size_,
const S3Settings::RequestSettings & request_settings_,
String compression_hint_,
const std::shared_ptr<const S3::Client> & client_,
const String & bucket,
const String & version_id,
const String & url_host_and_port,
std::shared_ptr<IIterator> file_iterator_,
const Block & header_,
std::unique_ptr<StorageS3Source> internal_source_,
std::shared_ptr<S3QueueFilesMetadata> files_metadata_,
const S3QueueAction & action_,
size_t download_thread_num);
RemoveFileFunc remove_file_func_,
const NamesAndTypesList & requested_virtual_columns_,
ContextPtr context_,
const std::atomic<bool> & shutdown_called_,
std::shared_ptr<S3QueueLog> s3_queue_log_,
const StorageID & storage_id_);
~StorageS3QueueSource() override;
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
String getName() const override;
Chunk generate() override;
private:
String name;
String bucket;
String version_id;
String format;
ColumnsDescription columns_desc;
S3Settings::RequestSettings request_settings;
std::shared_ptr<const S3::Client> client;
const String name;
const S3QueueAction action;
const std::shared_ptr<S3QueueFilesMetadata> files_metadata;
const std::shared_ptr<StorageS3Source> internal_source;
const NamesAndTypesList requested_virtual_columns;
const std::atomic<bool> & shutdown_called;
const std::shared_ptr<S3QueueLog> s3_queue_log;
const StorageID storage_id;
RemoveFileFunc remove_file_func;
Poco::Logger * log;
std::shared_ptr<S3QueueFilesMetadata> files_metadata;
using ReaderHolder = StorageS3Source::ReaderHolder;
ReaderHolder reader;
NamesAndTypesList requested_virtual_columns;
NamesAndTypesList requested_columns;
std::shared_ptr<IIterator> file_iterator;
const S3QueueAction action;
Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSource");
std::future<ReaderHolder> reader_future;
size_t processed_rows_from_file = 0;
mutable std::mutex mutex;
std::shared_ptr<StorageS3Source> internal_source;
void deleteProcessedObject(const String & file_path);
void applyActionAfterProcessing(const String & file_path);
void applyActionAfterProcessing(const String & path);
void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed);
};
}

View File

@ -2,12 +2,12 @@
#if USE_AWS_S3
# include <Poco/JSON/JSON.h>
# include <Poco/JSON/Object.h>
# include <Poco/JSON/Parser.h>
# include <Storages/S3Queue/S3QueueSettings.h>
# include <Storages/S3Queue/S3QueueTableMetadata.h>
# include <Storages/StorageS3.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Parser.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/S3Queue/S3QueueTableMetadata.h>
#include <Storages/StorageS3.h>
namespace DB
@ -18,13 +18,17 @@ namespace ErrorCodes
extern const int METADATA_MISMATCH;
}
S3QueueTableMetadata::S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings)
S3QueueTableMetadata::S3QueueTableMetadata(
const StorageS3::Configuration & configuration,
const S3QueueSettings & engine_settings,
const StorageInMemoryMetadata & storage_metadata)
{
format_name = configuration.format;
after_processing = engine_settings.after_processing.toString();
mode = engine_settings.mode.toString();
s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit;
s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec;
columns = storage_metadata.getColumns().toString();
}
@ -36,6 +40,7 @@ String S3QueueTableMetadata::toString() const
json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit);
json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec);
json.set("format_name", format_name);
json.set("columns", columns);
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
oss.exceptions(std::ios::failbit);
@ -52,6 +57,7 @@ void S3QueueTableMetadata::read(const String & metadata_str)
s3queue_tracked_files_limit = json->getValue<UInt64>("s3queue_tracked_files_limit");
s3queue_tracked_file_ttl_sec = json->getValue<UInt64>("s3queue_tracked_file_ttl_sec");
format_name = json->getValue<String>("format_name");
columns = json->getValue<String>("columns");
}
S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str)

View File

@ -2,9 +2,9 @@
#if USE_AWS_S3
# include <Storages/S3Queue/S3QueueSettings.h>
# include <Storages/StorageS3.h>
# include <base/types.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/StorageS3.h>
#include <base/types.h>
namespace DB
{
@ -18,13 +18,14 @@ class ReadBuffer;
struct S3QueueTableMetadata
{
String format_name;
String columns;
String after_processing;
String mode;
UInt64 s3queue_tracked_files_limit;
UInt64 s3queue_tracked_file_ttl_sec;
S3QueueTableMetadata() = default;
S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings);
S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata);
void read(const String & metadata_str);
static S3QueueTableMetadata parse(const String & metadata_str);

View File

@ -1,83 +1,102 @@
#include "config.h"
#if USE_AWS_S3
#include <Common/ProfileEvents.h>
#include <IO/S3Common.h>
#include <IO/CompressionMethod.h>
#include <Formats/FormatFactory.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/ISource.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTInsertQuery.h>
#include <Storages/S3Queue/S3QueueTableMetadata.h>
#include <Storages/S3Queue/StorageS3Queue.h>
#include <Storages/S3Queue/S3QueueFilesMetadata.h>
#include <Storages/S3Queue/S3QueueMetadataFactory.h>
#include <Storages/StorageFactory.h>
#include <Storages/StorageMaterializedView.h>
#include <Storages/StorageSnapshot.h>
#include <Storages/VirtualColumnUtils.h>
#include <Storages/prepareReadingFromFormat.h>
#include <filesystem>
# include <Databases/DatabaseReplicated.h>
# include <IO/WriteBuffer.h>
# include <IO/WriteHelpers.h>
# include <Interpreters/InterpreterInsertQuery.h>
# include <Processors/Executors/CompletedPipelineExecutor.h>
# include <Common/ProfileEvents.h>
# include <Common/ZooKeeper/ZooKeeper.h>
# include <Common/isValidUTF8.h>
# include "IO/ParallelReadBuffer.h"
# include <Functions/FunctionsConversion.h>
# include <IO/S3Common.h>
# include <Interpreters/TreeRewriter.h>
# include <Parsers/ASTFunction.h>
# include <Parsers/ASTInsertQuery.h>
# include <Storages/NamedCollectionsHelpers.h>
# include <Storages/PartitionedSink.h>
# include <Storages/S3Queue/S3QueueSource.h>
# include <Storages/S3Queue/S3QueueTableMetadata.h>
# include <Storages/S3Queue/StorageS3Queue.h>
# include <Storages/StorageFactory.h>
# include <Storages/StorageMaterializedView.h>
# include <Storages/StorageS3.h>
# include <Storages/StorageSnapshot.h>
# include <Storages/VirtualColumnUtils.h>
# include <Storages/prepareReadingFromFormat.h>
# include <Common/NamedCollections/NamedCollections.h>
# include <Formats/FormatFactory.h>
# include <Processors/Formats/IInputFormat.h>
# include <Processors/Formats/IOutputFormat.h>
# include <Processors/Transforms/AddingDefaultsTransform.h>
# include <QueryPipeline/QueryPipelineBuilder.h>
# include <DataTypes/DataTypeString.h>
# include <Common/parseGlobs.h>
# include <filesystem>
# include <Processors/ISource.h>
# include <Processors/Sinks/SinkToStorage.h>
# include <QueryPipeline/Pipe.h>
namespace fs = std::filesystem;
namespace ProfileEvents
{
extern const Event S3DeleteObjects;
extern const Event S3ListObjects;
extern const Event S3DeleteObjects;
extern const Event S3ListObjects;
}
namespace DB
{
static const String PARTITION_ID_WILDCARD = "{_partition_id}";
static const auto MAX_THREAD_WORK_DURATION_MS = 60000;
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int S3_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int QUERY_NOT_ALLOWED;
extern const int REPLICA_ALREADY_EXISTS;
extern const int INCOMPATIBLE_COLUMNS;
}
namespace
{
bool containsGlobs(const S3::URI & url)
{
return url.key.find_first_of("*?{") != std::string::npos;
}
std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings)
{
std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value;
if (zk_path_prefix.empty())
zk_path_prefix = "/";
std::string result_zk_path;
if (s3queue_settings.keeper_path.changed)
{
/// We do not add table uuid here on purpose.
result_zk_path = fs::path(zk_path_prefix) / s3queue_settings.keeper_path.value;
}
else
{
auto database_uuid = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID();
result_zk_path = fs::path(zk_path_prefix) / toString(database_uuid) / toString(table_id.uuid);
}
return zkutil::extractZooKeeperPath(result_zk_path, true);
}
void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, Poco::Logger * log)
{
if (s3queue_settings.mode == S3QueueMode::ORDERED && s3queue_settings.s3queue_processing_threads_num > 1)
{
LOG_WARNING(log, "Parallel processing is not yet supported for Ordered mode");
s3queue_settings.s3queue_processing_threads_num = 1;
}
if (!s3queue_settings.s3queue_processing_threads_num)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero");
}
if (!s3queue_settings.s3queue_enable_logging_to_s3queue_log.changed)
{
s3queue_settings.s3queue_enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log;
}
if (s3queue_settings.s3queue_cleanup_interval_min_ms > s3queue_settings.s3queue_cleanup_interval_max_ms)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Setting `s3queue_cleanup_interval_min_ms` ({}) must be less or equal to `s3queue_cleanup_interval_max_ms` ({})",
s3queue_settings.s3queue_cleanup_interval_min_ms, s3queue_settings.s3queue_cleanup_interval_max_ms);
}
}
}
StorageS3Queue::StorageS3Queue(
std::unique_ptr<S3QueueSettings> s3queue_settings_,
@ -87,79 +106,80 @@ StorageS3Queue::StorageS3Queue(
const ConstraintsDescription & constraints_,
const String & comment,
ContextPtr context_,
std::optional<FormatSettings> format_settings_,
ASTPtr partition_by_)
std::optional<FormatSettings> format_settings_)
: IStorage(table_id_)
, WithContext(context_)
, s3queue_settings(std::move(s3queue_settings_))
, zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *s3queue_settings))
, after_processing(s3queue_settings->after_processing)
, files_metadata(S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings))
, configuration{configuration_}
, reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms)
, format_settings(format_settings_)
, partition_by(partition_by_)
, reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms)
, log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")"))
{
if (configuration.url.key.ends_with('/'))
{
configuration.url.key += '*';
if (!withGlobs())
}
else if (!containsGlobs(configuration.url))
{
throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs");
std::string zk_path_prefix = getContext()->getSettingsRef().s3queue_default_zookeeper_path.value;
if (zk_path_prefix.empty())
zk_path_prefix = "/";
std::string result_zk_path;
if (s3queue_settings->keeper_path.changed)
{
/// We do not add table uuid here on purpose.
result_zk_path = fs::path(zk_path_prefix) / s3queue_settings->keeper_path.value;
}
else
{
auto database_uuid = DatabaseCatalog::instance().getDatabase(table_id_.database_name)->getUUID();
result_zk_path = fs::path(zk_path_prefix) / toString(database_uuid) / toString(table_id_.uuid);
}
zk_path = zkutil::extractZooKeeperPath(result_zk_path, true/* check_starts_with_slash */, log);
LOG_INFO(log, "Using zookeeper path: {}", zk_path);
checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), log);
FormatFactory::instance().checkFormatName(configuration.format);
context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri);
StorageInMemoryMetadata storage_metadata;
configuration.update(context_);
FormatFactory::instance().checkFormatName(configuration.format);
context_->getRemoteHostFilter().checkURL(configuration.url.uri);
StorageInMemoryMetadata storage_metadata;
if (columns_.empty())
{
auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_);
storage_metadata.setColumns(columns);
}
else
{
storage_metadata.setColumns(columns_);
}
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
createOrCheckMetadata(storage_metadata);
setInMemoryMetadata(storage_metadata);
auto metadata_snapshot = getInMemoryMetadataPtr();
const bool is_first_replica = createTableIfNotExists(metadata_snapshot);
if (!is_first_replica)
{
checkTableStructure(zk_path, metadata_snapshot);
}
files_metadata = std::make_shared<S3QueueFilesMetadata>(this, *s3queue_settings);
virtual_columns = VirtualColumnUtils::getPathAndFileVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); });
auto poll_thread = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); });
task = std::make_shared<TaskContext>(std::move(poll_thread));
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());
}
bool StorageS3Queue::supportsSubcolumns() const
void StorageS3Queue::startup()
{
return true;
if (task)
task->activateAndSchedule();
}
void StorageS3Queue::shutdown()
{
shutdown_called = true;
if (task)
{
task->deactivate();
}
if (files_metadata)
{
files_metadata->deactivateCleanupTask();
files_metadata.reset();
}
}
void StorageS3Queue::drop()
{
S3QueueMetadataFactory::instance().remove(zk_path);
}
bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const
@ -174,83 +194,70 @@ Pipe StorageS3Queue::read(
ContextPtr local_context,
QueryProcessingStage::Enum /*processed_stage*/,
size_t max_block_size,
size_t /* num_streams */)
size_t num_streams)
{
if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select)
throw Exception(
ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`");
{
throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. "
"To enable use setting `stream_like_engine_allow_direct_select`");
}
if (mv_attached)
throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageS3Queue with attached materialized views");
{
throw Exception(ErrorCodes::QUERY_NOT_ALLOWED,
"Cannot read from {} with attached materialized views", getName());
}
auto query_configuration = updateConfigurationAndGetCopy(local_context);
Pipes pipes;
const size_t adjusted_num_streams = std::min<size_t>(num_streams, s3queue_settings->s3queue_processing_threads_num);
std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(local_context, query_info.query);
auto file_iterator = createFileIterator(local_context, query_info.query);
for (size_t i = 0; i < adjusted_num_streams; ++i)
pipes.emplace_back(createSource(file_iterator, column_names, storage_snapshot, max_block_size, local_context));
return Pipe::unitePipes(std::move(pipes));
}
std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
size_t max_block_size,
ContextPtr local_context)
{
auto configuration_snapshot = updateConfigurationAndGetCopy(local_context);
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
const size_t max_download_threads = local_context->getSettingsRef().max_download_threads;
return Pipe(std::make_shared<StorageS3QueueSource>(
read_from_format_info,
configuration.format,
getName(),
local_context,
format_settings,
auto internal_source = std::make_unique<StorageS3Source>(
read_from_format_info, configuration.format, getName(), local_context, format_settings,
max_block_size,
query_configuration.request_settings,
configuration.compression_method,
query_configuration.client,
query_configuration.url.bucket,
query_configuration.url.version_id,
query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
iterator_wrapper,
files_metadata,
after_processing,
max_download_threads));
}
configuration_snapshot.request_settings,
configuration_snapshot.compression_method,
configuration_snapshot.client,
configuration_snapshot.url.bucket,
configuration_snapshot.url.version_id,
configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()),
file_iterator, local_context->getSettingsRef().max_download_threads, false, /* query_info */ std::nullopt);
SinkToStoragePtr StorageS3Queue::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Write is not supported by storage {}", getName());
}
void StorageS3Queue::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName());
}
NamesAndTypesList StorageS3Queue::getVirtuals() const
{
return virtual_columns;
}
bool StorageS3Queue::supportsPartitionBy() const
{
return true;
}
void StorageS3Queue::startup()
{
if (task)
task->holder->activateAndSchedule();
}
void StorageS3Queue::shutdown()
{
shutdown_called = true;
if (task)
auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client](const std::string & path)
{
task->stream_cancelled = true;
task->holder->deactivate();
}
}
size_t StorageS3Queue::getTableDependentCount() const
{
auto table_id = getStorageID();
// Check if at least one direct dependency is attached
return DatabaseCatalog::instance().getDependentViews(table_id).size();
S3::DeleteObjectRequest request;
request.WithKey(path).WithBucket(bucket);
auto outcome = client->DeleteObject(request);
if (!outcome.IsSuccess())
{
const auto & err = outcome.GetError();
LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
}
else
{
LOG_TRACE(log, "Object with path {} was removed from S3", path);
}
};
auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr;
return std::make_shared<StorageS3QueueSource>(
getName(), read_from_format_info.source_header, std::move(internal_source),
files_metadata, after_processing, file_deleter, read_from_format_info.requested_virtual_columns,
local_context, shutdown_called, s3_queue_log, getStorageID());
}
bool StorageS3Queue::hasDependencies(const StorageID & table_id)
@ -280,41 +287,35 @@ bool StorageS3Queue::hasDependencies(const StorageID & table_id)
void StorageS3Queue::threadFunc()
{
bool reschedule = true;
if (shutdown_called)
return;
try
{
auto table_id = getStorageID();
auto dependencies_count = getTableDependentCount();
const size_t dependencies_count = DatabaseCatalog::instance().getDependentViews(getStorageID()).size();
if (dependencies_count)
{
auto start_time = std::chrono::steady_clock::now();
mv_attached.store(true);
// Keep streaming as long as there are attached views and streaming is not cancelled
while (!task->stream_cancelled)
SCOPE_EXIT({ mv_attached.store(false); });
LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count);
if (streamToViews())
{
if (!hasDependencies(table_id))
{
/// For this case, we can not wait for watch thread to wake up
reschedule = true;
break;
}
LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count);
streamToViews();
auto ts = std::chrono::steady_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(ts - start_time);
if (duration.count() > MAX_THREAD_WORK_DURATION_MS)
{
LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule.");
reschedule = true;
break;
}
/// Reset the reschedule interval.
reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms;
}
else
{
/// Increase the reschedule interval.
reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms;
}
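/// For example, with the default settings (s3queue_polling_min_timeout_ms = 1000, s3queue_polling_backoff_ms = 1000)
/// the interval between empty polls grows as 1000 -> 2000 -> 3000 ms and so on,
/// and drops back to the minimum as soon as streamToViews() returns rows again.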
LOG_DEBUG(log, "Stopped streaming to {} attached views", dependencies_count);
}
else
{
LOG_TEST(log, "No attached dependencies");
}
}
catch (...)
@ -322,20 +323,14 @@ void StorageS3Queue::threadFunc()
tryLogCurrentException(__PRETTY_FUNCTION__);
}
mv_attached.store(false);
if (reschedule && !shutdown_called)
if (!shutdown_called)
{
LOG_TRACE(log, "Reschedule S3 Queue thread func.");
/// Reschedule with backoff.
if (reschedule_processing_interval_ms < s3queue_settings->s3queue_polling_max_timeout_ms)
reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms;
task->holder->scheduleAfter(reschedule_processing_interval_ms);
LOG_TRACE(log, "Reschedule S3 Queue processing thread in {} ms", reschedule_processing_interval_ms);
task->scheduleAfter(reschedule_processing_interval_ms);
}
}
void StorageS3Queue::streamToViews()
bool StorageS3Queue::streamToViews()
{
auto table_id = getStorageID();
auto table = DatabaseCatalog::instance().getTable(table_id, getContext());
@ -348,8 +343,6 @@ void StorageS3Queue::streamToViews()
auto insert = std::make_shared<ASTInsertQuery>();
insert->table_id = table_id;
size_t block_size = 100;
auto s3queue_context = Context::createCopy(getContext());
s3queue_context->makeQueryContext();
auto query_configuration = updateConfigurationAndGetCopy(s3queue_context);
@ -358,40 +351,31 @@ void StorageS3Queue::streamToViews()
// Only insert into dependent views and expect that input blocks contain virtual columns
InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true);
auto block_io = interpreter.execute();
auto column_names = block_io.pipeline.getHeader().getNames();
auto file_iterator = createFileIterator(s3queue_context, nullptr);
// Create a stream for each consumer and join them in a union stream
Pipes pipes;
pipes.reserve(s3queue_settings->s3queue_processing_threads_num);
for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i)
{
auto source = createSource(
file_iterator, block_io.pipeline.getHeader().getNames(),
storage_snapshot, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context);
std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(s3queue_context, nullptr);
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(getContext()), getVirtuals());
const size_t max_download_threads = s3queue_context->getSettingsRef().max_download_threads;
auto pipe = Pipe(std::make_shared<StorageS3QueueSource>(
read_from_format_info,
configuration.format,
getName(),
s3queue_context,
format_settings,
block_size,
query_configuration.request_settings,
configuration.compression_method,
query_configuration.client,
query_configuration.url.bucket,
query_configuration.url.version_id,
query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
iterator_wrapper,
files_metadata,
after_processing,
max_download_threads));
pipes.emplace_back(std::move(source));
}
auto pipe = Pipe::unitePipes(std::move(pipes));
block_io.pipeline.complete(std::move(pipe));
block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num);
block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control);
std::atomic_size_t rows = 0;
{
block_io.pipeline.complete(std::move(pipe));
block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); });
CompletedPipelineExecutor executor(block_io.pipeline);
executor.execute();
}
block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); });
CompletedPipelineExecutor executor(block_io.pipeline);
executor.execute();
return rows > 0;
}
StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context)
@ -402,58 +386,43 @@ StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(Cont
zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const
{
std::lock_guard lock{zk_mutex};
if (!zk_client || zk_client->expired())
{
zk_client = getContext()->getZooKeeper();
zk_client->sync(zk_path);
}
return zk_client;
return getContext()->getZooKeeper();
}
bool StorageS3Queue::createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot)
void StorageS3Queue::createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata)
{
auto zookeeper = getZooKeeper();
zookeeper->createAncestors(zk_path);
for (size_t i = 0; i < zk_create_table_retries; ++i)
for (size_t i = 0; i < 1000; ++i)
{
Coordination::Requests ops;
bool is_first_replica = true;
if (zookeeper->exists(zk_path + "/metadata"))
Coordination::Requests requests;
if (zookeeper->exists(zk_path / "metadata"))
{
if (!zookeeper->exists(zk_path + "/processing"))
ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral));
LOG_DEBUG(log, "This table {} is already created, will use existing metadata for checking engine settings", zk_path);
is_first_replica = false;
checkTableStructure(zk_path, storage_metadata);
}
else
{
String metadata_str = S3QueueTableMetadata(configuration, *s3queue_settings).toString();
ops.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processed", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/failed", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral));
ops.emplace_back(zkutil::makeCreateRequest(
zk_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent));
std::string metadata = S3QueueTableMetadata(configuration, *s3queue_settings, storage_metadata).toString();
requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent));
requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processed", "", zkutil::CreateMode::Persistent));
requests.emplace_back(zkutil::makeCreateRequest(zk_path / "failed", "", zkutil::CreateMode::Persistent));
requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processing", "", zkutil::CreateMode::Persistent));
requests.emplace_back(zkutil::makeCreateRequest(zk_path / "metadata", metadata, zkutil::CreateMode::Persistent));
}
Coordination::Responses responses;
auto code = zookeeper->tryMulti(ops, responses);
auto code = zookeeper->tryMulti(requests, responses);
if (code == Coordination::Error::ZNODEEXISTS)
{
LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path);
LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path.string());
continue;
}
else if (code != Coordination::Error::ZOK)
{
zkutil::KeeperMultiException::check(code, ops, responses);
zkutil::KeeperMultiException::check(code, requests, responses);
}
return is_first_replica;
return;
}
throw Exception(
@ -463,24 +432,20 @@ bool StorageS3Queue::createTableIfNotExists(const StorageMetadataPtr & metadata_
}
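createOrCheckMetadata() above tolerates several replicas registering the same queue path at once: if the metadata node already exists the replica only validates it, otherwise it tries to create all nodes in a single multi-request and retries on ZNODEEXISTS when another replica wins the race. A rough self-contained sketch of that check-or-create-with-retry loop, using an in-memory set as a stand-in for ZooKeeper (FakeKeeper and tryCreateAll are invented names, not zkutil APIs):

#include <cstddef>
#include <mutex>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-in for a coordination service: create a group of nodes atomically,
// failing if any of them already exists (like a ZooKeeper multi-op returning ZNODEEXISTS).
class FakeKeeper
{
public:
    bool exists(const std::string & path)
    {
        std::lock_guard lock(mutex);
        return nodes.count(path) > 0;
    }

    bool tryCreateAll(const std::vector<std::string> & paths)
    {
        std::lock_guard lock(mutex);
        for (const auto & path : paths)
            if (nodes.count(path))
                return false;
        nodes.insert(paths.begin(), paths.end());
        return true;
    }

private:
    std::mutex mutex;
    std::set<std::string> nodes;
};

void createOrCheckMetadata(FakeKeeper & keeper, const std::string & zk_path)
{
    for (size_t attempt = 0; attempt < 1000; ++attempt)
    {
        if (keeper.exists(zk_path + "/metadata"))
            return;  // another replica already registered the table; only validate here

        if (keeper.tryCreateAll({zk_path, zk_path + "/processed", zk_path + "/failed",
                                 zk_path + "/processing", zk_path + "/metadata"}))
            return;  // this replica registered the table first

        // Lost the race between exists() and the create: retry the whole loop.
    }
    throw std::runtime_error("Cannot create metadata after 1000 attempts");
}

int main()
{
    FakeKeeper keeper;
    createOrCheckMetadata(keeper, "/clickhouse/s3queue/demo");  // first replica creates the nodes
    createOrCheckMetadata(keeper, "/clickhouse/s3queue/demo");  // second replica finds them and returns
}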
/** Verify that list of columns and table settings match those specified in ZK (/metadata).
* If not, throw an exception.
*/
void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot)
void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata)
{
// Verify that list of columns and table settings match those specified in ZK (/metadata).
// If not, throw an exception.
auto zookeeper = getZooKeeper();
S3QueueTableMetadata old_metadata(configuration, *s3queue_settings);
Coordination::Stat metadata_stat;
String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata", &metadata_stat);
String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata");
auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str);
S3QueueTableMetadata old_metadata(configuration, *s3queue_settings, storage_metadata);
old_metadata.checkEquals(metadata_from_zk);
Coordination::Stat columns_stat;
auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_prefix) / "columns", &columns_stat));
const ColumnsDescription & old_columns = metadata_snapshot->getColumns();
auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns);
const ColumnsDescription & old_columns = storage_metadata.getColumns();
if (columns_from_zk != old_columns)
{
throw Exception(
@ -492,45 +457,12 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
}
}
std::shared_ptr<StorageS3QueueSource::IIterator>
StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query)
std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query)
{
auto it = std::make_shared<StorageS3QueueSource::QueueGlobIterator>(
*configuration.client,
configuration.url,
query,
virtual_columns,
local_context,
s3queue_settings->s3queue_polling_size.value,
configuration.request_settings);
auto zookeeper = getZooKeeper();
auto lock = files_metadata->acquireLock(zookeeper);
S3QueueFilesMetadata::S3FilesCollection files_to_skip = files_metadata->getProcessedFailedAndProcessingFiles();
Strings files_to_process;
if (s3queue_settings->mode == S3QueueMode::UNORDERED)
{
files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip);
}
else
{
String max_processed_file = files_metadata->getMaxProcessedFile();
files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip, max_processed_file);
}
LOG_TEST(log, "Found files to process: {}", fmt::join(files_to_process, ", "));
files_metadata->setFilesProcessing(files_to_process);
return it;
}
void StorageS3Queue::drop()
{
auto zookeeper = getZooKeeper();
if (zookeeper->exists(zk_path))
zookeeper->removeRecursive(zk_path);
auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
*configuration.client, configuration.url, query, virtual_columns, local_context,
/* read_keys */nullptr, configuration.request_settings);
return std::make_shared<FileIterator>(files_metadata, std::move(glob_iterator), shutdown_called);
}
void registerStorageS3QueueImpl(const String & name, StorageFactory & factory)
@ -540,11 +472,15 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory)
[](const StorageFactory::Arguments & args)
{
if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_s3queue)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3Queue is experimental. You can enable it with the `allow_experimental_s3queue` setting.");
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3Queue is experimental. "
"You can enable it with the `allow_experimental_s3queue` setting.");
}
auto & engine_args = args.engine_args;
if (engine_args.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments");
auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext());
// Use format settings from global server context + settings from
@ -582,10 +518,6 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory)
format_settings = getFormatSettings(args.getContext());
}
ASTPtr partition_by;
if (args.storage_def->partition_by)
partition_by = args.storage_def->partition_by->clone();
return std::make_shared<StorageS3Queue>(
std::move(s3queue_settings),
std::move(configuration),
@ -594,12 +526,10 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory)
args.constraints,
args.comment,
args.getContext(),
format_settings,
partition_by);
format_settings);
},
{
.supports_settings = true,
.supports_sort_order = true, // for partition by
.supports_schema_inference = true,
.source_access_type = AccessType::S3,
});


@ -1,32 +1,15 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
# include <Core/Types.h>
# include <Compression/CompressionInfo.h>
# include <Common/ZooKeeper/ZooKeeper.h>
# include <Core/BackgroundSchedulePool.h>
# include <Storages/IStorage.h>
# include <Storages/S3Queue/S3QueueFilesMetadata.h>
# include <Storages/S3Queue/S3QueueSettings.h>
# include <Storages/S3Queue/S3QueueSource.h>
# include <Storages/StorageS3Settings.h>
# include <IO/CompressionMethod.h>
# include <IO/S3/getObjectInfo.h>
# include <Interpreters/Context.h>
# include <Interpreters/threadPoolCallbackRunner.h>
# include <Processors/Executors/PullingPipelineExecutor.h>
# include <Processors/ISource.h>
# include <Storages/Cache/SchemaCache.h>
# include <Storages/StorageConfiguration.h>
# include <Storages/StorageS3.h>
# include <Poco/URI.h>
# include <Common/logger_useful.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/logger_useful.h>
#include <Core/BackgroundSchedulePool.h>
#include <Storages/IStorage.h>
#include <Storages/S3Queue/S3QueueSettings.h>
#include <Storages/S3Queue/S3QueueSource.h>
#include <Storages/StorageS3.h>
#include <Interpreters/Context.h>
namespace Aws::S3
{
@ -35,7 +18,7 @@ class Client;
namespace DB
{
class S3QueueFilesMetadata;
class StorageS3Queue : public IStorage, WithContext
{
@ -50,8 +33,7 @@ public:
const ConstraintsDescription & constraints_,
const String & comment,
ContextPtr context_,
std::optional<FormatSettings> format_settings_,
ASTPtr partition_by_ = nullptr);
std::optional<FormatSettings> format_settings_);
String getName() const override { return "S3Queue"; }
@ -64,79 +46,55 @@ public:
size_t max_block_size,
size_t num_streams) override;
SinkToStoragePtr write(
const ASTPtr & query,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr context,
bool async_insert) override;
void truncate(
const ASTPtr & /*query*/,
const StorageMetadataPtr & /*metadata_snapshot*/,
ContextPtr /*local_context*/,
TableExclusiveLockHolder &) override;
NamesAndTypesList getVirtuals() const override;
bool supportsPartitionBy() const override;
NamesAndTypesList getVirtuals() const override { return virtual_columns; }
const auto & getFormatName() const { return configuration.format; }
const String & getZooKeeperPath() const { return zk_path; }
const fs::path & getZooKeeperPath() const { return zk_path; }
zkutil::ZooKeeperPtr getZooKeeper() const;
private:
using FileIterator = StorageS3QueueSource::FileIterator;
const std::unique_ptr<S3QueueSettings> s3queue_settings;
const fs::path zk_path;
const S3QueueAction after_processing;
std::shared_ptr<S3QueueFilesMetadata> files_metadata;
Configuration configuration;
const std::optional<FormatSettings> format_settings;
NamesAndTypesList virtual_columns;
BackgroundSchedulePool::TaskHolder task;
std::atomic<bool> stream_cancelled{false};
UInt64 reschedule_processing_interval_ms;
std::optional<FormatSettings> format_settings;
ASTPtr partition_by;
String zk_path;
mutable zkutil::ZooKeeperPtr zk_client;
mutable std::mutex zk_mutex;
std::atomic<bool> mv_attached = false;
std::atomic<bool> shutdown_called{false};
std::atomic<bool> shutdown_called = false;
Poco::Logger * log;
bool supportsSubcolumns() const override;
bool withGlobs() const { return configuration.url.key.find_first_of("*?{") != std::string::npos; }
void threadFunc();
size_t getTableDependentCount() const;
bool hasDependencies(const StorageID & table_id);
void startup() override;
void shutdown() override;
void drop() override;
struct TaskContext
{
BackgroundSchedulePool::TaskHolder holder;
std::atomic<bool> stream_cancelled{false};
explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) { }
};
std::shared_ptr<TaskContext> task;
bool supportsSubsetOfColumns(const ContextPtr & context_) const;
bool supportsSubcolumns() const override { return true; }
const UInt32 zk_create_table_retries = 1000;
bool createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot);
void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot);
std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, ASTPtr query);
std::shared_ptr<StorageS3QueueSource> createSource(
std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
size_t max_block_size,
ContextPtr local_context);
using KeysWithInfo = StorageS3QueueSource::KeysWithInfo;
bool hasDependencies(const StorageID & table_id);
bool streamToViews();
void threadFunc();
std::shared_ptr<StorageS3QueueSource::IIterator>
createFileIterator(ContextPtr local_context, ASTPtr query);
void streamToViews();
void createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata);
void checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata);
Configuration updateConfigurationAndGetCopy(ContextPtr local_context);
};


@ -478,6 +478,13 @@ ActionLock StorageMaterializedView::getActionLock(StorageActionBlockType type)
return ActionLock{};
}
bool StorageMaterializedView::isRemote() const
{
if (auto table = tryGetTargetTable())
return table->isRemote();
return false;
}
void registerStorageMaterializedView(StorageFactory & factory)
{
factory.registerStorage("MaterializedView", [](const StorageFactory::Arguments & args)


@ -22,6 +22,7 @@ public:
std::string getName() const override { return "MaterializedView"; }
bool isView() const override { return true; }
bool isRemote() const override;
bool hasInnerTable() const { return has_inner_table; }


@ -161,7 +161,7 @@ public:
/// We don't have to list bucket, because there is no asterisks.
if (key_prefix.size() == globbed_uri.key.size())
{
buffer.emplace_back(globbed_uri.key, std::nullopt);
buffer.emplace_back(std::make_shared<KeyWithInfo>(globbed_uri.key, std::nullopt));
buffer_iter = buffer.begin();
is_finished = true;
return;
@ -182,7 +182,7 @@ public:
fillInternalBufferAssumeLocked();
}
KeyWithInfo next()
KeyWithInfoPtr next()
{
std::lock_guard lock(mutex);
return nextAssumeLocked();
@ -201,7 +201,7 @@ public:
private:
using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome;
KeyWithInfo nextAssumeLocked()
KeyWithInfoPtr nextAssumeLocked()
{
if (buffer_iter != buffer.end())
{
@ -210,11 +210,11 @@ private:
/// If the url doesn't contain globs, we didn't list the s3 bucket and didn't get object info for the key.
/// So we fetch the object info lazily here, on the 'next()' request.
if (!answer.info)
if (!answer->info)
{
answer.info = S3::getObjectInfo(*client, globbed_uri.bucket, answer.key, globbed_uri.version_id, request_settings);
answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings);
if (file_progress_callback)
file_progress_callback(FileProgress(0, answer.info->size));
file_progress_callback(FileProgress(0, answer->info->size));
}
return answer;
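Iterators now return KeyWithInfoPtr (a std::shared_ptr) instead of a plain KeyWithInfo: a null pointer cleanly signals that the listing is exhausted, and object info fetched lazily on the first next() call is stored once and visible to every holder of the pointer. A small self-contained sketch of the idea, with placeholder ObjectInfo/fetchObjectInfo/KeysIterator names rather than the real S3 helpers:

#include <cstddef>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct ObjectInfo { size_t size = 0; };

struct KeyWithInfo
{
    explicit KeyWithInfo(std::string key_, std::optional<ObjectInfo> info_ = std::nullopt)
        : key(std::move(key_)), info(std::move(info_)) {}

    std::string key;
    std::optional<ObjectInfo> info;
};
using KeyWithInfoPtr = std::shared_ptr<KeyWithInfo>;

class KeysIterator
{
public:
    explicit KeysIterator(std::vector<std::string> keys)
    {
        for (auto & key : keys)
            buffer.push_back(std::make_shared<KeyWithInfo>(std::move(key)));
    }

    // nullptr means "no more keys", which a by-value KeyWithInfo could not express cleanly.
    KeyWithInfoPtr next()
    {
        if (index >= buffer.size())
            return nullptr;
        return buffer[index++];
    }

private:
    std::vector<KeyWithInfoPtr> buffer;
    size_t index = 0;
};

// Placeholder for the object-info request: pretend the object size is the key length.
ObjectInfo fetchObjectInfo(const std::string & key) { return {key.size()}; }

int main()
{
    KeysIterator iterator({"a.csv", "bb.csv"});
    while (auto key_with_info = iterator.next())
    {
        if (!key_with_info->info)
            key_with_info->info = fetchObjectInfo(key_with_info->key);  // filled once, shared by all holders
        std::cout << key_with_info->key << " " << key_with_info->info->size << "\n";
    }
}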
@ -287,7 +287,7 @@ private:
.last_modification_time = row.GetLastModified().Millis() / 1000,
};
temp_buffer.emplace_back(std::move(key), std::move(info));
temp_buffer.emplace_back(std::make_shared<KeyWithInfo>(std::move(key), std::move(info)));
}
}
@ -299,7 +299,7 @@ private:
if (!is_initialized)
{
filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(globbed_uri.bucket) / temp_buffer.front().key, getContext());
filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(globbed_uri.bucket) / temp_buffer.front()->key, getContext());
is_initialized = true;
}
@ -308,7 +308,7 @@ private:
std::vector<String> paths;
paths.reserve(temp_buffer.size());
for (const auto & key_with_info : temp_buffer)
paths.push_back(fs::path(globbed_uri.bucket) / key_with_info.key);
paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key);
VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast);
}
@ -317,8 +317,8 @@ private:
if (file_progress_callback)
{
for (const auto & [_, info] : buffer)
file_progress_callback(FileProgress(0, info->size));
for (const auto & key_with_info : buffer)
file_progress_callback(FileProgress(0, key_with_info->info->size));
}
/// Set iterator only after the whole batch is processed
@ -381,7 +381,7 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
{
}
StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next()
StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next()
{
return pimpl->next();
}
@ -432,11 +432,11 @@ public:
if (read_keys_)
{
for (const auto & key : keys)
read_keys_->push_back({key, {}});
read_keys_->push_back(std::make_shared<KeyWithInfo>(key));
}
}
KeyWithInfo next()
KeyWithInfoPtr next()
{
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= keys.size())
@ -449,7 +449,7 @@ public:
file_progress_callback(FileProgress(0, info->size));
}
return {key, info};
return std::make_shared<KeyWithInfo>(key, info);
}
size_t objectsCount()
@ -486,7 +486,7 @@ StorageS3Source::KeysIterator::KeysIterator(
{
}
StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next()
StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next()
{
return pimpl->next();
}
@ -512,14 +512,14 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator(
pool.wait();
buffer.reserve(max_threads_count);
for (auto & key_future : keys)
buffer.emplace_back(key_future.get(), std::nullopt);
buffer.emplace_back(std::make_shared<KeyWithInfo>(key_future.get(), std::nullopt));
}
StorageS3Source::KeyWithInfo StorageS3Source::ReadTaskIterator::next()
StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next()
{
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= buffer.size())
return {callback(), {}};
return std::make_shared<KeyWithInfo>(callback());
return buffer[current_index];
}
@ -576,22 +576,22 @@ StorageS3Source::StorageS3Source(
StorageS3Source::ReaderHolder StorageS3Source::createReader()
{
KeyWithInfo key_with_info;
KeyWithInfoPtr key_with_info;
do
{
key_with_info = (*file_iterator)();
if (key_with_info.key.empty())
if (!key_with_info || key_with_info->key.empty())
return {};
if (!key_with_info.info)
key_with_info.info = S3::getObjectInfo(*client, bucket, key_with_info.key, version_id, request_settings);
if (!key_with_info->info)
key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings);
}
while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info.info->size == 0);
while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0);
QueryPipelineBuilder builder;
std::shared_ptr<ISource> source;
std::unique_ptr<ReadBuffer> read_buf;
std::optional<size_t> num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(key_with_info) : std::nullopt;
std::optional<size_t> num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(*key_with_info) : std::nullopt;
if (num_rows_from_cache)
{
/// We should not return single chunk with all number of rows,
@ -604,8 +604,8 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader()
}
else
{
auto compression_method = chooseCompressionMethod(key_with_info.key, compression_hint);
read_buf = createS3ReadBuffer(key_with_info.key, key_with_info.info->size);
auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint);
read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size);
auto input_format = FormatFactory::instance().getInput(
format,
@ -1505,7 +1505,7 @@ namespace
{
current_key_with_info = (*file_iterator)();
if (current_key_with_info.key.empty())
if (!current_key_with_info || current_key_with_info->key.empty())
{
if (first)
throw Exception(
@ -1526,15 +1526,15 @@ namespace
return nullptr;
}
if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info.info && current_key_with_info.info->size == 0)
if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0)
continue;
int zstd_window_log_max = static_cast<int>(getContext()->getSettingsRef().zstd_window_log_max);
auto impl = std::make_unique<ReadBufferFromS3>(configuration.client, configuration.url.bucket, current_key_with_info.key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings());
auto impl = std::make_unique<ReadBufferFromS3>(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings());
if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof())
{
first = false;
return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info.key, configuration.compression_method), zstd_window_log_max);
return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max);
}
}
}
@ -1549,7 +1549,7 @@ namespace
if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3)
return;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info.key;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key;
auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext());
StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows);
}
@ -1560,7 +1560,7 @@ namespace
const StorageS3::Configuration & configuration;
const std::optional<FormatSettings> & format_settings;
std::optional<ColumnsDescription> columns_from_cache;
StorageS3Source::KeyWithInfo current_key_with_info;
StorageS3Source::KeyWithInfoPtr current_key_with_info;
size_t prev_read_keys_size;
bool first = true;
};
@ -1700,9 +1700,9 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
auto get_last_mod_time = [&]
{
time_t last_modification_time = 0;
if (it->info)
if ((*it)->info)
{
last_modification_time = it->info->last_modification_time;
last_modification_time = (*it)->info->last_modification_time;
}
else
{
@ -1712,7 +1712,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
last_modification_time = S3::getObjectInfo(
*configuration.client,
configuration.url.bucket,
it->key,
(*it)->key,
configuration.url.version_id,
configuration.request_settings,
/*with_metadata=*/ false,
@ -1723,7 +1723,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt;
};
String path = fs::path(configuration.url.bucket) / it->key;
String path = fs::path(configuration.url.bucket) / (*it)->key;
String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path;
auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, ctx);
auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time);
@ -1745,7 +1745,7 @@ void StorageS3::addColumnsToCache(
auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket;
Strings sources;
sources.reserve(keys.size());
std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem.key; });
std::transform(keys.begin(), keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; });
auto cache_keys = getKeysForSchemaCache(sources, format_name, format_settings, ctx);
auto & schema_cache = getSchemaCache(ctx);
schema_cache.addManyColumns(cache_keys, columns);


@ -43,22 +43,24 @@ public:
struct KeyWithInfo
{
KeyWithInfo() = default;
KeyWithInfo(String key_, std::optional<S3::ObjectInfo> info_)
: key(std::move(key_)), info(std::move(info_))
{
}
explicit KeyWithInfo(String key_, std::optional<S3::ObjectInfo> info_ = std::nullopt)
: key(std::move(key_)), info(std::move(info_)) {}
virtual ~KeyWithInfo() = default;
String key;
std::optional<S3::ObjectInfo> info;
};
using KeyWithInfoPtr = std::shared_ptr<KeyWithInfo>;
using KeysWithInfo = std::vector<KeyWithInfo>;
using KeysWithInfo = std::vector<KeyWithInfoPtr>;
class IIterator
{
public:
virtual ~IIterator() = default;
virtual KeyWithInfo next() = 0;
virtual KeyWithInfoPtr next() = 0;
/// Estimates how many streams we need to process all files.
/// If the key count >= max_threads_count, the returned number may not represent the actual number of keys.
@ -66,7 +68,7 @@ public:
/// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results.
virtual size_t estimatedKeysCount() = 0;
KeyWithInfo operator ()() { return next(); }
KeyWithInfoPtr operator ()() { return next(); }
};
class DisclosedGlobIterator : public IIterator
@ -82,7 +84,7 @@ public:
const S3Settings::RequestSettings & request_settings_ = {},
std::function<void(FileProgress)> progress_callback_ = {});
KeyWithInfo next() override;
KeyWithInfoPtr next() override;
size_t estimatedKeysCount() override;
private:
@ -106,7 +108,7 @@ public:
KeysWithInfo * read_keys = nullptr,
std::function<void(FileProgress)> progress_callback_ = {});
KeyWithInfo next() override;
KeyWithInfoPtr next() override;
size_t estimatedKeysCount() override;
private:
@ -120,7 +122,7 @@ public:
public:
explicit ReadTaskIterator(const ReadTaskCallback & callback_, const size_t max_threads_count);
KeyWithInfo next() override;
KeyWithInfoPtr next() override;
size_t estimatedKeysCount() override;
private:
@ -176,13 +178,13 @@ private:
{
public:
ReaderHolder(
KeyWithInfo key_with_info_,
KeyWithInfoPtr key_with_info_,
String bucket_,
std::unique_ptr<ReadBuffer> read_buf_,
std::shared_ptr<ISource> source_,
std::unique_ptr<QueryPipeline> pipeline_,
std::unique_ptr<PullingPipelineExecutor> reader_)
: key_with_info(std::move(key_with_info_))
: key_with_info(key_with_info_)
, bucket(std::move(bucket_))
, read_buf(std::move(read_buf_))
, source(std::move(source_))
@ -216,14 +218,14 @@ private:
explicit operator bool() const { return reader != nullptr; }
PullingPipelineExecutor * operator->() { return reader.get(); }
const PullingPipelineExecutor * operator->() const { return reader.get(); }
String getPath() const { return fs::path(bucket) / key_with_info.key; }
const String & getFile() const { return key_with_info.key; }
const KeyWithInfo & getKeyWithInfo() const { return key_with_info; }
String getPath() const { return fs::path(bucket) / key_with_info->key; }
const String & getFile() const { return key_with_info->key; }
const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; }
const IInputFormat * getInputFormat() const { return dynamic_cast<const IInputFormat *>(source.get()); }
private:
KeyWithInfo key_with_info;
KeyWithInfoPtr key_with_info;
String bucket;
std::unique_ptr<ReadBuffer> read_buf;
std::shared_ptr<ISource> source;


@ -82,7 +82,13 @@ RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr
{
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.url, query, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String { return iterator->next().key; });
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String
{
if (auto next = iterator->next())
return next->key;
return "";
});
return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
}


@ -156,12 +156,62 @@ StorageSet::StorageSet(
}
void StorageSet::insertBlock(const Block & block, ContextPtr) { set->insertFromBlock(block.getColumnsWithTypeAndName()); }
void StorageSet::finishInsert() { set->finishInsert(); }
SetPtr StorageSet::getSet() const
{
std::lock_guard lock(mutex);
return set;
}
size_t StorageSet::getSize(ContextPtr) const { return set->getTotalRowCount(); }
std::optional<UInt64> StorageSet::totalRows(const Settings &) const { return set->getTotalRowCount(); }
std::optional<UInt64> StorageSet::totalBytes(const Settings &) const { return set->getTotalByteCount(); }
void StorageSet::insertBlock(const Block & block, ContextPtr)
{
SetPtr current_set;
{
std::lock_guard lock(mutex);
current_set = set;
}
current_set->insertFromBlock(block.getColumnsWithTypeAndName());
}
void StorageSet::finishInsert()
{
SetPtr current_set;
{
std::lock_guard lock(mutex);
current_set = set;
}
current_set->finishInsert();
}
size_t StorageSet::getSize(ContextPtr) const
{
SetPtr current_set;
{
std::lock_guard lock(mutex);
current_set = set;
}
return current_set->getTotalRowCount();
}
std::optional<UInt64> StorageSet::totalRows(const Settings &) const
{
SetPtr current_set;
{
std::lock_guard lock(mutex);
current_set = set;
}
return current_set->getTotalRowCount();
}
std::optional<UInt64> StorageSet::totalBytes(const Settings &) const
{
SetPtr current_set;
{
std::lock_guard lock(mutex);
current_set = set;
}
return current_set->getTotalByteCount();
}
void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &)
{
@ -176,8 +226,13 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn
Block header = metadata_snapshot->getSampleBlock();
increment = 0;
set = std::make_shared<Set>(SizeLimits(), 0, true);
set->setHeader(header.getColumnsWithTypeAndName());
auto new_set = std::make_shared<Set>(SizeLimits(), 0, true);
new_set->setHeader(header.getColumnsWithTypeAndName());
{
std::lock_guard lock(mutex);
set = new_set;
}
}
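Each accessor above takes the mutex only long enough to copy the shared_ptr and then works on the Set outside the lock, so a concurrent truncate() that publishes a fresh set never invalidates the object a reader is already using. A minimal standalone sketch of that copy-under-lock pattern with generic names (a single writer is assumed for simplicity):

#include <cstddef>
#include <iostream>
#include <memory>
#include <mutex>
#include <vector>

// The guarded shared_ptr is copied under a short lock; all real work happens
// on the copy, outside the lock.
class ConcurrentHolder
{
public:
    ConcurrentHolder() : data(std::make_shared<std::vector<int>>()) {}

    void insert(int value)
    {
        auto current = snapshot();
        current->push_back(value);
    }

    size_t size() const { return snapshot()->size(); }

    // "truncate": publish a fresh object; readers still holding the old pointer keep a valid one.
    void reset()
    {
        auto fresh = std::make_shared<std::vector<int>>();
        std::lock_guard lock(mutex);
        data = fresh;
    }

private:
    std::shared_ptr<std::vector<int>> snapshot() const
    {
        std::lock_guard lock(mutex);
        return data;
    }

    mutable std::mutex mutex;
    std::shared_ptr<std::vector<int>> data;
};

int main()
{
    ConcurrentHolder holder;
    holder.insert(1);
    holder.insert(2);
    holder.reset();                      // concurrent readers with an old snapshot stay valid
    std::cout << holder.size() << "\n";  // 0: the fresh object is now published
}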


@ -79,7 +79,7 @@ public:
String getName() const override { return "Set"; }
/// Access the insides.
SetPtr & getSet() { return set; }
SetPtr getSet() const;
void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override;
@ -87,7 +87,9 @@ public:
std::optional<UInt64> totalBytes(const Settings & settings) const override;
private:
SetPtr set;
/// Allows truncating the set concurrently while the existing set is still being read or filled.
mutable std::mutex mutex;
SetPtr set TSA_GUARDED_BY(mutex);
void insertBlock(const Block & block, ContextPtr) override;
void finishInsert() override;


@ -0,0 +1,73 @@
#include "StorageSystemS3Queue.h"
#include <Access/ContextAccess.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeMap.h>
#include <Interpreters/Cache/FileCache.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Storages/S3Queue/S3QueueFilesMetadata.h>
#include <Storages/S3Queue/S3QueueMetadataFactory.h>
#include <Storages/S3Queue/StorageS3Queue.h>
#include <Disks/IDisk.h>
namespace DB
{
NamesAndTypesList StorageSystemS3Queue::getNamesAndTypes()
{
return {
{"zookeeper_path", std::make_shared<DataTypeString>()},
{"file_name", std::make_shared<DataTypeString>()},
{"rows_processed", std::make_shared<DataTypeUInt64>()},
{"status", std::make_shared<DataTypeString>()},
{"processing_start_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"processing_end_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"ProfileEvents", std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt64>())},
{"exception", std::make_shared<DataTypeString>()},
};
}
StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_)
: IStorageSystemOneBlock(table_id_)
{
}
void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const
{
for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll())
{
for (const auto & [file_name, file_status] : metadata->getFileStateses())
{
size_t i = 0;
res_columns[i++]->insert(zookeeper_path);
res_columns[i++]->insert(file_name);
std::lock_guard lock(file_status->metadata_lock);
res_columns[i++]->insert(file_status->processed_rows.load());
res_columns[i++]->insert(magic_enum::enum_name(file_status->state));
if (file_status->processing_start_time)
res_columns[i++]->insert(file_status->processing_start_time);
else
res_columns[i++]->insertDefault();
if (file_status->processing_end_time)
res_columns[i++]->insert(file_status->processing_end_time);
else
res_columns[i++]->insertDefault();
ProfileEvents::dumpToMapColumn(file_status->profile_counters.getPartiallyAtomicSnapshot(), res_columns[i++].get(), true);
res_columns[i++]->insert(file_status->last_exception);
}
}
}
}


@ -0,0 +1,23 @@
#pragma once
#include "config.h"
#include <Storages/System/IStorageSystemOneBlock.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
namespace DB
{
class StorageSystemS3Queue final : public IStorageSystemOneBlock<StorageSystemS3Queue>
{
public:
explicit StorageSystemS3Queue(const StorageID & table_id_);
std::string getName() const override { return "SystemS3Queue"; }
static NamesAndTypesList getNamesAndTypes();
protected:
void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override;
};
}


@ -84,6 +84,7 @@
#include <Storages/System/StorageSystemZooKeeperConnection.h>
#include <Storages/System/StorageSystemJemalloc.h>
#include <Storages/System/StorageSystemScheduler.h>
#include <Storages/System/StorageSystemS3Queue.h>
#if USE_RDKAFKA
#include <Storages/System/StorageSystemKafkaConsumers.h>
@ -196,6 +197,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b
attach<StorageSystemNamedCollections>(context, system_database, "named_collections");
attach<StorageSystemUserProcesses>(context, system_database, "user_processes");
attach<StorageSystemJemallocBins>(context, system_database, "jemalloc_bins");
attach<StorageSystemS3Queue>(context, system_database, "s3queue");
if (has_zookeeper)
{


@ -1,3 +1,4 @@
#include <algorithm>
#include <memory>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>
@ -81,14 +82,33 @@ bool extractFunctions(const ASTPtr & expression, const std::function<bool(const
}
else if (function->name == "or")
{
bool ret = true;
bool ret = false;
ASTs or_args;
for (const auto & child : function->arguments->children)
ret &= extractFunctions(child, is_constant, or_args);
/// We can keep the condition only if it is still an OR condition (i.e. we
/// have dependent conditions for columns on both sides)
if (or_args.size() == 2)
ret |= extractFunctions(child, is_constant, or_args);
if (!or_args.empty())
{
/// If fewer arguments satisfy is_constant() == true, we need to add an
/// always-true argument implicitly to avoid breaking the AND invariant.
///
/// Consider the following:
///
/// ((value = 10) OR (_table = 'v2')) AND ((_table = 'v1') OR (value = 20))
///
/// Without implicit always-true:
///
/// (_table = 'v2') AND (_table = 'v1')
///
/// With:
///
/// (_table = 'v2' OR 1) AND (_table = 'v1' OR 1) -> (_table = 'v2') OR (_table = 'v1')
///
if (or_args.size() != function->arguments->children.size())
or_args.push_back(std::make_shared<ASTLiteral>(Field(1)));
result.push_back(makeASTForLogicalOr(std::move(or_args)));
}
return ret;
}
}
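The padding rule can be checked on plain strings: from each OR keep only the disjuncts that can be evaluated, and if any were dropped, append a literal true so the surrounding AND never becomes stricter than the original filter. A self-contained sketch of just that step (it mirrors the idea on strings; the real code builds ASTs with makeASTForLogicalOr):

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Keep only the disjuncts the predicate accepts; if some were dropped,
// append "1" (always true) so the clause can only get weaker, never stricter.
std::string extractOr(const std::vector<std::string> & disjuncts,
                      const std::function<bool(const std::string &)> & can_keep)
{
    std::vector<std::string> kept;
    for (const auto & disjunct : disjuncts)
        if (can_keep(disjunct))
            kept.push_back(disjunct);

    if (kept.empty())
        return "1";  // nothing usable: the whole clause degenerates to always-true
    if (kept.size() != disjuncts.size())
        kept.push_back("1");

    std::string out = kept.front();
    for (size_t i = 1; i < kept.size(); ++i)
        out += " OR " + kept[i];
    return "(" + out + ")";
}

int main()
{
    // Only conditions on the virtual column _table can be evaluated early.
    auto on_virtual_column = [](const std::string & d) { return d.rfind("_table", 0) == 0; };

    std::cout << extractOr({"value = 10", "_table = 'v2'"}, on_virtual_column) << " AND "
              << extractOr({"_table = 'v1'", "value = 20"}, on_virtual_column) << "\n";
    // Prints: (_table = 'v2' OR 1) AND (_table = 'v1' OR 1)
}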
@ -165,8 +185,10 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
if (!select.where() && !select.prewhere())
return unmodified;
// Provide input columns as constant columns to check if an expression is constant.
std::function<bool(const ASTPtr &)> is_constant = [&block, &context](const ASTPtr & node)
// Provide input columns as constant columns to check if an expression is
// constant and depends on the columns from the provided block (the latter is
// required to allow skipping some conditions when handling OR).
std::function<bool(const ASTPtr &)> is_constant = [&block, &context](const ASTPtr & expr)
{
auto actions = std::make_shared<ActionsDAG>(block.getColumnsWithTypeAndName());
PreparedSetsPtr prepared_sets = std::make_shared<PreparedSets>();
@ -178,13 +200,26 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block
context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true,
{ aggregation_keys, grouping_set_keys, GroupByKind::NONE });
ActionsVisitor(visitor_data).visit(node);
ActionsVisitor(visitor_data).visit(expr);
actions = visitor_data.getActions();
auto expr_column_name = expr->getColumnName();
const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name);
if (!expr_const_node)
return false;
auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}, {}, context);
const auto & nodes = filter_actions->getNodes();
bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node)
{
return block.has(node.result_name);
});
if (!has_dependent_columns)
return false;
auto expression_actions = std::make_shared<ExpressionActions>(actions);
auto block_with_constants = block;
expression_actions->execute(block_with_constants);
auto column_name = node->getColumnName();
return block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column);
return block_with_constants.has(expr_column_name) && isColumnConst(*block_with_constants.getByName(expr_column_name).column);
};
/// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns.

tests/README.md (new file)

@ -0,0 +1 @@
Find CI documentation and instructions on running CI checks locally [here](https://clickhouse.com/docs/en/development/continuous-integration).


@ -39,8 +39,6 @@ test_settings_profile/test.py::test_show_profiles
test_shard_level_const_function/test.py::test_remote
test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster
test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view
test_system_merges/test.py::test_mutation_simple[]
test_system_merges/test.py::test_mutation_simple[replicated]
test_user_defined_object_persistence/test.py::test_persistence
test_wrong_db_or_table_name/test.py::test_wrong_table_name
test_zookeeper_config/test.py::test_chroot_with_same_root


@ -51,9 +51,9 @@ def get_gh_api(
sleep: int = 3,
**kwargs: Any,
) -> requests.Response:
"""It's a wrapper around get_with_retries that requests GH api w/o auth by
default, and falls back to the get_best_robot_token in case of receiving
"403 rate limit exceeded" error
"""
Request the GH API without auth by default, and fall back to get_best_robot_token on a
"403 rate limit exceeded" or "404 not found" error.
Auth is set automatically when ROBOT_TOKEN has already been set by get_best_robot_token
"""
@ -71,27 +71,39 @@ def get_gh_api(
if grt.ROBOT_TOKEN is not None:
set_auth_header()
need_retry = False
for _ in range(retries):
token_is_set = "Authorization" in kwargs.get("headers", {})
exc = Exception("A placeholder to satisfy typing and avoid nesting")
try_cnt = 0
while try_cnt < retries:
try_cnt += 1
try:
response = get_with_retries(url, 1, sleep, **kwargs)
response = requests.get(url, **kwargs)
response.raise_for_status()
return response
except requests.HTTPError as exc:
if (
exc.response.status_code == 403
except requests.HTTPError as e:
exc = e
ratelimit_exceeded = (
e.response.status_code == 403
and b"rate limit exceeded"
in exc.response._content # pylint:disable=protected-access
):
in e.response._content # pylint:disable=protected-access
)
try_auth = e.response.status_code == 404
if (ratelimit_exceeded or try_auth) and not token_is_set:
logging.warning(
"Received rate limit exception, setting the auth header and retry"
)
set_auth_header()
need_retry = True
break
token_is_set = True
try_cnt = 0
continue
except Exception as e:
exc = e
if need_retry:
return get_with_retries(url, retries, sleep, **kwargs)
if try_cnt < retries:
logging.info("Exception '%s' while getting, retry %i", exc, try_cnt)
time.sleep(sleep)
raise exc
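The Python loop above retries plain requests and, on a "403 rate limit exceeded" or a 404 while unauthenticated, switches to an authenticated request and restarts the retry budget. A rough rendering of that flow, sketched in C++ with a stand-in request callable (HttpError and getWithAuthFallback are invented names, not the real helper):

#include <chrono>
#include <exception>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>

struct HttpError : std::runtime_error
{
    int status;
    HttpError(int status_, const std::string & message) : std::runtime_error(message), status(status_) {}
};

// Retry loop with an authentication fallback: on a 403 rate limit or a 404 while
// unauthenticated, switch to authenticated requests and restart the retry budget.
std::string getWithAuthFallback(const std::function<std::string(bool /* with_auth */)> & request,
                                int retries = 5, int sleep_seconds = 1)
{
    if (retries <= 0)
        throw std::invalid_argument("retries must be positive");

    bool with_auth = false;
    std::exception_ptr last_error;
    for (int attempt = 0; attempt < retries; ++attempt)
    {
        try
        {
            return request(with_auth);
        }
        catch (const HttpError & e)
        {
            last_error = std::current_exception();
            if (!with_auth && (e.status == 403 || e.status == 404))
            {
                with_auth = true;
                attempt = -1;  // reset the retry budget after switching to auth
                continue;
            }
        }
        catch (...)
        {
            last_error = std::current_exception();
        }
        if (attempt + 1 < retries)
            std::this_thread::sleep_for(std::chrono::seconds(sleep_seconds));
    }
    std::rethrow_exception(last_error);
}

int main()
{
    int calls = 0;
    // Hypothetical endpoint: fails with 403 until the request is authenticated.
    auto result = getWithAuthFallback([&](bool with_auth) -> std::string
    {
        ++calls;
        if (!with_auth)
            throw HttpError(403, "rate limit exceeded");
        return "ok";
    });
    std::cout << result << " after " << calls << " calls\n";
}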
def get_build_name_for_check(check_name: str) -> str:

Some files were not shown because too many files have changed in this diff.