mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge branch 'master' into revert-46909-revert-45911-mutations_rename_hang
This commit is contained in:
commit
46f25d53b3
140
.github/workflows/pull_request.yml
vendored
140
.github/workflows/pull_request.yml
vendored
@ -3105,10 +3105,10 @@ jobs:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stress_thread
|
||||
TEMP_PATH=${{runner.temp}}/stress_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stress test (asan)
|
||||
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
|
||||
REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
@ -3267,6 +3267,142 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
######################################### UPGRADE CHECK ######################################
|
||||
##############################################################################################
|
||||
UpgradeCheckAsan:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (asan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
# same as for stress test with tsan
|
||||
runs-on: [self-hosted, func-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_thread
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (tsan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckMsan:
|
||||
needs: [BuilderDebMsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_memory
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (msan)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UpgradeCheckDebug:
|
||||
needs: [BuilderDebDebug]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/upgrade_debug
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Upgrade check (debug)
|
||||
REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Upgrade check
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 upgrade_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
##################################### AST FUZZERS ############################################
|
||||
##############################################################################################
|
||||
|
@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(__aarch64__)
|
||||
struct statx {
|
||||
uint32_t stx_mask;
|
||||
uint32_t stx_blksize;
|
||||
@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag,
|
||||
{
|
||||
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#include <syscall.h>
|
||||
|
@ -43,7 +43,8 @@
|
||||
"docker/test/stateful": {
|
||||
"name": "clickhouse/stateful-test",
|
||||
"dependent": [
|
||||
"docker/test/stress"
|
||||
"docker/test/stress",
|
||||
"docker/test/upgrade"
|
||||
]
|
||||
},
|
||||
"docker/test/unit": {
|
||||
@ -54,6 +55,10 @@
|
||||
"name": "clickhouse/stress-test",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/upgrade": {
|
||||
"name": "clickhouse/upgrade-check",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/codebrowser": {
|
||||
"name": "clickhouse/codebrowser",
|
||||
"dependent": []
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM ubuntu:20.04
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
@ -9,13 +9,14 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
&& groupadd -r clickhouse --gid=101 \
|
||||
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
|
||||
&& apt-get update \
|
||||
&& apt-get upgrade -yq \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
dirmngr \
|
||||
gnupg \
|
||||
locales \
|
||||
gnupg2 \
|
||||
wget \
|
||||
locales \
|
||||
tzdata \
|
||||
&& apt-get clean
|
||||
|
||||
@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
|
||||
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
|
||||
|
||||
# Remove as much of Ubuntu as possible.
|
||||
# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution.
|
||||
# ClickHouse does not need Docker at all. ClickHouse is above all that.
|
||||
# It does not care about Ubuntu, Docker, or other cruft, and neither should you.
|
||||
# The fact that this Docker image is based on Ubuntu is just a misconception.
|
||||
# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all.
|
||||
# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners.
|
||||
|
||||
RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y
|
||||
RUN apt-get autoremove --purge -yq libksba8 && \
|
||||
apt-get autoremove -yq
|
||||
|
||||
# we need to allow "others" access to clickhouse folder, because docker container
|
||||
# can be started with arbitrary uid (openshift usecase)
|
||||
|
@ -1,4 +1,4 @@
|
||||
# rebuild in #33610
|
||||
# rebuild in #47031
|
||||
# docker build -t clickhouse/stateful-test .
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/stateless-test:$FROM_TAG
|
||||
|
@ -21,10 +21,9 @@ RUN apt-get update -y \
|
||||
openssl \
|
||||
netcat-openbsd \
|
||||
telnet \
|
||||
llvm-9 \
|
||||
brotli
|
||||
brotli \
|
||||
&& apt-get clean
|
||||
|
||||
COPY ./stress /stress
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
|
@ -8,229 +8,13 @@ dmesg --clear
|
||||
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
# we mount tests folder from repo to /usr/share
|
||||
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
OK="\tOK\t\\N\t"
|
||||
FAIL="\tFAIL\t\\N\t"
|
||||
|
||||
FAILURE_CONTEXT_LINES=50
|
||||
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
|
||||
|
||||
function escaped()
|
||||
{
|
||||
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
|
||||
# Also limit the line width just in case (overly long lines are usually not useful)
|
||||
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
|
||||
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
|
||||
}
|
||||
function head_escaped()
|
||||
{
|
||||
head -n $FAILURE_CONTEXT_LINES $1 | escaped
|
||||
}
|
||||
function unts()
|
||||
{
|
||||
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
|
||||
}
|
||||
function trim_server_logs()
|
||||
{
|
||||
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
|
||||
}
|
||||
|
||||
function install_packages()
|
||||
{
|
||||
dpkg -i $1/clickhouse-common-static_*.deb
|
||||
dpkg -i $1/clickhouse-common-static-dbg_*.deb
|
||||
dpkg -i $1/clickhouse-server_*.deb
|
||||
dpkg -i $1/clickhouse-client_*.deb
|
||||
}
|
||||
|
||||
function configure()
|
||||
{
|
||||
# install test configs
|
||||
export USE_DATABASE_ORDINARY=1
|
||||
export EXPORT_S3_STORAGE_POLICIES=1
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
# we mount tests folder from repo to /usr/share
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
|
||||
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
|
||||
|
||||
# avoid too slow startup
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
|
||||
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
|
||||
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
|
||||
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
|
||||
# since we run clickhouse as root
|
||||
sudo chown root: /var/lib/clickhouse
|
||||
|
||||
# Update asynchronous metrics more often so that information about real memory usage is refreshed more frequently (less chance of OOM).
|
||||
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
|
||||
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
|
||||
|
||||
local total_mem
|
||||
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
|
||||
total_mem=$(( total_mem*1024 )) # bytes
|
||||
|
||||
# Set maximum memory usage as half of total memory (less chance of OOM).
|
||||
#
|
||||
# But not via max_server_memory_usage but via max_memory_usage_for_user,
|
||||
# so that we can override this setting and execute service queries, like:
|
||||
# - hung check
|
||||
# - show/drop database
|
||||
# - ...
|
||||
#
|
||||
# So max_memory_usage_for_user will be a soft limit, and
|
||||
# max_server_memory_usage will be a hard limit, and queries that should be
|
||||
# executed regardless of memory limits will use max_memory_usage_for_user=0,
|
||||
# instead of relying on max_untracked_memory
|
||||
|
||||
max_server_memory_usage_to_ram_ratio=0.5
|
||||
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
|
||||
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
|
||||
<clickhouse>
|
||||
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
local max_users_mem
|
||||
max_users_mem=$((total_mem*30/100)) # 30%
|
||||
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
|
||||
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<max_memory_usage>10G</max_memory_usage>
|
||||
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
# Let OOM killer terminate other processes before clickhouse-server:
|
||||
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
|
||||
<clickhouse>
|
||||
<oom_score>-1000</oom_score>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
# Analyzer is not yet ready for testing
|
||||
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<constraints>
|
||||
<allow_experimental_analyzer>
|
||||
<readonly/>
|
||||
</allow_experimental_analyzer>
|
||||
</constraints>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
}
|
||||
|
||||
function stop()
|
||||
{
|
||||
local max_tries="${1:-90}"
|
||||
local pid
|
||||
# Save the pid up front, since the server can hang after the PID file has been deleted.
|
||||
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
|
||||
|
||||
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
|
||||
|
||||
# We failed to stop the server with SIGTERM. Maybe it hung; let's collect stack traces.
|
||||
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
|
||||
kill -TERM "$(pidof gdb)" ||:
|
||||
sleep 5
|
||||
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
|
||||
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
|
||||
clickhouse stop --force
|
||||
}
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt ${1:-120} ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
|
||||
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n100 /var/log/clickhouse-server/stderr.log
|
||||
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
|
||||
break
|
||||
fi
|
||||
# use root to match the current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
|
||||
# and clickhouse-server can do fork-exec, for example, to run some bridge.
|
||||
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
|
||||
# explicitly ignore non-fatal signals that are used by server.
|
||||
# The number of SIGRTMIN can only be determined at runtime.
|
||||
RTMIN=$(kill -l SIGRTMIN)
|
||||
echo "
|
||||
set follow-fork-mode parent
|
||||
handle SIGHUP nostop noprint pass
|
||||
handle SIGINT nostop noprint pass
|
||||
handle SIGQUIT nostop noprint pass
|
||||
handle SIGPIPE nostop noprint pass
|
||||
handle SIGTERM nostop noprint pass
|
||||
handle SIGUSR1 nostop noprint pass
|
||||
handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
up
|
||||
disassemble /s
|
||||
p \"done\"
|
||||
detach
|
||||
quit
|
||||
" > script.gdb
|
||||
|
||||
# FIXME Hung check may work incorrectly because of attached gdb
|
||||
# 1. False positives are possible
|
||||
# 2. We cannot attach another gdb to get stacktraces if some queries hung
|
||||
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
|
||||
sleep 5
|
||||
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
|
||||
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
|
||||
}
|
||||
# Stress tests and the upgrade check use similar code that was placed
|
||||
# in a separate bash library. See tests/ci/stress_tests.lib
|
||||
source /usr/share/clickhouse-test/ci/stress_tests.lib
|
||||
|
||||
install_packages package_folder
|
||||
|
||||
@ -396,7 +180,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
|
||||
start
|
||||
|
||||
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
|
||||
|
||||
@ -413,316 +197,27 @@ unset "${!THREAD_@}"
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
|
||||
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
|
||||
>> /test_output/test_results.tsv)
|
||||
check_server_start
|
||||
|
||||
stop
|
||||
|
||||
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
|
||||
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log
|
||||
|
||||
# Grep logs for sanitizer asserts, crashes and other critical errors
|
||||
check_logs_for_critical_errors
|
||||
|
||||
# Sanitizer asserts
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# No such key errors
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
|
||||
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file no_such_key_errors.txt if it's empty
|
||||
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
|
||||
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file fatal_messages.txt if it's empty
|
||||
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
|
||||
|
||||
rg -Fa "########################################" /test_output/* > /dev/null \
|
||||
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
function get_gdb_log_context()
|
||||
{
|
||||
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
|
||||
}
|
||||
|
||||
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
|
||||
|
||||
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Get previous release tag"
|
||||
previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
|
||||
echo $previous_release_tag
|
||||
|
||||
echo "Clone previous release repository"
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
|
||||
|
||||
echo "Download clickhouse-server from the previous release"
|
||||
mkdir previous_release_package_folder
|
||||
|
||||
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
done
|
||||
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
# Check if we cloned previous release repository successfully
|
||||
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
|
||||
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
dpkg --remove clickhouse-server
|
||||
dpkg --remove clickhouse-common-static-dbg
|
||||
dpkg --remove clickhouse-common-static
|
||||
|
||||
rm -rf /var/lib/clickhouse/*
|
||||
|
||||
# Make BC check more funny by forcing Ordinary engine for system database
|
||||
mkdir /var/lib/clickhouse/metadata
|
||||
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
|
||||
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Start server from previous release
|
||||
# Previous version may not be ready for fault injections
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
|
||||
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
|
||||
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
|
||||
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
|
||||
|
||||
# Remove s3 related configs to avoid "there is no disk type `cache`"
|
||||
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
|
||||
rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
|
||||
|
||||
# Turn on after 22.12
|
||||
rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
|
||||
# it uses recently introduced settings which previous versions may not have
|
||||
rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
|
||||
|
||||
# Turn on after 23.1
|
||||
rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||:
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Install new package before running stress test because we should use new
|
||||
# clickhouse-client and new clickhouse-test.
|
||||
#
|
||||
# But we should leave old binary in /usr/bin/ and debug symbols in
|
||||
# /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
|
||||
# will print sane stacktraces and also to avoid possible crashes.
|
||||
#
|
||||
# FIXME: those files can be extracted directly from debian package, but
|
||||
# actually a better solution would be to use a different PATH instead of playing
|
||||
# games with files from packages.
|
||||
mv /usr/bin/clickhouse previous_release_package_folder/
|
||||
mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
|
||||
install_packages package_folder
|
||||
mv /usr/bin/clickhouse package_folder/
|
||||
mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
|
||||
mv previous_release_package_folder/clickhouse /usr/bin/
|
||||
mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
|
||||
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
# We experienced deadlocks in this command in very rare cases. Let's debug it:
|
||||
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
|
||||
(
|
||||
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
|
||||
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
|
||||
clickhouse stop --force
|
||||
)
|
||||
|
||||
# Use bigger timeout for previous version
|
||||
stop 300
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
|
||||
|
||||
# Start new server
|
||||
mv package_folder/clickhouse /usr/bin/
|
||||
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
|
||||
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
# Let the server run for a while before checking log.
|
||||
sleep 60
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
|
||||
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
|
||||
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
|
||||
echo "Check for Error messages in server log:"
|
||||
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
-e "REPLICA_IS_ALREADY_ACTIVE" \
|
||||
-e "REPLICA_ALREADY_EXISTS" \
|
||||
-e "ALL_REPLICAS_LOST" \
|
||||
-e "DDLWorker: Cannot parse DDL task query" \
|
||||
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
|
||||
-e "UNKNOWN_DATABASE" \
|
||||
-e "NETWORK_ERROR" \
|
||||
-e "UNKNOWN_TABLE" \
|
||||
-e "ZooKeeperClient" \
|
||||
-e "KEEPER_EXCEPTION" \
|
||||
-e "DirectoryMonitor" \
|
||||
-e "TABLE_IS_READ_ONLY" \
|
||||
-e "Code: 1000, e.code() = 111, Connection refused" \
|
||||
-e "UNFINISHED" \
|
||||
-e "NETLINK_ERROR" \
|
||||
-e "Renaming unexpected part" \
|
||||
-e "PART_IS_TEMPORARILY_LOCKED" \
|
||||
-e "and a merge is impossible: we didn't find" \
|
||||
-e "found in queue and some source parts for it was lost" \
|
||||
-e "is lost forever." \
|
||||
-e "Unknown index: idx." \
|
||||
-e "Cannot parse string 'Hello' as UInt64" \
|
||||
-e "} <Error> TCPHandler: Code:" \
|
||||
-e "} <Error> executeQuery: Code:" \
|
||||
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
|
||||
-e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
|
||||
-e "The set of parts restored in place of" \
|
||||
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
|
||||
-e "Code: 269. DB::Exception: Destination table is myself" \
|
||||
-e "Coordination::Exception: Connection loss" \
|
||||
-e "MutateFromLogEntryTask" \
|
||||
-e "No connection to ZooKeeper, cannot get shared table ID" \
|
||||
-e "Session expired" \
|
||||
-e "TOO_MANY_PARTS" \
|
||||
-e "Container already exists" \
|
||||
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_error_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
|
||||
|
||||
# Sanitizer asserts
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
echo "Check for Logical errors in server log:"
|
||||
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_logical_errors.txt if it's empty
|
||||
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
echo "Check for Fatal message in server log:"
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
|
||||
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
|
||||
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
|
||||
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
dmesg -T > /test_output/dmesg.log
|
||||
|
||||
# OOM in dmesg -- those are real
|
||||
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
|
||||
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
|
||||
collect_query_and_trace_logs
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
# Try to choose most specific error for the whole check status
|
||||
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
|
||||
(test like 'Backward compatibility check%'), -- BC check goes last
|
||||
(test like '%Sanitizer%') DESC,
|
||||
(test like '%Killed by signal%') DESC,
|
||||
(test like '%gdb.log%') DESC,
|
||||
@ -732,14 +227,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
|
||||
(test like '%OOM%') DESC,
|
||||
(test like '%Signal 9%') DESC,
|
||||
(test like '%Fatal message%') DESC,
|
||||
(test like '%Error message%') DESC,
|
||||
(test like '%previous release%') DESC,
|
||||
rowNumberInAllBlocks()
|
||||
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps
|
||||
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
|
||||
zstd --threads=0 $core
|
||||
mv $core.zst /test_output/
|
||||
done
|
||||
collect_core_dumps
|
||||
|
31
docker/test/upgrade/Dockerfile
Normal file
31
docker/test/upgrade/Dockerfile
Normal file
@ -0,0 +1,31 @@
|
||||
# rebuild in #33610
# Build with: docker build -t clickhouse/upgrade-check .

# The base image tag can be overridden at build time via --build-arg FROM_TAG=...
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:${FROM_TAG}

# Install the tools used by the check in a single layer; the install runs
# non-interactively and skips recommended packages.
RUN apt-get update --yes \
    && DEBIAN_FRONTEND=noninteractive \
        apt-get install -y --no-install-recommends \
            bash \
            tzdata \
            fakeroot \
            debhelper \
            parallel \
            expect \
            python3 \
            python3-lxml \
            python3-termcolor \
            python3-requests \
            curl \
            sudo \
            openssl \
            netcat-openbsd \
            telnet \
            brotli \
    && apt-get clean

# The check entry point lives in the image root.
COPY run.sh /run.sh

# NOTE(review): presumably read by the server configuration scripts of the
# base image to export S3 storage policies -- confirm against the consumer.
ENV EXPORT_S3_STORAGE_POLICIES=1

CMD ["/bin/bash", "/run.sh"]
|
200
docker/test/upgrade/run.sh
Normal file
200
docker/test/upgrade/run.sh
Normal file
@ -0,0 +1,200 @@
|
||||
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024

# Upgrade check.
#
# Flow of this script:
#   1. Determine the previous release tag from the package under test, clone
#      the tests of that release and download its packages.
#   2. Install and start the previous release, then run the stress script
#      against it with --upgrade-check.
#   3. Stop the old server, install the packages under test over the same
#      data directory and start the new server.
#   4. Scan the new server's log for unexpected <Error> messages and other
#      critical problems, collect artifacts into /test_output and write the
#      overall verdict into /test_output/check_status.tsv.
#
# Helper functions (install_packages, configure, start, stop, head_escaped,
# trim_server_logs, check_logs_for_critical_errors, check_oom_in_dmesg,
# collect_query_and_trace_logs, collect_core_dumps) are not defined here.
# NOTE(review): they, and the $OK / $FAIL result suffixes, presumably come
# from the sourced stress_tests.lib -- confirm.

# Avoid overlaps with previous runs
dmesg --clear

set -x

# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag

# The stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib

azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment

echo "Get previous release tag"
# The version is read from the .deb metadata; everything after '+' is cut off
# before it is piped to get_previous_release_tag.
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
echo $previous_release_tag

echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository

echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder

echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv

# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
    echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv
    exit
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
    echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv
    exit
fi

echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv

# Make upgrade check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql

# Install previous release packages
install_packages previous_release_package_folder

# Start server from previous release
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure

# But we still need default disk because some tables loaded only into it.
# The patched policy is written to a temporary file first and then moved over
# the original. The move must be a separate command: if it is fused onto the
# redirection line, "mv" and both paths become extra sed arguments and the
# patched config is never put in place.
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
    | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml

start

clickhouse-client --query="SELECT 'Server version: ', version()"

mkdir tmp_stress_output

# Run the stress script against the previous release using that release's
# own test queries.
stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
    && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv

rm -rf tmp_stress_output

# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
    echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
    timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
    clickhouse stop --force
)

# Use bigger timeout for previous version and disable additional hang check
stop 300 false
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log

# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
# On success the query output itself is appended as a result row; on failure
# the application-level errors are extracted from the server log instead.
# NOTE(review): the failure message mentions clickhouse-server.clean.log, but
# this script only produces *.stress.log and *.upgrade.log -- confirm the name.
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
    || (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
    && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
    >> /test_output/test_results.tsv)

# Remove file application_errors.txt if it's empty
[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt

clickhouse-client --query="SELECT 'Server version: ', version()"

# Let the server run for a while before checking log.
sleep 60

stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log

# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.
#       Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
#
# The first rg drops every line matching one of the tolerated patterns (-v);
# whatever <Error> lines remain are treated as a failure of the check.
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
    -e "Code: 236. DB::Exception: Cancelled mutating parts" \
    -e "REPLICA_IS_ALREADY_ACTIVE" \
    -e "REPLICA_ALREADY_EXISTS" \
    -e "ALL_REPLICAS_LOST" \
    -e "DDLWorker: Cannot parse DDL task query" \
    -e "RaftInstance: failed to accept a rpc connection due to error 125" \
    -e "UNKNOWN_DATABASE" \
    -e "NETWORK_ERROR" \
    -e "UNKNOWN_TABLE" \
    -e "ZooKeeperClient" \
    -e "KEEPER_EXCEPTION" \
    -e "DirectoryMonitor" \
    -e "TABLE_IS_READ_ONLY" \
    -e "Code: 1000, e.code() = 111, Connection refused" \
    -e "UNFINISHED" \
    -e "NETLINK_ERROR" \
    -e "Renaming unexpected part" \
    -e "PART_IS_TEMPORARILY_LOCKED" \
    -e "and a merge is impossible: we didn't find" \
    -e "found in queue and some source parts for it was lost" \
    -e "is lost forever." \
    -e "Unknown index: idx." \
    -e "Cannot parse string 'Hello' as UInt64" \
    -e "} <Error> TCPHandler: Code:" \
    -e "} <Error> executeQuery: Code:" \
    -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
    -e "The set of parts restored in place of" \
    -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
    -e "Code: 269. DB::Exception: Destination table is myself" \
    -e "Coordination::Exception: Connection loss" \
    -e "MutateFromLogEntryTask" \
    -e "No connection to ZooKeeper, cannot get shared table ID" \
    -e "Session expired" \
    -e "TOO_MANY_PARTS" \
    -e "Authentication failed" \
    -e "Container already exists" \
    /var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
    >> /test_output/test_results.tsv \
    || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt

# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors

tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:

collect_query_and_trace_logs

check_oom_in_dmesg

mv /var/log/clickhouse-server/stderr.log /test_output/

# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status.
# The fallback message needs `echo -e`, otherwise "\t" is written literally
# and the status file has no tab separator.
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
    (test like '%Sanitizer%') DESC,
    (test like '%Killed by signal%') DESC,
    (test like '%gdb.log%') DESC,
    (test ilike '%possible deadlock%') DESC,
    (test like '%start%') DESC,
    (test like '%dmesg%') DESC,
    (test like '%OOM%') DESC,
    (test like '%Signal 9%') DESC,
    (test like '%Fatal message%') DESC,
    (test like '%Error message%') DESC,
    (test like '%previous release%') DESC,
    rowNumberInAllBlocks()
    LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

collect_core_dumps
|
17
docs/changelogs/v22.3.19.6-lts.md
Normal file
17
docs/changelogs/v22.3.19.6-lts.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -39,12 +39,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director
|
||||
|
||||
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
|
||||
|
||||
### Restricting test runs
|
||||
|
||||
A test can have zero or more _test tags_ specifying restrictions for test runs.
|
||||
|
||||
For `.sh` tests tags are written as a comment on the second line:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
```
|
||||
|
||||
For `.sql` tests tags are placed in the first line as a SQL comment:
|
||||
|
||||
```sql
|
||||
-- Tags: no-fasttest
|
||||
SELECT 1
|
||||
```
|
||||
|
||||
|Tag name | What it does | Usage example |
|
||||
|---|---|---|
|
||||
| `disabled`| Test is not run ||
|
||||
| `long` | Test's execution time is extended from 1 to 10 minutes ||
|
||||
| `deadlock` | Test is run in a loop for a long time ||
|
||||
| `race` | Same as `deadlock`. Prefer `deadlock` ||
|
||||
| `shard` | Server is required to listen to `127.0.0.*` ||
|
||||
| `distributed` | Same as `shard`. Prefer `shard` ||
|
||||
| `global` | Same as `shard`. Prefer `shard` ||
|
||||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
|
||||
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
|
||||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
|
||||
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
|
||||
| `no-replicated-database` |||
|
||||
| `no-ordinary-database` |||
|
||||
| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken|
|
||||
| `no-parallel-replicas` |||
|
||||
| `no-debug` |||
|
||||
| `no-stress` |||
|
||||
| `no-polymorphic-parts` |||
|
||||
| `no-random-settings` |||
|
||||
| `no-random-merge-tree-settings` |||
|
||||
| `no-backward-compatibility-check` |||
|
||||
| `no-cpu-x86_64` |||
|
||||
| `no-cpu-aarch64` |||
|
||||
| `no-cpu-ppc64le` |||
|
||||
| `no-s3-storage` |||
|
||||
|
||||
In addition to the above tags, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
|
||||
For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
|
||||
|
||||
### Choosing the Test Name
|
||||
|
||||
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
|
||||
|
||||
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that use ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using the `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
|
||||
|
||||
### Checking for an Error that Must Occur
|
||||
|
||||
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
|
||||
|
||||
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).
|
||||
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](/docs/en/sql-reference/functions/index.md/#executable-user-defined-functions).
|
||||
|
||||
The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
|
||||
``` sql
|
||||
@ -39,7 +39,7 @@ Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip i
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
|
||||
In these queries, `DistanceFunction` is selected from [distance functions](/docs/en/sql-reference/functions/distance-functions.md). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` is a float value that bounds the neighbourhood.
|
||||
|
||||
:::note
|
||||
An ANN index can't speed up a query that uses both types at once (`WHERE` together with `ORDER BY`); a query must use only one of them. All queries must have a limit, because the algorithms look for nearest neighbors and need a specific number of them.
|
||||
@ -85,13 +85,13 @@ As the indexes are built only during insertions into table, `INSERT` and `OPTIMI
|
||||
You can create your table with index which uses certain algorithm. Now only indices based on the following algorithms are supported:
|
||||
|
||||
# Index list
|
||||
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
# Annoy {#annoy}
|
||||
Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).
|
||||
|
||||
Short description of the algorithm:
|
||||
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D e.t.c.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest.
|
||||
The algorithm recursively divides the whole space in half by random linear surfaces (lines in 2D, planes in 3D, etc.). Thus it builds a tree of polyhedrons and the points they contain. By repeating the operation several times for greater accuracy, it creates a forest.
|
||||
To find the K nearest neighbours, it goes down through the trees and fills the buffer of closest points using a priority queue of polyhedrons. Next, it sorts the buffer and returns the nearest K points.
|
||||
|
||||
__Examples__:
|
||||
@ -118,7 +118,7 @@ ORDER BY id;
|
||||
```
|
||||
|
||||
:::note
|
||||
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
|
||||
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
|
||||
:::
|
||||
|
||||
Parameter `NumTrees` is the number of trees which the algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. It can be set without changing first parameter, for example
|
||||
|
@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
|
||||
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
|
||||
|
||||
## Arrow {#data-format-arrow}
|
||||
|
||||
|
@ -229,7 +229,7 @@ To prevent inferring the same schema every time ClickHouse read the data from th
|
||||
|
||||
There are special settings that control this cache:
|
||||
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
|
||||
- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries.
|
||||
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
|
||||
|
||||
The schema of the file can be changed by modifying the data or by changing format settings.
|
||||
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
|
||||
@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}'
|
||||
SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
|
||||
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1
|
||||
```
|
||||
```response
|
||||
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
|
@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M
|
||||
|
||||
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type):
|
||||
- `readonly=0`: No read-only restrictions.
|
||||
- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set.
|
||||
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
|
||||
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.
|
||||
|
||||
|
||||
|
@ -142,6 +142,10 @@ y Nullable(String)
|
||||
z IPv4
|
||||
```
|
||||
|
||||
:::warning
|
||||
If `schema_inference_hints` is not formatted properly, or if there is a typo, a wrong datatype, etc., the whole `schema_inference_hints` value will be ignored.
|
||||
:::
|
||||
|
||||
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
|
||||
|
||||
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
|
||||
@ -507,7 +511,7 @@ Enabled by default.
|
||||
|
||||
Ignore unknown keys in json object for named tuples.
|
||||
|
||||
Disabled by default.
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
|
||||
|
||||
@ -1102,6 +1106,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### output_format_parquet_version {#output_format_parquet_version}
|
||||
|
||||
The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`.
|
||||
|
||||
Default value: `2.latest`.
|
||||
|
||||
## Hive format settings {#hive-format-settings}
|
||||
|
||||
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
|
||||
|
52
docs/en/operations/system-tables/server_settings.md
Normal file
52
docs/en/operations/system-tables/server_settings.md
Normal file
@ -0,0 +1,52 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/server_settings
|
||||
---
|
||||
# server_settings
|
||||
|
||||
Contains information about global settings for the server, which were specified in `config.xml`.
|
||||
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
|
||||
|
||||
Columns:
|
||||
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name.
|
||||
- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value.
|
||||
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
|
||||
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
|
||||
|
||||
**Example**
|
||||
|
||||
The following example shows how to get information about server settings whose names contain `thread_pool`.
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM system.server_settings
|
||||
WHERE name LIKE '%thread_pool%'
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
|
||||
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
|
||||
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
|
||||
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
|
||||
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
|
||||
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
|
||||
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
|
||||
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
Using `WHERE changed` can be useful, for example, when you want to check
|
||||
whether settings in configuration files are loaded correctly and are in use.
|
||||
|
||||
<!-- -->
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [Settings](../../operations/system-tables/settings.md)
|
||||
- [Configuration Files](../../operations/configuration-files.md)
|
||||
- [Server Settings](../../operations/server-configuration-parameters/settings.md)
|
@ -16,6 +16,7 @@ Columns:
|
||||
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
|
||||
- `0` — Current user can change the setting.
|
||||
- `1` — Current user can’t change the setting.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -7,8 +7,8 @@ sidebar_position: 37
|
||||
|
||||
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
|
||||
|
||||
Returns Float64. When `n <= 1`, returns +∞.
|
||||
Returns Float64. When `n <= 1`, returns `nan`.
|
||||
|
||||
:::note
|
||||
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
|
||||
:::
|
||||
:::
|
||||
|
@ -48,7 +48,35 @@ When dividing by zero you get ‘inf’, ‘-inf’, or ‘nan’.
|
||||
## intDiv(a, b)
|
||||
|
||||
Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value).
|
||||
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
|
||||
|
||||
Returns an integer of the type of the dividend (the first parameter).
|
||||
|
||||
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
intDiv(toFloat64(1), 0.001) AS res,
|
||||
toTypeName(res)
|
||||
```
|
||||
```response
|
||||
┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
|
||||
│ 1000 │ Int64 │
|
||||
└──────┴─────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
intDiv(1, 0.001) AS res,
|
||||
toTypeName(res)
|
||||
```
|
||||
```response
|
||||
Received exception from server (version 23.2.1):
|
||||
Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
|
||||
```
|
||||
|
||||
## intDivOrZero(a, b)
|
||||
|
||||
|
@ -579,3 +579,33 @@ Result:
|
||||
│ 3628800 │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## widthBucket(operand, low, high, count)
|
||||
|
||||
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
|
||||
|
||||
`operand`, `low`, `high` can be any native number type. `count` can only be an unsigned native integer, and its value cannot be zero.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
widthBucket(operand, low, high, count)
|
||||
```
|
||||
|
||||
There is also a case-insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT widthBucket(10.15, -8.6, 23, 18);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─widthBucket(10.15, -8.6, 23, 18)─┐
|
||||
│ 11 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. ');
|
||||
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
|
||||
Returns the string.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
|
||||
```
|
||||
```text
|
||||
┌─DateString──────────┐
|
||||
│ 12/05/2021 12:50:00 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
|
||||
|
||||
Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
|
||||
@ -364,4 +375,4 @@ Result:
|
||||
┌─tokens────────────────────────────┐
|
||||
│ ['test1','test2','test3','test4'] │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
```
|
||||
|
@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n
|
||||
|
||||
## Example
|
||||
|
||||
Create a user:
|
||||
```sql
|
||||
CREATE USER robin IDENTIFIED BY 'password';
|
||||
```
|
||||
|
||||
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
|
||||
|
||||
``` sql
|
||||
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
|
||||
CREATE
|
||||
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
|
||||
TO robin
|
||||
```
|
||||
|
@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2],
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
|
||||
...
|
||||
) ENGINE = engine
|
||||
COMMENT 'comment for table'
|
||||
```
|
||||
|
||||
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
|
||||
@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below).
|
||||
|
||||
If necessary, primary key can be specified, with one or more key expressions.
|
||||
|
||||
Comments can be added for columns and for the table.
|
||||
|
||||
### With a Schema Similar to Other Table
|
||||
|
||||
``` sql
|
||||
@ -267,7 +270,7 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m
|
||||
CREATE TABLE db.table_name
|
||||
(
|
||||
name1 type1, name2 type2, ...,
|
||||
PRIMARY KEY(expr1[, expr2,...])]
|
||||
PRIMARY KEY(expr1[, expr2,...])
|
||||
)
|
||||
ENGINE = engine;
|
||||
```
|
||||
|
@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
|
||||
```
|
||||
|
||||
:::tip
|
||||
Here is a step-by-step guide on using [Materialized views](/docs/en/guides/developer/cascading-materialized-views.md).
|
||||
:::
|
||||
|
||||
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
|
||||
|
||||
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data.
|
||||
|
@ -23,23 +23,3 @@ You can use table functions in:
|
||||
:::warning
|
||||
You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
|
||||
:::
|
||||
|
||||
| Function | Description |
|
||||
|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
|
||||
| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
|
||||
| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
|
||||
| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
|
||||
| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
|
||||
| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
|
||||
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
|
||||
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
|
||||
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
|
||||
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
|
||||
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
|
||||
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
|
||||
|
||||
:::note
|
||||
Only these table functions are enabled in readonly mode :
|
||||
null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
|
||||
:::
|
53
docs/ru/operations/system-tables/server_settings.md
Normal file
53
docs/ru/operations/system-tables/server_settings.md
Normal file
@ -0,0 +1,53 @@
|
||||
---
|
||||
slug: /ru/operations/system-tables/server_settings
|
||||
---
|
||||
# system.server_settings
|
||||
|
||||
Содержит информацию о конфигурации сервера.
|
||||
В настоящий момент таблица содержит только верхнеуровневые параметры из файла `config.xml` и не поддерживает вложенные конфигурации
|
||||
(например [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
|
||||
|
||||
Столбцы:
|
||||
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — имя настройки.
|
||||
- `value` ([String](../../sql-reference/data-types/string.md)) — значение настройки.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — значение настройки по умолчанию.
|
||||
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — показывает, была ли настройка указана в `config.xml` или является значением по умолчанию.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — краткое описание настройки.
|
||||
- `type` ([String](../../sql-reference/data-types/string.md)) — тип настройки.
|
||||
|
||||
**Пример**
|
||||
|
||||
Пример показывает, как получить информацию о настройках, имена которых содержат `thread_pool`.
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM system.server_settings
|
||||
WHERE name LIKE '%thread_pool%'
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
|
||||
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
|
||||
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
|
||||
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
|
||||
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
|
||||
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
|
||||
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
|
||||
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
Использование `WHERE changed` может быть полезно, например, если необходимо проверить,
|
||||
что настройки корректно загрузились из конфигурационного файла и используются.
|
||||
|
||||
<!-- -->
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
|
||||
```
|
||||
|
||||
**Cм. также**
|
||||
|
||||
- [Настройки](../../operations/system-tables/settings.md)
|
||||
- [Конфигурационные файлы](../../operations/configuration-files.md)
|
||||
- [Настройки сервера](../../operations/server-configuration-parameters/settings.md)
|
@ -16,6 +16,7 @@ slug: /ru/operations/system-tables/settings
|
||||
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку:
|
||||
- `0` — Текущий пользователь может изменять настройку.
|
||||
- `1` — Текущий пользователь не может изменять настройку.
|
||||
- `default` ([String](../../sql-reference/data-types/string.md)) — значение настройки по умолчанию.
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -18,12 +18,14 @@ Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
# Since ClickHouse is systemd aware default 1m30sec may not be enough
|
||||
TimeoutStartSec=inifinity
|
||||
TimeoutStartSec=infinity
|
||||
# %p is resolved to the systemd unit name
|
||||
RuntimeDirectory=%p
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/%p
|
||||
# Bring back /etc/default/clickhouse for backward compatibility
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
@ -91,6 +91,7 @@
|
||||
#include <Server/ProtocolServerAdapter.h>
|
||||
#include <Server/HTTP/HTTPServer.h>
|
||||
#include <Interpreters/AsynchronousInsertQueue.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <filesystem>
|
||||
#include <unordered_set>
|
||||
|
||||
@ -662,7 +663,10 @@ try
|
||||
|
||||
MainThreadStatus::getInstance();
|
||||
|
||||
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
|
||||
|
||||
#if USE_HDFS
|
||||
/// This will point libhdfs3 to the right location for its config.
|
||||
@ -696,7 +700,7 @@ try
|
||||
{
|
||||
const String config_path = config().getString("config-file", "config.xml");
|
||||
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
|
||||
setenv("OPENSSL_CONF", config_dir.string(), true);
|
||||
setenv("OPENSSL_CONF", config_dir.c_str(), true);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -747,9 +751,9 @@ try
|
||||
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
|
||||
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
|
||||
GlobalThreadPool::initialize(
|
||||
config().getUInt("max_thread_pool_size", 10000),
|
||||
config().getUInt("max_thread_pool_free_size", 1000),
|
||||
config().getUInt("thread_pool_queue_size", 10000));
|
||||
server_settings.max_thread_pool_size,
|
||||
server_settings.max_thread_pool_free_size,
|
||||
server_settings.thread_pool_queue_size);
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
/// It makes sense to deinitialize libxml after joining of all threads
|
||||
@ -765,9 +769,9 @@ try
|
||||
#endif
|
||||
|
||||
IOThreadPool::initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
server_settings.max_io_thread_pool_size,
|
||||
server_settings.max_io_thread_pool_free_size,
|
||||
server_settings.io_thread_pool_queue_size);
|
||||
|
||||
/// Initialize global local cache for remote filesystem.
|
||||
if (config().has("local_cache_for_remote_fs"))
|
||||
@ -783,15 +787,15 @@ try
|
||||
}
|
||||
}
|
||||
|
||||
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
|
||||
Poco::ThreadPool server_pool(3, server_settings.max_connections);
|
||||
std::mutex servers_lock;
|
||||
std::vector<ProtocolServerAdapter> servers;
|
||||
std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
|
||||
/// This object will periodically calculate some metrics.
|
||||
ServerAsynchronousMetrics async_metrics(
|
||||
global_context,
|
||||
config().getUInt("asynchronous_metrics_update_period_s", 1),
|
||||
config().getUInt("asynchronous_heavy_metrics_update_period_s", 120),
|
||||
server_settings.asynchronous_metrics_update_period_s,
|
||||
server_settings.asynchronous_heavy_metrics_update_period_s,
|
||||
[&]() -> std::vector<ProtocolServerMetrics>
|
||||
{
|
||||
std::vector<ProtocolServerMetrics> metrics;
|
||||
@ -806,7 +810,7 @@ try
|
||||
}
|
||||
);
|
||||
|
||||
ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10));
|
||||
ConnectionCollector::init(global_context, server_settings.max_threads_for_connection_collector);
|
||||
|
||||
bool has_zookeeper = config().has("zookeeper");
|
||||
|
||||
@ -825,6 +829,9 @@ try
|
||||
|
||||
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
|
||||
|
||||
/// We need to reload server settings because config could be updated via zookeeper.
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
std::string executable_path = getExecutablePath();
|
||||
|
||||
@ -944,7 +951,7 @@ try
|
||||
|
||||
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
|
||||
fs::path path = path_str;
|
||||
std::string default_database = config().getString("default_database", "default");
|
||||
std::string default_database = server_settings.default_database.toString();
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
const auto effective_user_id = geteuid();
|
||||
@ -1035,21 +1042,18 @@ try
|
||||
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
|
||||
|
||||
/// Storage with temporary data for processing of heavy queries.
|
||||
if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty())
|
||||
if (!server_settings.tmp_policy.value.empty())
|
||||
{
|
||||
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
|
||||
global_context->setTemporaryStoragePolicy(temporary_policy, max_size);
|
||||
global_context->setTemporaryStoragePolicy(server_settings.tmp_policy, server_settings.max_temporary_data_on_disk_size);
|
||||
}
|
||||
else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty())
|
||||
else if (!server_settings.temporary_data_in_cache.value.empty())
|
||||
{
|
||||
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
|
||||
global_context->setTemporaryStorageInCache(temporary_cache, max_size);
|
||||
global_context->setTemporaryStorageInCache(server_settings.temporary_data_in_cache, server_settings.max_temporary_data_on_disk_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string temporary_path = config().getString("tmp_path", path / "tmp/");
|
||||
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
|
||||
global_context->setTemporaryStoragePath(temporary_path, max_size);
|
||||
global_context->setTemporaryStoragePath(temporary_path, server_settings.max_temporary_data_on_disk_size);
|
||||
}
|
||||
|
||||
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
|
||||
@ -1184,10 +1188,12 @@ try
|
||||
{
|
||||
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
|
||||
|
||||
/// Limit on total memory usage
|
||||
size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0);
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(*config);
|
||||
|
||||
double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
|
||||
size_t max_server_memory_usage = server_settings.max_server_memory_usage;
|
||||
|
||||
double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
|
||||
size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
|
||||
|
||||
if (max_server_memory_usage == 0)
|
||||
@ -1215,8 +1221,7 @@ try
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
|
||||
bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true);
|
||||
total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory);
|
||||
total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory);
|
||||
|
||||
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
|
||||
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
|
||||
@ -1234,36 +1239,23 @@ try
|
||||
|
||||
global_context->setRemoteHostFilter(*config);
|
||||
|
||||
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
|
||||
if (config->has("max_table_size_to_drop"))
|
||||
global_context->setMaxTableSizeToDrop(config->getUInt64("max_table_size_to_drop"));
|
||||
|
||||
if (config->has("max_partition_size_to_drop"))
|
||||
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
|
||||
global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop);
|
||||
global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop);
|
||||
|
||||
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
|
||||
if (config->has("concurrent_threads_soft_limit_num"))
|
||||
if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
|
||||
concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num;
|
||||
if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
|
||||
{
|
||||
auto value = config->getUInt64("concurrent_threads_soft_limit_num", 0);
|
||||
if (value > 0 && value < concurrent_threads_soft_limit)
|
||||
concurrent_threads_soft_limit = value;
|
||||
}
|
||||
if (config->has("concurrent_threads_soft_limit_ratio_to_cores"))
|
||||
{
|
||||
auto value = config->getUInt64("concurrent_threads_soft_limit_ratio_to_cores", 0) * std::thread::hardware_concurrency();
|
||||
auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
|
||||
if (value > 0 && value < concurrent_threads_soft_limit)
|
||||
concurrent_threads_soft_limit = value;
|
||||
}
|
||||
ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
|
||||
|
||||
if (config->has("max_concurrent_queries"))
|
||||
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
|
||||
|
||||
if (config->has("max_concurrent_insert_queries"))
|
||||
global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0));
|
||||
|
||||
if (config->has("max_concurrent_select_queries"))
|
||||
global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0));
|
||||
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
|
||||
global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries);
|
||||
global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries);
|
||||
|
||||
if (config->has("keeper_server"))
|
||||
global_context->updateKeeperConfiguration(*config);
|
||||
@ -1272,56 +1264,36 @@ try
|
||||
/// Note: If you specified it in the top level config (not in the config of the default profile)
|
||||
/// then ClickHouse will use it exactly.
|
||||
/// This is done for backward compatibility.
|
||||
if (global_context->areBackgroundExecutorsInitialized() && (config->has("background_pool_size") || config->has("background_merges_mutations_concurrency_ratio")))
|
||||
if (global_context->areBackgroundExecutorsInitialized())
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_pool_size", 16);
|
||||
auto new_ratio = config->getUInt64("background_merges_mutations_concurrency_ratio", 2);
|
||||
auto new_pool_size = server_settings.background_pool_size;
|
||||
auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio;
|
||||
global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio);
|
||||
auto new_scheduling_policy = config->getString("background_merges_mutations_scheduling_policy", "round_robin");
|
||||
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_scheduling_policy);
|
||||
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString());
|
||||
}
|
||||
|
||||
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_move_pool_size"))
|
||||
if (global_context->areBackgroundExecutorsInitialized())
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_move_pool_size");
|
||||
auto new_pool_size = server_settings.background_move_pool_size;
|
||||
global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
|
||||
}
|
||||
|
||||
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_fetches_pool_size"))
|
||||
if (global_context->areBackgroundExecutorsInitialized())
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_fetches_pool_size");
|
||||
auto new_pool_size = server_settings.background_fetches_pool_size;
|
||||
global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
|
||||
}
|
||||
|
||||
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_common_pool_size"))
|
||||
if (global_context->areBackgroundExecutorsInitialized())
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_common_pool_size");
|
||||
auto new_pool_size = server_settings.background_common_pool_size;
|
||||
global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
|
||||
}
|
||||
|
||||
if (config->has("background_buffer_flush_schedule_pool_size"))
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_buffer_flush_schedule_pool_size");
|
||||
global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_pool_size);
|
||||
}
|
||||
|
||||
if (config->has("background_schedule_pool_size"))
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_schedule_pool_size");
|
||||
global_context->getSchedulePool().increaseThreadsCount(new_pool_size);
|
||||
}
|
||||
|
||||
if (config->has("background_message_broker_schedule_pool_size"))
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_message_broker_schedule_pool_size");
|
||||
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_pool_size);
|
||||
}
|
||||
|
||||
if (config->has("background_distributed_schedule_pool_size"))
|
||||
{
|
||||
auto new_pool_size = config->getUInt64("background_distributed_schedule_pool_size");
|
||||
global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size);
|
||||
}
|
||||
global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size);
|
||||
global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size);
|
||||
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size);
|
||||
global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size);
|
||||
|
||||
if (config->has("resources"))
|
||||
{
|
||||
@ -1466,18 +1438,15 @@ try
|
||||
});
|
||||
|
||||
/// Limit on total number of concurrently executed queries.
|
||||
global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0));
|
||||
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
|
||||
|
||||
/// Set up caches.
|
||||
|
||||
/// Lower cache size on low-memory systems.
|
||||
double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
|
||||
size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
|
||||
size_t max_cache_size = static_cast<size_t>(memory_amount * server_settings.cache_size_to_ram_max_ratio);
|
||||
|
||||
/// Size of cache for uncompressed blocks. Zero means disabled.
|
||||
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU");
|
||||
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
|
||||
LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy);
|
||||
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
|
||||
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
|
||||
if (uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
uncompressed_cache_size = max_cache_size;
|
||||
@ -1499,9 +1468,8 @@ try
|
||||
global_context,
|
||||
settings.async_insert_threads));
|
||||
|
||||
/// Size of cache for marks (index of MergeTree family of tables).
|
||||
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
|
||||
String mark_cache_policy = config().getString("mark_cache_policy", "SLRU");
|
||||
size_t mark_cache_size = server_settings.mark_cache_size;
|
||||
String mark_cache_policy = server_settings.mark_cache_policy;
|
||||
if (!mark_cache_size)
|
||||
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
|
||||
if (mark_cache_size > max_cache_size)
|
||||
@ -1512,20 +1480,14 @@ try
|
||||
}
|
||||
global_context->setMarkCache(mark_cache_size, mark_cache_policy);
|
||||
|
||||
/// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
|
||||
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0);
|
||||
if (index_uncompressed_cache_size)
|
||||
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
|
||||
if (server_settings.index_uncompressed_cache_size)
|
||||
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
|
||||
|
||||
/// Size of cache for index marks (index of MergeTree skip indices).
|
||||
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0);
|
||||
if (index_mark_cache_size)
|
||||
global_context->setIndexMarkCache(index_mark_cache_size);
|
||||
if (server_settings.index_mark_cache_size)
|
||||
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
|
||||
|
||||
/// A cache for mmapped files.
|
||||
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary.
|
||||
if (mmap_cache_size)
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
if (server_settings.mmap_cache_size)
|
||||
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
|
||||
|
||||
/// A cache for query results.
|
||||
global_context->setQueryCache(config());
|
||||
@ -1611,7 +1573,7 @@ try
|
||||
/// context is destroyed.
|
||||
/// In addition this object has to be created before the loading of the tables.
|
||||
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
|
||||
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
|
||||
if (server_settings.disable_internal_dns_cache)
|
||||
{
|
||||
/// Disable DNS caching at all
|
||||
DNSResolver::instance().setDisableCacheFlag();
|
||||
@ -1621,7 +1583,7 @@ try
|
||||
{
|
||||
/// Initialize a watcher periodically updating DNS cache
|
||||
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
|
||||
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
|
||||
global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures);
|
||||
}
|
||||
|
||||
if (dns_cache_updater)
|
||||
@ -1886,7 +1848,7 @@ try
|
||||
LOG_INFO(log, "Closed all listening sockets.");
|
||||
|
||||
/// Killing remaining queries.
|
||||
if (!config().getBool("shutdown_wait_unfinished_queries", false))
|
||||
/// Kill the remaining queries unless the server is configured to wait for them
/// (shutdown_wait_unfinished_queries = true). The condition must be negated to keep
/// the behaviour of the previous `!config().getBool("shutdown_wait_unfinished_queries", false)` check.
if (!server_settings.shutdown_wait_unfinished_queries)
    global_context->getProcessList().killAllQueries();
|
||||
|
||||
if (current_connections)
|
||||
|
@ -13,7 +13,7 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
@ -129,7 +129,7 @@ public:
|
||||
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
|
||||
|
||||
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
}
|
||||
|
||||
for (size_t i = begin; i < end; ++i)
|
||||
|
@ -19,7 +19,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -197,7 +197,7 @@ public:
|
||||
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
|
||||
|
||||
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
}
|
||||
|
||||
AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena);
|
||||
|
@ -102,19 +102,21 @@ public:
|
||||
if (!left_argument_constant_node && !right_argument_constant_node)
|
||||
return;
|
||||
|
||||
/** If we extract negative constant, aggregate function name must be updated.
|
||||
/** Need reverse max <-> min for:
|
||||
*
|
||||
* Example: SELECT min(-1 * id);
|
||||
* Result: SELECT -1 * max(id);
|
||||
* max(-1*value) -> -1*min(value)
|
||||
* max(value/-2) -> min(value)/-2
|
||||
* max(1-value) -> 1-min(value)
|
||||
*/
|
||||
std::string aggregate_function_name_if_constant_is_negative;
|
||||
if (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide")
|
||||
auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string
|
||||
{
|
||||
if (lower_aggregate_function_name == "min")
|
||||
aggregate_function_name_if_constant_is_negative = "max";
|
||||
else if (lower_aggregate_function_name == "max")
|
||||
aggregate_function_name_if_constant_is_negative = "min";
|
||||
}
|
||||
if (aggregate_function_name == "min")
|
||||
return "max";
|
||||
else if (aggregate_function_name == "max")
|
||||
return "min";
|
||||
else
|
||||
return aggregate_function_name;
|
||||
};
|
||||
|
||||
size_t arithmetic_function_argument_index = 0;
|
||||
|
||||
@ -126,11 +128,11 @@ public:
|
||||
|
||||
/// Rewrite `aggregate_function(inner_function(constant, argument))` into `inner_function(constant, aggregate_function(argument))`
|
||||
const auto & left_argument_constant_value_literal = left_argument_constant_node->getValue();
|
||||
if (!aggregate_function_name_if_constant_is_negative.empty() &&
|
||||
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
|
||||
{
|
||||
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
|
||||
}
|
||||
bool need_reverse = (arithmetic_function_name == "multiply" && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
|
||||
|| (arithmetic_function_name == "minus");
|
||||
|
||||
if (need_reverse)
|
||||
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
|
||||
|
||||
arithmetic_function_argument_index = 1;
|
||||
}
|
||||
@ -138,11 +140,10 @@ public:
|
||||
{
|
||||
/// Rewrite `aggregate_function(inner_function(argument, constant))` into `inner_function(aggregate_function(argument), constant)`
|
||||
const auto & right_argument_constant_value_literal = right_argument_constant_node->getValue();
|
||||
if (!aggregate_function_name_if_constant_is_negative.empty() &&
|
||||
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
|
||||
{
|
||||
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
|
||||
}
|
||||
bool need_reverse = (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide") && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal);
|
||||
|
||||
if (need_reverse)
|
||||
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
|
||||
|
||||
arithmetic_function_argument_index = 0;
|
||||
}
|
||||
|
@ -233,11 +233,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
|
||||
auto select_settings = select_query_typed.settings();
|
||||
SettingsChanges settings_changes;
|
||||
|
||||
/// We are going to remove settings LIMIT and OFFSET and
|
||||
/// further replace them with corresponding expression nodes
|
||||
UInt64 limit = 0;
|
||||
UInt64 offset = 0;
|
||||
|
||||
/// Remove global settings limit and offset
|
||||
if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset)
|
||||
{
|
||||
Settings settings = updated_context->getSettings();
|
||||
limit = settings.limit;
|
||||
offset = settings.offset;
|
||||
settings.limit = 0;
|
||||
settings.offset = 0;
|
||||
updated_context->setSettings(settings);
|
||||
}
|
||||
|
||||
if (select_settings)
|
||||
{
|
||||
auto & set_query = select_settings->as<ASTSetQuery &>();
|
||||
updated_context->applySettingsChanges(set_query.changes);
|
||||
settings_changes = set_query.changes;
|
||||
|
||||
/// Remove expression settings limit and offset
|
||||
if (auto * limit_field = set_query.changes.tryGet("limit"))
|
||||
{
|
||||
limit = limit_field->safeGet<UInt64>();
|
||||
set_query.changes.removeSetting("limit");
|
||||
}
|
||||
if (auto * offset_field = set_query.changes.tryGet("offset"))
|
||||
{
|
||||
offset = offset_field->safeGet<UInt64>();
|
||||
set_query.changes.removeSetting("offset");
|
||||
}
|
||||
|
||||
if (!set_query.changes.empty())
|
||||
{
|
||||
updated_context->applySettingsChanges(set_query.changes);
|
||||
settings_changes = set_query.changes;
|
||||
}
|
||||
}
|
||||
|
||||
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
|
||||
@ -323,12 +355,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
|
||||
if (select_limit_by)
|
||||
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
|
||||
|
||||
/// Combine limit expression with limit setting
|
||||
auto select_limit = select_query_typed.limitLength();
|
||||
if (select_limit)
|
||||
if (select_limit && limit)
|
||||
{
|
||||
auto function_node = std::make_shared<FunctionNode>("least");
|
||||
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
|
||||
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
|
||||
current_query_tree->getLimit() = std::move(function_node);
|
||||
}
|
||||
else if (limit)
|
||||
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
|
||||
else if (select_limit)
|
||||
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
|
||||
|
||||
/// Combine offset expression with offset setting
|
||||
auto select_offset = select_query_typed.limitOffset();
|
||||
if (select_offset)
|
||||
if (select_offset && offset)
|
||||
{
|
||||
auto function_node = std::make_shared<FunctionNode>("plus");
|
||||
function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context));
|
||||
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
|
||||
current_query_tree->getOffset() = std::move(function_node);
|
||||
}
|
||||
else if (offset)
|
||||
current_query_tree->getOffset() = std::make_shared<ConstantNode>(offset);
|
||||
else if (select_offset)
|
||||
current_query_tree->getOffset() = buildExpression(select_offset, current_context);
|
||||
|
||||
return current_query_tree;
|
||||
|
@ -577,8 +577,8 @@ if (TARGET ch_contrib::annoy)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_rust::skim)
|
||||
# Add only -I, library is needed only for clickhouse-client/clickhouse-local
|
||||
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
dbms_target_link_libraries(PUBLIC ch_rust::skim)
|
||||
endif()
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
@ -197,7 +197,7 @@
|
||||
M(187, COLLATION_COMPARISON_FAILED) \
|
||||
M(188, UNKNOWN_ACTION) \
|
||||
M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \
|
||||
M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \
|
||||
M(190, SIZES_OF_ARRAYS_DONT_MATCH) \
|
||||
M(191, SET_SIZE_LIMIT_EXCEEDED) \
|
||||
M(192, UNKNOWN_USER) \
|
||||
M(193, WRONG_PASSWORD) \
|
||||
|
@ -117,14 +117,14 @@ String FieldVisitorToString::operator() (const Map & x) const
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
|
||||
wb << '(';
|
||||
wb << '[';
|
||||
for (auto it = x.begin(); it != x.end(); ++it)
|
||||
{
|
||||
if (it != x.begin())
|
||||
wb << ", ";
|
||||
wb << applyVisitor(*this, *it);
|
||||
}
|
||||
wb << ')';
|
||||
wb << ']';
|
||||
|
||||
return wb.str();
|
||||
}
|
||||
|
@ -100,8 +100,8 @@ private:
|
||||
bool required_substring_is_prefix;
|
||||
bool is_case_insensitive;
|
||||
std::string required_substring;
|
||||
std::optional<DB::StringSearcher<true, true>> case_sensitive_substring_searcher;
|
||||
std::optional<DB::StringSearcher<false, true>> case_insensitive_substring_searcher;
|
||||
std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
|
||||
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
|
||||
std::unique_ptr<RegexType> re2;
|
||||
unsigned number_of_subpatterns;
|
||||
|
||||
|
@ -46,4 +46,30 @@ Field * SettingsChanges::tryGet(std::string_view name)
|
||||
return &change->value;
|
||||
}
|
||||
|
||||
bool SettingsChanges::insertSetting(std::string_view name, const Field & value)
|
||||
{
|
||||
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
|
||||
if (it != end())
|
||||
return false;
|
||||
emplace_back(name, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SettingsChanges::setSetting(std::string_view name, const Field & value)
|
||||
{
|
||||
if (auto * setting_value = tryGet(name))
|
||||
*setting_value = value;
|
||||
else
|
||||
insertSetting(name, value);
|
||||
}
|
||||
|
||||
bool SettingsChanges::removeSetting(std::string_view name)
|
||||
{
|
||||
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
|
||||
if (it == end())
|
||||
return false;
|
||||
erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -28,6 +28,13 @@ public:
|
||||
bool tryGet(std::string_view name, Field & out_value) const;
|
||||
const Field * tryGet(std::string_view name) const;
|
||||
Field * tryGet(std::string_view name);
|
||||
|
||||
/// Inserts element if it doesn't exist and returns true, otherwise just returns false
|
||||
bool insertSetting(std::string_view name, const Field & value);
|
||||
/// Sets element to value, inserts if doesn't exist
|
||||
void setSetting(std::string_view name, const Field & value);
|
||||
/// If element exists - removes it and returns true, otherwise returns false
|
||||
bool removeSetting(std::string_view name);
|
||||
};
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -107,7 +107,9 @@ public:
|
||||
public:
|
||||
const String & getName() const;
|
||||
Field getValue() const;
|
||||
Field getDefaultValue() const;
|
||||
String getValueString() const;
|
||||
String getDefaultValueString() const;
|
||||
bool isValueChanged() const;
|
||||
const char * getTypeName() const;
|
||||
const char * getDescription() const;
|
||||
@ -797,6 +799,17 @@ Field BaseSettings<TTraits>::SettingFieldRef::getValue() const
|
||||
return accessor->getValue(*settings, index);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
Field BaseSettings<TTraits>::SettingFieldRef::getDefaultValue() const
|
||||
{
|
||||
if constexpr (Traits::allow_custom_settings)
|
||||
{
|
||||
if (custom_setting)
|
||||
return static_cast<Field>(custom_setting->second);
|
||||
}
|
||||
return accessor->getDefaultValue(index);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
String BaseSettings<TTraits>::SettingFieldRef::getValueString() const
|
||||
{
|
||||
@ -808,6 +821,17 @@ String BaseSettings<TTraits>::SettingFieldRef::getValueString() const
|
||||
return accessor->getValueString(*settings, index);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
String BaseSettings<TTraits>::SettingFieldRef::getDefaultValueString() const
|
||||
{
|
||||
if constexpr (Traits::allow_custom_settings)
|
||||
{
|
||||
if (custom_setting)
|
||||
return custom_setting->second.toString();
|
||||
}
|
||||
return accessor->getDefaultValueString(index);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
bool BaseSettings<TTraits>::SettingFieldRef::isValueChanged() const
|
||||
{
|
||||
@ -902,7 +926,8 @@ using AliasMap = std::unordered_map<std::string_view, std::string_view>;
|
||||
void resetValueToDefault(Data & data, size_t index) const { return field_infos[index].reset_value_to_default_function(data); } \
|
||||
void writeBinary(const Data & data, size_t index, WriteBuffer & out) const { return field_infos[index].write_binary_function(data, out); } \
|
||||
void readBinary(Data & data, size_t index, ReadBuffer & in) const { return field_infos[index].read_binary_function(data, in); } \
|
||||
\
|
||||
Field getDefaultValue(size_t index) const { return field_infos[index].get_default_value_function(); } \
|
||||
String getDefaultValueString(size_t index) const { return field_infos[index].get_default_value_string_function(); } \
|
||||
private: \
|
||||
Accessor(); \
|
||||
struct FieldInfo \
|
||||
@ -923,6 +948,8 @@ using AliasMap = std::unordered_map<std::string_view, std::string_view>;
|
||||
void (*reset_value_to_default_function)(Data &) ; \
|
||||
void (*write_binary_function)(const Data &, WriteBuffer &) ; \
|
||||
void (*read_binary_function)(Data &, ReadBuffer &) ; \
|
||||
Field (*get_default_value_function)() ; \
|
||||
String (*get_default_value_string_function)() ; \
|
||||
}; \
|
||||
std::vector<FieldInfo> field_infos; \
|
||||
std::unordered_map<std::string_view, size_t> name_to_index_map; \
|
||||
@ -1033,6 +1060,8 @@ struct DefineAliases
|
||||
[](const Data & data) -> bool { return data.NAME.changed; }, \
|
||||
[](Data & data) { data.NAME = SettingField##TYPE{DEFAULT}; }, \
|
||||
[](const Data & data, WriteBuffer & out) { data.NAME.writeBinary(out); }, \
|
||||
[](Data & data, ReadBuffer & in) { data.NAME.readBinary(in); } \
|
||||
[](Data & data, ReadBuffer & in) { data.NAME.readBinary(in); }, \
|
||||
[]() -> Field { return static_cast<Field>(SettingField##TYPE{DEFAULT}); }, \
|
||||
[]() -> String { return SettingField##TYPE{DEFAULT}.toString(); } \
|
||||
});
|
||||
}
|
||||
|
19
src/Core/ServerSettings.cpp
Normal file
19
src/Core/ServerSettings.cpp
Normal file
@ -0,0 +1,19 @@
|
||||
#include "ServerSettings.h"
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
IMPLEMENT_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
|
||||
|
||||
/// Walks over every declared server setting and, when the configuration contains a key
/// with the same name, assigns the setting from the string value stored under that key.
/// Settings that are absent from the configuration are left untouched.
void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    for (auto setting : all())
    {
        const auto & setting_name = setting.getName();

        if (!config.has(setting_name))
            continue;

        set(setting_name, config.getString(setting_name));
    }
}
|
||||
|
||||
}
|
78
src/Core/ServerSettings.h
Normal file
78
src/Core/ServerSettings.h
Normal file
@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include <Core/BaseSettings.h>
|
||||
|
||||
|
||||
namespace Poco::Util
|
||||
{
|
||||
class AbstractConfiguration;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
#define SERVER_SETTINGS(M, ALIAS) \
|
||||
M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
|
||||
M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
|
||||
M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
|
||||
M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
|
||||
M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
|
||||
M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
|
||||
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
|
||||
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
|
||||
M(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||
M(UInt32, max_threads_for_connection_collector, 10, "The maximum number of threads that will be used for draining connections asynchronously in a background upon finishing executing distributed queries.", 0) \
|
||||
M(String, default_database, "default", "Default database name.", 0) \
|
||||
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
||||
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
|
||||
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
|
||||
M(UInt64, max_server_memory_usage, 0, "Limit on total memory usage. Zero means Unlimited.", 0) \
|
||||
M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to ram ratio. Allows to lower max memory on low-memory systems.", 0) \
|
||||
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
|
||||
\
|
||||
M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \
|
||||
M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. Zero means Unlimited.", 0) \
|
||||
M(UInt64, max_concurrent_select_queries, 0, "Limit on total number of concurrently select queries. Zero means Unlimited.", 0) \
|
||||
\
|
||||
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
|
||||
M(String, uncompressed_cache_policy, "SLRU", "Uncompressed cache policy name.", 0) \
|
||||
M(UInt64, uncompressed_cache_size, 0, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
|
||||
M(UInt64, mark_cache_size, 5368709120, "Size of cache for marks (index of MergeTree family of tables).", 0) \
|
||||
M(String, mark_cache_policy, "SLRU", "Mark cache policy name.", 0) \
|
||||
M(UInt64, index_uncompressed_cache_size, 0, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \
|
||||
M(UInt64, index_mark_cache_size, 0, "Size of cache for index marks. Zero means disabled.", 0) \
|
||||
M(UInt64, mmap_cache_size, 1000, "A cache for mmapped files.", 0) /* The choice of default is arbitrary. */ \
|
||||
\
|
||||
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
|
||||
M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
|
||||
M(UInt32, dns_max_consecutive_failures, 5, "Max consecutive DNS resolve failures of a hostname before it is dropped from the internal DNS cache.", 0) \
|
||||
\
|
||||
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
|
||||
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
|
||||
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means Unlimited.", 0) \
|
||||
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
|
||||
\
|
||||
M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_merges_mutations_concurrency_ratio, 2, "The multiplier which shows the relation between the number of tasks that could be executed concurrently and the number of threads being used.", 0) \
|
||||
M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
|
||||
M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_fetches_pool_size, 8, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
|
||||
M(UInt64, background_schedule_pool_size, 16, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
|
||||
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
|
||||
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
|
||||
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS)
|
||||
|
||||
/// Settings container whose fields are described by ServerSettingsTraits
/// (the traits type passed to BaseSettings as its template argument).
struct ServerSettings : public BaseSettings<ServerSettingsTraits>
|
||||
{
|
||||
/// Declared here only; the definition is not visible in this chunk.
/// NOTE(review): presumably fills the settings from `config` — confirm against the implementation.
void loadSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -809,7 +809,7 @@ class IColumn;
|
||||
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
|
||||
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \
|
||||
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \
|
||||
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
@ -856,6 +856,7 @@ class IColumn;
|
||||
M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \
|
||||
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
|
||||
M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
|
||||
M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
|
||||
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
|
||||
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
|
||||
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
|
||||
|
@ -80,6 +80,8 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
|
||||
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
|
||||
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
|
||||
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
|
||||
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},
|
||||
|
@ -171,4 +171,12 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"mmap", LocalFSReadMethod::mmap},
|
||||
{"pread", LocalFSReadMethod::pread},
|
||||
{"read", LocalFSReadMethod::read}})
|
||||
|
||||
|
||||
/// Associates the textual values "1.0", "2.4", "2.6" and "2.latest" with the
/// corresponding FormatSettings::ParquetVersion enumerators for the ParquetVersion setting type.
/// NOTE(review): ErrorCodes::BAD_ARGUMENTS is presumably the code raised for an unknown value —
/// confirm against the macro definition.
IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"1.0", FormatSettings::ParquetVersion::V1_0},
|
||||
{"2.4", FormatSettings::ParquetVersion::V2_4},
|
||||
{"2.6", FormatSettings::ParquetVersion::V2_6},
|
||||
{"2.latest", FormatSettings::ParquetVersion::V2_LATEST}})
|
||||
|
||||
}
|
||||
|
@ -72,6 +72,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeIn
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, FormatSettings::DateTimeOutputFormat)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion)
|
||||
|
||||
enum class LogsLevel
|
||||
{
|
||||
none = 0, /// Disable
|
||||
|
@ -150,10 +150,16 @@ template struct SettingFieldNumber<UInt64>;
|
||||
template struct SettingFieldNumber<Int64>;
|
||||
template struct SettingFieldNumber<float>;
|
||||
template struct SettingFieldNumber<bool>;
|
||||
template struct SettingFieldNumber<Int32>;
|
||||
template struct SettingFieldNumber<UInt32>;
|
||||
template struct SettingFieldNumber<double>;
|
||||
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<UInt64>>;
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<Int64>>;
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<float>>;
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<UInt32>>;
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<Int32>>;
|
||||
template struct SettingAutoWrapper<SettingFieldNumber<double>>;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -55,7 +55,10 @@ struct SettingFieldNumber
|
||||
|
||||
using SettingFieldUInt64 = SettingFieldNumber<UInt64>;
|
||||
using SettingFieldInt64 = SettingFieldNumber<Int64>;
|
||||
using SettingFieldUInt32 = SettingFieldNumber<UInt32>;
|
||||
using SettingFieldInt32 = SettingFieldNumber<Int32>;
|
||||
using SettingFieldFloat = SettingFieldNumber<float>;
|
||||
using SettingFieldDouble = SettingFieldNumber<double>;
|
||||
using SettingFieldBool = SettingFieldNumber<bool>;
|
||||
|
||||
/** Wraps any SettingField to support special value 'auto' that can be checked with `is_auto` flag.
|
||||
@ -129,6 +132,9 @@ struct SettingAutoWrapper
|
||||
using SettingFieldUInt64Auto = SettingAutoWrapper<SettingFieldUInt64>;
|
||||
using SettingFieldInt64Auto = SettingAutoWrapper<SettingFieldInt64>;
|
||||
using SettingFieldFloatAuto = SettingAutoWrapper<SettingFieldFloat>;
|
||||
using SettingFieldUInt32Auto = SettingAutoWrapper<SettingFieldUInt32>;
|
||||
using SettingFieldInt32Auto = SettingAutoWrapper<SettingFieldInt32>;
|
||||
using SettingFieldDoubleAuto = SettingAutoWrapper<SettingFieldDouble>;
|
||||
|
||||
/* Similar to SettingFieldUInt64Auto with small differences to behave like regular UInt64, supported for compatibility.
|
||||
* When setting to 'auto' it becomes equal to the number of processor cores without taking into account SMT.
|
||||
|
@ -25,7 +25,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace Nested
|
||||
@ -242,7 +242,7 @@ void validateArraySizes(const Block & block)
|
||||
const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
|
||||
|
||||
if (!first_array_column.hasEqualOffsets(another_array_column))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Elements '{}' and '{}' "
|
||||
"of Nested data structure '{}' (Array columns) have different array sizes.",
|
||||
block.getByPosition(it->second).name, elem.name, split.first);
|
||||
|
@ -99,6 +99,17 @@ struct RegExpTreeDictionary::RegexTreeNode
|
||||
return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
|
||||
/// check if this node can cover all the attributes from the query.
|
||||
bool containsAll(const std::unordered_map<String, const DictionaryAttribute &> & matching_attributes) const
|
||||
{
|
||||
for (const auto & [key, value] : matching_attributes)
|
||||
{
|
||||
if (!attributes.contains(key))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct AttributeValue
|
||||
{
|
||||
Field field;
|
||||
@ -498,6 +509,9 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
|
||||
if (node_ptr->match(reinterpret_cast<const char *>(keys_data.data()) + offset, length))
|
||||
{
|
||||
match_result.insertNodeID(node_ptr->id);
|
||||
/// When this node is leaf and contains all the required attributes, it means a match.
|
||||
if (node_ptr->containsAll(attributes) && node_ptr->children.empty())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,6 +110,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.null_as_default = settings.input_format_null_as_default;
|
||||
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
||||
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
|
||||
format_settings.parquet.output_version = settings.output_format_parquet_version;
|
||||
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
|
||||
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
|
||||
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
|
||||
|
@ -175,6 +175,14 @@ struct FormatSettings
|
||||
String column_for_object_name;
|
||||
} json_object_each_row;
|
||||
|
||||
/// Parquet format version selectable for the output format.
enum class ParquetVersion
|
||||
{
|
||||
V1_0,
|
||||
V2_4,
|
||||
V2_6,
|
||||
V2_LATEST,
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
UInt64 row_group_size = 1000000;
|
||||
@ -186,6 +194,7 @@ struct FormatSettings
|
||||
bool output_string_as_string = false;
|
||||
bool output_fixed_string_as_fixed_byte_array = true;
|
||||
UInt64 max_block_size = 8192;
|
||||
/// Parquet format version to write. Initialized explicitly so that a default-constructed
/// FormatSettings never carries an indeterminate enum value.
ParquetVersion output_version = ParquetVersion::V2_LATEST;
|
||||
} parquet;
|
||||
|
||||
struct Pretty
|
||||
|
@ -16,7 +16,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
|
||||
}
|
||||
return {nested_columns, offsets->data()};
|
||||
}
|
||||
|
@ -647,11 +647,12 @@ public:
|
||||
case ElementType::OBJECT:
|
||||
type = '{';
|
||||
break;
|
||||
case ElementType::BOOL:
|
||||
type = 'b';
|
||||
break;
|
||||
case ElementType::NULL_VALUE:
|
||||
type = 0;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnVector<Int8> & col_vec = assert_cast<ColumnVector<Int8> &>(dest);
|
||||
|
@ -23,9 +23,10 @@ namespace ErrorCodes
|
||||
namespace impl
|
||||
{
|
||||
|
||||
/// Is the [I]LIKE expression reduced to finding a substring in a string?
|
||||
/// Is the [I]LIKE expression equivalent to a substring search?
|
||||
inline bool likePatternIsSubstring(std::string_view pattern, String & res)
|
||||
{
|
||||
/// TODO: ignore multiple leading or trailing %
|
||||
if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
|
||||
return false;
|
||||
|
||||
@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
|
||||
case '\\':
|
||||
++pos;
|
||||
if (pos == end)
|
||||
/// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
|
||||
return false;
|
||||
else
|
||||
res += *pos;
|
||||
{
|
||||
switch (*pos)
|
||||
{
|
||||
/// Known LIKE escape sequences:
|
||||
case '%':
|
||||
case '_':
|
||||
case '\\':
|
||||
res += *pos;
|
||||
break;
|
||||
/// For all other escape sequences, the backslash loses its special meaning
|
||||
default:
|
||||
res += '\\';
|
||||
res += *pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
res += *pos;
|
||||
|
@ -37,7 +37,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
@ -361,7 +361,7 @@ public:
|
||||
if (getOffsetsPtr(*column_array) != offsets_column
|
||||
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"{}s passed to {} must have equal size",
|
||||
argument_type_name,
|
||||
getName());
|
||||
|
@ -16,7 +16,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
@ -356,7 +356,7 @@ private:
|
||||
{
|
||||
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Arguments of function {} have different array sizes: {} and {}",
|
||||
getName(),
|
||||
offsets_x[row] - prev_offset,
|
||||
@ -423,7 +423,7 @@ private:
|
||||
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Arguments of function {} have different array sizes: {} and {}",
|
||||
getName(),
|
||||
offsets_x[0],
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
class FunctionArrayEnumerateUniq;
|
||||
@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended<Derived>::executeImpl(const ColumnsWith
|
||||
offsets_column = array->getOffsetsPtr();
|
||||
}
|
||||
else if (offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
|
||||
const auto * array_data = &array->getData();
|
||||
|
@ -60,7 +60,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
class FunctionArrayEnumerateUniqRanked;
|
||||
@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
{
|
||||
if (*offsets_by_depth[0] != array->getOffsets())
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
|
||||
}
|
||||
}
|
||||
@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
{
|
||||
if (*offsets_by_depth[col_depth] != array->getOffsets())
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
|
||||
}
|
||||
}
|
||||
@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
|
||||
if (col_depth < arrays_depths.depths[array_num])
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"{}: Passed array number {} depth ({}) is more than the actual array depth ({}).",
|
||||
getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth);
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
}
|
||||
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();
|
||||
|
@ -21,7 +21,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
}
|
||||
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();
|
||||
|
@ -18,7 +18,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument
|
||||
if (i == 0)
|
||||
offsets = &offsets_i;
|
||||
else if (offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
|
||||
const auto * array_data = &array->getData();
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
@ -81,7 +81,7 @@ public:
|
||||
}
|
||||
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array sizes",
|
||||
i + 1, getName());
|
||||
}
|
||||
|
@ -55,14 +55,19 @@ private:
|
||||
getName(), arguments.size());
|
||||
}
|
||||
|
||||
for (const auto & arg : arguments)
|
||||
DataTypes arg_types;
|
||||
for (size_t i = 0, size = arguments.size(); i < size; ++i)
|
||||
{
|
||||
if (!isInteger(arg))
|
||||
if (i < 2 && WhichDataType(arguments[i]).isIPv4())
|
||||
arg_types.emplace_back(std::make_shared<DataTypeUInt32>());
|
||||
else if (isInteger(arguments[i]))
|
||||
arg_types.push_back(arguments[i]);
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
|
||||
arg->getName(), getName());
|
||||
arguments[i]->getName(), getName());
|
||||
}
|
||||
|
||||
DataTypePtr common_type = getLeastSupertype(arguments);
|
||||
DataTypePtr common_type = getLeastSupertype(arg_types);
|
||||
return std::make_shared<DataTypeArray>(common_type);
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -118,7 +118,7 @@ public:
|
||||
const auto * rhs_array = assert_cast<const ColumnArray *>(arguments[i].column.get());
|
||||
|
||||
if (!lhs_array->hasEqualOffsets(*rhs_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array offsets",
|
||||
i + 1,
|
||||
getName());
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -200,7 +200,7 @@ private:
|
||||
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());
|
||||
if (!array_x.hasEqualOffsets(array_y))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument 3 of function {} have different array sizes", getName());
|
||||
}
|
||||
}
|
||||
@ -222,7 +222,7 @@ private:
|
||||
{
|
||||
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument 3 of function {} have different array sizes", getName());
|
||||
}
|
||||
prev_offset = offsets_y[row];
|
||||
|
@ -12,7 +12,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
/** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns' offsets when Update
|
||||
@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl(
|
||||
else if (first_length != length)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Elements '{}' and '{}' of Nested data structure (Array columns) "
|
||||
"have different array sizes ({} and {} respectively) on row {}",
|
||||
arguments[1].name, arguments[args_idx].name, first_length, length, i);
|
||||
|
290
src/Functions/widthBucket.cpp
Normal file
290
src/Functions/widthBucket.cpp
Normal file
@ -0,0 +1,290 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Core/Types.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Common/register_objects.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class FunctionWidthBucket : public IFunction
|
||||
{
|
||||
template <typename TDataType>
|
||||
void throwIfInvalid(
|
||||
const size_t argument_index,
|
||||
const ColumnConst * col_const,
|
||||
const typename ColumnVector<TDataType>::Container * col_vec,
|
||||
const size_t expected_size) const
|
||||
{
|
||||
if ((nullptr == col_const) ^ (nullptr != col_vec && col_vec->size() == expected_size))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error in function {}: argument {} has unexpected type or size!",
|
||||
getName(),
|
||||
argument_index);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TDataType>
|
||||
const typename ColumnVector<TDataType>::Container * getDataIfNotNull(const ColumnVector<TDataType> * col_vec) const
|
||||
{
|
||||
if (nullptr == col_vec)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return &col_vec->getData();
|
||||
}
|
||||
|
||||
template <typename TDataType>
|
||||
static TDataType
|
||||
getValue(const ColumnConst * col_const, const typename ColumnVector<TDataType>::Container * col_vec, const size_t index)
|
||||
{
|
||||
if (nullptr != col_const)
|
||||
{
|
||||
return col_const->getValue<TDataType>();
|
||||
}
|
||||
return col_vec->data()[index];
|
||||
}
|
||||
|
||||
static Float64 calculateRelativeBucket(const Float64 operand, const Float64 low, const Float64 high)
|
||||
{
|
||||
return (operand - low) / (high - low);
|
||||
}
|
||||
|
||||
template <typename TResultType, typename TCountType>
|
||||
std::optional<TResultType> checkArguments(const Float64 operand, const Float64 low, const Float64 high, const TCountType count) const
|
||||
{
|
||||
if (count == 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Last argument (count) for function {} cannot be 0.", getName());
|
||||
}
|
||||
if (isNaN(operand) || isNaN(low) || isNaN(high))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "The first three arguments (operand, low, high) cannot be NaN in function {}", getName());
|
||||
}
|
||||
// operand can be infinity, the following conditions will take care of it
|
||||
if (!isFinite(low) || !isFinite(high))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second and third arguments (low, high) cannot be Inf function {}", getName());
|
||||
}
|
||||
if (operand < low || low >= high)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else if (operand >= high)
|
||||
{
|
||||
return count + 1;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
template <typename TResultType, typename TCountType>
|
||||
TResultType NO_SANITIZE_UNDEFINED calculate(const Float64 operand, const Float64 low, const Float64 high, const TCountType count) const
|
||||
{
|
||||
if (const auto maybe_early_return = checkArguments<TResultType>(operand, low, high, count); maybe_early_return.has_value())
|
||||
{
|
||||
return *maybe_early_return;
|
||||
}
|
||||
|
||||
const auto relative_bucket = calculateRelativeBucket(operand, low, high);
|
||||
|
||||
if (isNaN(relative_bucket) || !isFinite(relative_bucket))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR, "The calculation resulted in NaN or Inf which is unexpected in function {}.", getName());
|
||||
}
|
||||
return static_cast<TResultType>(count * relative_bucket + 1);
|
||||
}
|
||||
|
||||
template <is_any_of<UInt8, UInt16, UInt32, UInt64> TCountType>
|
||||
ColumnPtr executeForResultType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
|
||||
{
|
||||
using ResultType = typename NumberTraits::Construct<false, false, NumberTraits::nextSize(sizeof(TCountType))>::Type;
|
||||
auto common_type = std::make_shared<DataTypeNumber<Float64>>();
|
||||
|
||||
std::vector<ColumnPtr> casted_columns;
|
||||
casted_columns.reserve(3);
|
||||
for (const auto argument_index : collections::range(0, 3))
|
||||
{
|
||||
casted_columns.push_back(castColumn(arguments[argument_index], common_type));
|
||||
}
|
||||
|
||||
const auto * operands_vec = getDataIfNotNull(checkAndGetColumn<ColumnVector<Float64>>(casted_columns[0].get()));
|
||||
const auto * lows_vec = getDataIfNotNull(checkAndGetColumn<ColumnVector<Float64>>(casted_columns[1].get()));
|
||||
const auto * highs_vec = getDataIfNotNull(checkAndGetColumn<ColumnVector<Float64>>(casted_columns[2].get()));
|
||||
const auto * counts_vec = getDataIfNotNull(checkAndGetColumn<ColumnVector<TCountType>>(arguments[3].column.get()));
|
||||
|
||||
const auto * operands_col_const = checkAndGetColumnConst<ColumnVector<Float64>>(casted_columns[0].get());
|
||||
const auto * lows_col_const = checkAndGetColumnConst<ColumnVector<Float64>>(casted_columns[1].get());
|
||||
const auto * highs_col_const = checkAndGetColumnConst<ColumnVector<Float64>>(casted_columns[2].get());
|
||||
const auto * counts_col_const = checkAndGetColumnConst<ColumnVector<TCountType>>(arguments[3].column.get());
|
||||
|
||||
throwIfInvalid<Float64>(0, operands_col_const, operands_vec, input_rows_count);
|
||||
throwIfInvalid<Float64>(1, lows_col_const, lows_vec, input_rows_count);
|
||||
throwIfInvalid<Float64>(2, highs_col_const, highs_vec, input_rows_count);
|
||||
throwIfInvalid<TCountType>(4, counts_col_const, counts_vec, input_rows_count);
|
||||
|
||||
const auto are_all_const_cols
|
||||
= nullptr != operands_col_const && nullptr != lows_col_const && nullptr != highs_col_const && nullptr != counts_col_const;
|
||||
|
||||
|
||||
if (are_all_const_cols)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR, "Logical error in function {}: unexpected combination of argument types!", getName());
|
||||
}
|
||||
|
||||
auto result_column = ColumnVector<ResultType>::create();
|
||||
result_column->reserve(1);
|
||||
auto & result_data = result_column->getData();
|
||||
|
||||
for (const auto row_index : collections::range(0, input_rows_count))
|
||||
{
|
||||
const auto operand = getValue<Float64>(operands_col_const, operands_vec, row_index);
|
||||
const auto low = getValue<Float64>(lows_col_const, lows_vec, row_index);
|
||||
const auto high = getValue<Float64>(highs_col_const, highs_vec, row_index);
|
||||
const auto count = getValue<TCountType>(counts_col_const, counts_vec, row_index);
|
||||
result_data.push_back(calculate<ResultType>(operand, low, high, count));
|
||||
}
|
||||
|
||||
return result_column;
|
||||
}
|
||||
|
||||
public:
|
||||
static inline const char * name = "widthBucket";
|
||||
|
||||
explicit FunctionWidthBucket() = default;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionWidthBucket>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 4; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
for (const auto argument_index : collections::range(0, 3))
|
||||
{
|
||||
if (!isNativeNumber(arguments[argument_index]))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The first three arguments of function {} must be a Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32 "
|
||||
"or Float64.",
|
||||
getName());
|
||||
}
|
||||
}
|
||||
if (!WhichDataType(arguments[3]).isNativeUInt())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The last argument of function {} must be UInt8, UInt16, UInt32 or UInt64, found {}.",
|
||||
getName(),
|
||||
arguments[3]->getName());
|
||||
}
|
||||
switch (arguments[3]->getTypeId())
|
||||
{
|
||||
case TypeIndex::UInt8:
|
||||
return std::make_shared<DataTypeUInt16>();
|
||||
case TypeIndex::UInt16:
|
||||
return std::make_shared<DataTypeUInt32>();
|
||||
case TypeIndex::UInt32:
|
||||
[[fallthrough]];
|
||||
case TypeIndex::UInt64:
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
/// Always answers true, regardless of the argument types.
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
    return true;
}
|
||||
|
||||
/// Dispatches on the type of the fourth argument (count) and runs executeForResultType
/// instantiated with the matching unsigned integer type.
/// getReturnTypeImpl only accepts UInt8/16/32/64 for count, so any other type id is unreachable here.
ColumnPtr executeImpl(
    const ColumnsWithTypeAndName & arguments,
    const DataTypePtr & /*result_type*/,
    size_t input_rows_count) const override
{
    const auto count_type_id = arguments[3].type->getTypeId();

    if (count_type_id == TypeIndex::UInt8)
        return executeForResultType<UInt8>(arguments, input_rows_count);

    if (count_type_id == TypeIndex::UInt16)
        return executeForResultType<UInt16>(arguments, input_rows_count);

    if (count_type_id == TypeIndex::UInt32)
        return executeForResultType<UInt32>(arguments, input_rows_count);

    if (count_type_id == TypeIndex::UInt64)
        return executeForResultType<UInt64>(arguments, input_rows_count);

    UNREACHABLE();
}
|
||||
|
||||
/// Always answers true.
bool useDefaultImplementationForConstants() const override
{
    return true;
}
|
||||
};
|
||||
|
||||
/// Registers FunctionWidthBucket in the function factory together with its embedded
/// documentation (description, one example query, category) and a case-insensitive alias.
REGISTER_FUNCTION(WidthBucket)
|
||||
{
|
||||
factory.registerFunction<FunctionWidthBucket>({
|
||||
R"(
|
||||
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
|
||||
|
||||
`operand`, `low`, `high` can be any native number type. `count` can only be unsigned native integer and its value cannot be zero.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
widthBucket(operand, low, high, count)
|
||||
```
|
||||
|
||||
There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
[example:simple]
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─widthBucket(10.15, -8.6, 23, 18)─┐
|
||||
│ 11 │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
)",
|
||||
Documentation::Examples{
|
||||
{"simple", "SELECT widthBucket(10.15, -8.6, 23, 18)"},
|
||||
},
|
||||
Documentation::Categories{"Mathematical"},
|
||||
});
|
||||
|
||||
/// Registered as case-insensitive, so spellings such as WIDTH_BUCKET resolve to widthBucket.
factory.registerAlias("width_bucket", "widthBucket", FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
@ -73,24 +73,6 @@ Field zeroField(const Field & value)
|
||||
throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unexpected literal type in function");
|
||||
}
|
||||
|
||||
/// Picks the aggregate function name to use once a literal has been moved out of the aggregate.
///
/// For `min`/`max` wrapped around `multiply`/`divide` with a negative literal the name is swapped
/// (min <-> max); in every other case `func_name` is returned unchanged.
/// The returned reference points either at `func_name` itself or at an entry of a static table.
const String & changeNameIfNeeded(const String & func_name, const String & child_name, const ASTLiteral & literal)
{
    static const std::unordered_map<String, std::unordered_set<String>> matches = {
        { "min", { "multiply", "divide" } },
        { "max", { "multiply", "divide" } }
    };

    static const std::unordered_map<String, String> swap_to = {
        { "min", "max" },
        { "max", "min" }
    };

    /// The sign test stays first: zeroField() may throw for unsupported literal types,
    /// and that has to happen before any name lookup, exactly as before.
    const bool literal_is_negative = literal.value < zeroField(literal.value);
    if (!literal_is_negative)
        return func_name;

    const auto sign_sensitive = matches.find(func_name);
    if (sign_sensitive == matches.end())
        return func_name;

    if (!sign_sensitive->second.contains(child_name))
        return func_name;

    return swap_to.find(func_name)->second;
}
|
||||
|
||||
ASTPtr tryExchangeFunctions(const ASTFunction & func)
|
||||
{
|
||||
static const std::unordered_map<String, std::unordered_set<String>> supported
|
||||
@ -114,19 +96,42 @@ ASTPtr tryExchangeFunctions(const ASTFunction & func)
|
||||
|
||||
ASTPtr optimized_ast;
|
||||
|
||||
/** Need reverse max <-> min for:
|
||||
*
|
||||
* max(-1*value) -> -1*min(value)
|
||||
* max(value/-2) -> min(value)/-2
|
||||
* max(1-value) -> 1-min(value)
|
||||
*/
|
||||
/// Maps "min" to "max" and "max" to "min"; any other name is returned unchanged.
auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string
{
    const bool is_min = aggregate_function_name == "min";
    const bool is_max = aggregate_function_name == "max";

    if (!is_min && !is_max)
        return aggregate_function_name;

    return is_min ? "max" : "min";
};
|
||||
|
||||
if (first_literal && !second_literal)
|
||||
{
|
||||
/// It's possible to rewrite 'sum(1/n)' with 'sum(1) * div(1/n)' but we lose accuracy. Ignored.
|
||||
if (child_func->name == "divide")
|
||||
return {};
|
||||
bool need_reverse
|
||||
= (child_func->name == "multiply" && first_literal->value < zeroField(first_literal->value)) || child_func->name == "minus";
|
||||
if (need_reverse)
|
||||
lower_name = get_reverse_aggregate_function_name(lower_name);
|
||||
|
||||
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *first_literal);
|
||||
optimized_ast = exchangeExtractFirstArgument(new_name, *child_func);
|
||||
optimized_ast = exchangeExtractFirstArgument(lower_name, *child_func);
|
||||
}
|
||||
else if (second_literal) /// second or both are consts
|
||||
{
|
||||
const String & new_name = changeNameIfNeeded(lower_name, child_func->name, *second_literal);
|
||||
optimized_ast = exchangeExtractSecondArgument(new_name, *child_func);
|
||||
bool need_reverse
|
||||
= (child_func->name == "multiply" || child_func->name == "divide") && second_literal->value < zeroField(second_literal->value);
|
||||
if (need_reverse)
|
||||
lower_name = get_reverse_aggregate_function_name(lower_name);
|
||||
|
||||
optimized_ast = exchangeExtractSecondArgument(lower_name, *child_func);
|
||||
}
|
||||
|
||||
if (optimized_ast)
|
||||
|
@ -14,7 +14,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block)
|
||||
|
||||
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!is_unaligned && !array.hasEqualOffsets(*any_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
|
||||
|
||||
current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
current.type = type->getNestedType();
|
||||
|
@ -92,12 +92,11 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con
|
||||
return columns;
|
||||
}
|
||||
|
||||
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context)
|
||||
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error)
|
||||
{
|
||||
ParserColumnDeclarationList parser(true, true);
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
String error;
|
||||
const char * start = structure.data();
|
||||
const char * end = structure.data() + structure.size();
|
||||
ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth);
|
||||
@ -106,7 +105,10 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip
|
||||
|
||||
auto * columns_list = dynamic_cast<ASTExpressionList *>(columns_list_raw.get());
|
||||
if (!columns_list)
|
||||
{
|
||||
error = fmt::format("Invalid columns declaration list: \"{}\"", structure);
|
||||
return false;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
@ -118,6 +120,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
error = getCurrentExceptionMessage(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,6 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings
|
||||
/// Parses a common argument for table functions such as table structure given in string
|
||||
ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context);
|
||||
|
||||
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context);
|
||||
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error);
|
||||
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ namespace ErrorCodes
|
||||
ASTPtr ASTColumnsRegexpMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
|
||||
clone->children.clear();
|
||||
|
||||
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
@ -91,6 +92,7 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
|
||||
ASTPtr ASTColumnsListMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
|
||||
clone->children.clear();
|
||||
|
||||
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
@ -150,6 +152,7 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt
|
||||
ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedColumnsRegexpMatcher>(*this);
|
||||
clone->children.clear();
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
@ -216,6 +219,7 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
|
||||
ASTPtr ASTQualifiedColumnsListMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedColumnsListMatcher>(*this);
|
||||
clone->children.clear();
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
|
@ -334,6 +334,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl
|
||||
}
|
||||
else if (query_node || union_node)
|
||||
{
|
||||
if (table_expression_data.getColumnNames().empty())
|
||||
{
|
||||
const auto & projection_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns();
|
||||
NamesAndTypesList projection_columns_list(projection_columns.begin(), projection_columns.end());
|
||||
auto additional_column_to_read = ExpressionActions::getSmallestColumn(projection_columns_list);
|
||||
|
||||
const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression);
|
||||
table_expression_data.addColumn(additional_column_to_read, column_identifier);
|
||||
}
|
||||
|
||||
auto subquery_options = select_query_options.subquery();
|
||||
Planner subquery_planner(table_expression, subquery_options, planner_context->getGlobalPlannerContext());
|
||||
/// Propagate storage limits to subquery
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Formats/SchemaInferenceUtils.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Interpreters/parseColumnsListForTableFunction.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
@ -15,20 +16,38 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read)
|
||||
void checkFinalInferredType(
|
||||
DataTypePtr & type,
|
||||
const String & name,
|
||||
const FormatSettings & settings,
|
||||
const DataTypePtr & default_type,
|
||||
size_t rows_read,
|
||||
const String & hints_parsing_error)
|
||||
{
|
||||
if (!checkIfTypeIsComplete(type))
|
||||
{
|
||||
if (!default_type)
|
||||
throw Exception(
|
||||
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
|
||||
"Cannot determine type for column '{}' by first {} rows "
|
||||
"of data, most likely this column contains only Nulls or empty "
|
||||
"Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. "
|
||||
"If your data contains complex JSON objects, try enabling one "
|
||||
"of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings",
|
||||
name,
|
||||
rows_read);
|
||||
{
|
||||
if (hints_parsing_error.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
|
||||
"Cannot determine type for column '{}' by first {} rows "
|
||||
"of data, most likely this column contains only Nulls or empty "
|
||||
"Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. "
|
||||
"If your data contains complex JSON objects, try enabling one "
|
||||
"of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings",
|
||||
name,
|
||||
rows_read);
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
|
||||
"Cannot determine type for column '{}' by first {} rows "
|
||||
"of data, most likely this column contains only Nulls or empty Arrays/Maps. "
|
||||
"Column types from setting schema_inference_hints couldn't be parsed because of error: {}",
|
||||
name,
|
||||
rows_read,
|
||||
hints_parsing_error);
|
||||
}
|
||||
|
||||
type = default_type;
|
||||
}
|
||||
@ -46,11 +65,15 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo
|
||||
void IIRowSchemaReader::setContext(ContextPtr & context)
|
||||
{
|
||||
ColumnsDescription columns;
|
||||
if (tryParseColumnsListFromString(hints_str, columns, context))
|
||||
if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error))
|
||||
{
|
||||
for (const auto & [name, type] : columns.getAll())
|
||||
hints[name] = type;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error);
|
||||
}
|
||||
}
|
||||
|
||||
void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
|
||||
@ -137,7 +160,14 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
if (!new_data_types[field_index] || hints.contains(column_names[field_index]))
|
||||
continue;
|
||||
|
||||
chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read);
|
||||
chooseResultColumnType(
|
||||
*this,
|
||||
data_types[field_index],
|
||||
new_data_types[field_index],
|
||||
getDefaultType(field_index),
|
||||
std::to_string(field_index + 1),
|
||||
rows_read,
|
||||
hints_parsing_error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,7 +179,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
{
|
||||
transformFinalTypeIfNeeded(data_types[field_index]);
|
||||
/// Check that we could determine the type of this column.
|
||||
checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read);
|
||||
checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read, hints_parsing_error);
|
||||
}
|
||||
result.emplace_back(column_names[field_index], data_types[field_index]);
|
||||
}
|
||||
@ -246,7 +276,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
|
||||
continue;
|
||||
|
||||
auto & type = it->second;
|
||||
chooseResultColumnType(*this, type, new_type, default_type, name, rows_read);
|
||||
chooseResultColumnType(*this, type, new_type, default_type, name, rows_read, hints_parsing_error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,7 +293,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
|
||||
{
|
||||
transformFinalTypeIfNeeded(type);
|
||||
/// Check that we could determine the type of this column.
|
||||
checkFinalInferredType(type, name, format_settings, default_type, rows_read);
|
||||
checkFinalInferredType(type, name, format_settings, default_type, rows_read, hints_parsing_error);
|
||||
}
|
||||
result.emplace_back(name, type);
|
||||
}
|
||||
|
@ -65,6 +65,7 @@ protected:
|
||||
String hints_str;
|
||||
FormatSettings format_settings;
|
||||
std::unordered_map<String, DataTypePtr> hints;
|
||||
String hints_parsing_error;
|
||||
};
|
||||
|
||||
/// Base class for schema inference for formats that read data row by row.
|
||||
@ -145,7 +146,8 @@ void chooseResultColumnType(
|
||||
DataTypePtr & new_type,
|
||||
const DataTypePtr & default_type,
|
||||
const String & column_name,
|
||||
size_t row)
|
||||
size_t row,
|
||||
const String & hints_parsing_error = "")
|
||||
{
|
||||
if (!type)
|
||||
{
|
||||
@ -166,14 +168,25 @@ void chooseResultColumnType(
|
||||
type = default_type;
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::TYPE_MISMATCH,
|
||||
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
|
||||
"You can specify the type for this column using setting schema_inference_hints",
|
||||
type->getName(),
|
||||
column_name,
|
||||
row,
|
||||
new_type->getName());
|
||||
if (hints_parsing_error.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::TYPE_MISMATCH,
|
||||
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
|
||||
"You can specify the type for this column using setting schema_inference_hints",
|
||||
type->getName(),
|
||||
column_name,
|
||||
row,
|
||||
new_type->getName());
|
||||
else
|
||||
throw Exception(
|
||||
ErrorCodes::TYPE_MISMATCH,
|
||||
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
|
||||
"Column types from setting schema_inference_hints couldn't be parsed because of error: {}",
|
||||
type->getName(),
|
||||
column_name,
|
||||
row,
|
||||
new_type->getName(),
|
||||
hints_parsing_error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,7 +209,13 @@ void chooseResultColumnTypes(
|
||||
chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row);
|
||||
}
|
||||
|
||||
void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read);
|
||||
void checkFinalInferredType(
|
||||
DataTypePtr & type,
|
||||
const String & name,
|
||||
const FormatSettings & settings,
|
||||
const DataTypePtr & default_type,
|
||||
size_t rows_read,
|
||||
const String & hints_parsing_error);
|
||||
|
||||
Strings splitColumnNames(const String & column_names_str);
|
||||
|
||||
|
@ -182,7 +182,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase(
|
||||
void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx)
|
||||
{
|
||||
ColumnsDescription columns;
|
||||
if (tryParseColumnsListFromString(hints_str, columns, ctx))
|
||||
if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error))
|
||||
{
|
||||
for (const auto & [name, type] : columns.getAll())
|
||||
hints[name] = type;
|
||||
@ -238,7 +238,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
|
||||
rows_in_block = 0;
|
||||
auto column_type = readColumnAndGetDataType(
|
||||
column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read);
|
||||
chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1);
|
||||
chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1, hints_parsing_error);
|
||||
}
|
||||
|
||||
++iteration;
|
||||
@ -260,7 +260,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
|
||||
{
|
||||
transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
|
||||
/// Check that we could determine the type of this column.
|
||||
checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference);
|
||||
checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference, hints_parsing_error);
|
||||
}
|
||||
result.emplace_back(name, type);
|
||||
}
|
||||
|
@ -91,6 +91,7 @@ private:
|
||||
const FormatSettings format_settings;
|
||||
String hints_str;
|
||||
std::unordered_map<String, DataTypePtr> hints;
|
||||
String hints_parsing_error;
|
||||
std::unique_ptr<JSONColumnsReaderBase> reader;
|
||||
Names column_names_from_settings;
|
||||
JSONInferenceInfo inference_info;
|
||||
|
@ -16,6 +16,21 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_EXCEPTION;
|
||||
}
|
||||
|
||||
/// Translates the `parquet.output_version` format setting into the corresponding
/// parquet::ParquetVersion value that is handed to the Parquet writer properties builder.
static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings)
{
    using Requested = FormatSettings::ParquetVersion;
    using Written = parquet::ParquetVersion;

    const auto requested_version = settings.parquet.output_version;

    /// No default label on purpose: every enumerator is listed explicitly.
    switch (requested_version)
    {
        case Requested::V1_0: return Written::PARQUET_1_0;
        case Requested::V2_4: return Written::PARQUET_2_4;
        case Requested::V2_6: return Written::PARQUET_2_6;
        case Requested::V2_LATEST: return Written::PARQUET_2_LATEST;
    }
}
|
||||
|
||||
ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
|
||||
: IOutputFormat(header_, out_), format_settings{format_settings_}
|
||||
{
|
||||
@ -44,6 +59,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
||||
auto sink = std::make_shared<ArrowBufferedOutputStream>(out);
|
||||
|
||||
parquet::WriterProperties::Builder builder;
|
||||
builder.version(getParquetVersion(format_settings));
|
||||
#if USE_SNAPPY
|
||||
builder.compression(parquet::Compression::SNAPPY);
|
||||
#endif
|
||||
|
@ -387,7 +387,8 @@ Chain buildPushingToViewsChain(
|
||||
chains.emplace_back(std::move(out));
|
||||
|
||||
/// Add the view to the query access info so it can appear in system.query_log
|
||||
if (!no_destination)
|
||||
/// Check hasQueryContext(): for materialized tables filled by a background replication process no query context is set.
|
||||
if (!no_destination && context->hasQueryContext())
|
||||
{
|
||||
context->getQueryContext()->addQueryAccessInfo(
|
||||
backQuoteIfNeed(view_id.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", view_id.getFullTableName());
|
||||
@ -757,7 +758,6 @@ IProcessor::Status FinalizingViewsTransform::prepare()
|
||||
output.finish();
|
||||
return Status::Finished;
|
||||
}
|
||||
|
||||
return Status::NeedData;
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,9 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <sstream>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
#if USE_SSL
|
||||
#include <Poco/Net/X509Certificate.h>
|
||||
@ -619,7 +622,7 @@ void HTTPHandler::processQuery(
|
||||
if (buffer_until_eof)
|
||||
{
|
||||
const std::string tmp_path(server.context()->getTemporaryVolume()->getDisk()->getPath());
|
||||
const std::string tmp_path_template(tmp_path + "http_buffers/");
|
||||
const std::string tmp_path_template(fs::path(tmp_path) / "http_buffers/");
|
||||
|
||||
auto create_tmp_disk_buffer = [tmp_path_template] (const WriteBufferPtr &)
|
||||
{
|
||||
|
@ -685,12 +685,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
|
||||
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
|
||||
request.emplace_back(fs::path(dir) / "metadata_version.txt", true);
|
||||
|
||||
/// Inverted index
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true);
|
||||
|
||||
disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
|
||||
disk->removeDirectory(dir);
|
||||
}
|
||||
|
@ -174,6 +174,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
|
||||
writeUUIDText(part->uuid, out);
|
||||
|
||||
String remote_fs_metadata = parse<String>(params.get("remote_fs_metadata", ""));
|
||||
|
||||
std::regex re("\\s*,\\s*");
|
||||
Strings capability(
|
||||
std::sregex_token_iterator(remote_fs_metadata.begin(), remote_fs_metadata.end(), re, -1),
|
||||
@ -482,6 +483,22 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
|
||||
int server_protocol_version = parse<int>(in->getResponseCookie("server_protocol_version", "0"));
|
||||
|
||||
String remote_fs_metadata = parse<String>(in->getResponseCookie("remote_fs_metadata", ""));
|
||||
|
||||
DiskPtr preffered_disk = disk;
|
||||
|
||||
if (!preffered_disk)
|
||||
{
|
||||
for (const auto & disk_candidate : data.getDisks())
|
||||
{
|
||||
if (toString(disk_candidate->getDataSourceDescription().type) == remote_fs_metadata)
|
||||
{
|
||||
preffered_disk = disk_candidate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ReservationPtr reservation;
|
||||
size_t sum_files_size = 0;
|
||||
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE)
|
||||
@ -498,31 +515,32 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
|
||||
if (!disk)
|
||||
{
|
||||
LOG_TRACE(log, "Disk for fetch is not provided, reserving space using storage balanced reservation");
|
||||
LOG_TEST(log, "Disk for fetch is not provided, reserving space using storage balanced reservation");
|
||||
reservation
|
||||
= data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true);
|
||||
|
||||
if (!reservation)
|
||||
{
|
||||
LOG_TRACE(log, "Disk for fetch is not provided, reserving space using TTL rules");
|
||||
LOG_TEST(log, "Disk for fetch is not provided, reserving space using TTL rules");
|
||||
reservation
|
||||
= data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true);
|
||||
= data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true, preffered_disk);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!disk)
|
||||
{
|
||||
LOG_TRACE(log, "Making balanced reservation");
|
||||
LOG_TEST(log, "Making balanced reservation");
|
||||
reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr);
|
||||
if (!reservation)
|
||||
{
|
||||
LOG_TRACE(log, "Making simple reservation");
|
||||
LOG_TEST(log, "Making simple reservation");
|
||||
reservation = data.reserveSpace(sum_files_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!disk)
|
||||
{
|
||||
LOG_TRACE(log, "Making reservation on the largest disk");
|
||||
LOG_TEST(log, "Making reservation on the largest disk");
|
||||
/// We don't know real size of part because sender server version is too old
|
||||
reservation = data.makeEmptyReservationOnLargestDisk();
|
||||
}
|
||||
@ -530,11 +548,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
if (!disk)
|
||||
{
|
||||
disk = reservation->getDisk();
|
||||
LOG_INFO(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
|
||||
LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), toString(disk->getDataSourceDescription().type));
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_INFO(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
|
||||
LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
|
||||
}
|
||||
|
||||
UInt64 revision = parse<UInt64>(in->getResponseCookie("disk_revision", "0"));
|
||||
@ -557,8 +575,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID)
|
||||
readUUIDText(part_uuid, *in);
|
||||
|
||||
String remote_fs_metadata = parse<String>(in->getResponseCookie("remote_fs_metadata", ""));
|
||||
|
||||
size_t projections = 0;
|
||||
if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION)
|
||||
readBinary(projections, *in);
|
||||
|
@ -4608,8 +4608,18 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
|
||||
throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on disk '{}'", partition_id, disk->getName());
|
||||
}
|
||||
|
||||
if (!movePartsToSpace(parts, std::static_pointer_cast<Space>(disk)))
|
||||
throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled");
|
||||
MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(disk));
|
||||
switch (moves_outcome)
|
||||
{
|
||||
case MovePartsOutcome::MovesAreCancelled:
|
||||
throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled");
|
||||
case MovePartsOutcome::NothingToMove:
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No parts to move are found in partition {}", partition_id);
|
||||
case MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy:
|
||||
throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Move was not finished, because zero copy mode is enabled and someone other is moving the same parts right now");
|
||||
case MovePartsOutcome::PartsMoved:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -4661,8 +4671,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
|
||||
throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName());
|
||||
}
|
||||
|
||||
if (!movePartsToSpace(parts, std::static_pointer_cast<Space>(volume)))
|
||||
throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled");
|
||||
MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(volume));
|
||||
switch (moves_outcome)
|
||||
{
|
||||
case MovePartsOutcome::MovesAreCancelled:
|
||||
throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled");
|
||||
case MovePartsOutcome::NothingToMove:
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No parts to move are found in partition {}", partition_id);
|
||||
case MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy:
|
||||
throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Move was not finished, because zero copy mode is enabled and someone other is moving the same parts right now");
|
||||
case MovePartsOutcome::PartsMoved:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeData::movePartitionToShard(const ASTPtr & /*partition*/, bool /*move_part*/, const String & /*to*/, ContextPtr /*query_context*/)
|
||||
@ -7447,7 +7467,7 @@ bool MergeTreeData::scheduleDataMovingJob(BackgroundJobsAssignee & assignee)
|
||||
assignee.scheduleMoveTask(std::make_shared<ExecutableLambdaAdapter>(
|
||||
[this, moving_tagger] () mutable
|
||||
{
|
||||
return moveParts(moving_tagger);
|
||||
return moveParts(moving_tagger) == MovePartsOutcome::PartsMoved;
|
||||
}, moves_assignee_trigger, getStorageID()));
|
||||
return true;
|
||||
}
|
||||
@ -7462,14 +7482,14 @@ bool MergeTreeData::areBackgroundMovesNeeded() const
|
||||
return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1;
|
||||
}
|
||||
|
||||
bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space)
|
||||
MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space)
|
||||
{
|
||||
if (parts_mover.moves_blocker.isCancelled())
|
||||
return false;
|
||||
return MovePartsOutcome::MovesAreCancelled;
|
||||
|
||||
auto moving_tagger = checkPartsForMove(parts, space);
|
||||
if (moving_tagger->parts_to_move.empty())
|
||||
return false;
|
||||
return MovePartsOutcome::NothingToMove;
|
||||
|
||||
return moveParts(moving_tagger);
|
||||
}
|
||||
@ -7526,13 +7546,13 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co
|
||||
return std::make_shared<CurrentlyMovingPartsTagger>(std::move(parts_to_move), *this);
|
||||
}
|
||||
|
||||
bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger)
|
||||
MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger)
|
||||
{
|
||||
LOG_INFO(log, "Got {} parts to move.", moving_tagger->parts_to_move.size());
|
||||
|
||||
const auto settings = getSettings();
|
||||
|
||||
bool result = true;
|
||||
MovePartsOutcome result{MovePartsOutcome::PartsMoved};
|
||||
for (const auto & moving_part : moving_tagger->parts_to_move)
|
||||
{
|
||||
Stopwatch stopwatch;
|
||||
@ -7588,7 +7608,7 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge
|
||||
{
|
||||
/// Move will be retried but with backoff.
|
||||
LOG_DEBUG(log, "Move of part {} postponed, because zero copy mode enabled and someone other moving this part right now", moving_part.part->name);
|
||||
result = false;
|
||||
result = MovePartsOutcome::MoveWasPostponedBecauseOfZeroCopy;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -1315,7 +1315,7 @@ protected:
|
||||
/// MergeTree because they store mutations in different ways.
|
||||
virtual std::map<int64_t, MutationCommands> getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0;
|
||||
/// Moves parts to the specified space; used in ALTER ... MOVE ... queries
|
||||
bool movePartsToSpace(const DataPartsVector & parts, SpacePtr space);
|
||||
MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space);
|
||||
|
||||
/// Makes backup entries to backup the parts of this table.
|
||||
BackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const ContextPtr & local_context);
|
||||
@ -1456,7 +1456,7 @@ private:
|
||||
using CurrentlyMovingPartsTaggerPtr = std::shared_ptr<CurrentlyMovingPartsTagger>;
|
||||
|
||||
/// Move selected parts to corresponding disks
|
||||
bool moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger);
|
||||
MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger);
|
||||
|
||||
/// Select parts for move and disks for them. Used in background moving processes.
|
||||
CurrentlyMovingPartsTaggerPtr selectPartsForMove();
|
||||
|
@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
|
||||
{
|
||||
const String & name = it.first;
|
||||
|
||||
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
|
||||
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid"))
|
||||
continue;
|
||||
|
||||
auto jt = rhs.files.find(name);
|
||||
if (jt == rhs.files.end())
|
||||
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);
|
||||
|
@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
|
||||
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
|
||||
|
||||
for (const auto & index_helper : skip_indices)
|
||||
for (const auto & skip_index : skip_indices)
|
||||
{
|
||||
String stream_name = index_helper->getFileName();
|
||||
String stream_name = skip_index->getFileName();
|
||||
skip_indices_streams.emplace_back(
|
||||
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
|
||||
stream_name,
|
||||
data_part->getDataPartStoragePtr(),
|
||||
stream_name, index_helper->getSerializedFileExtension(),
|
||||
stream_name, skip_index->getSerializedFileExtension(),
|
||||
stream_name, marks_file_extension,
|
||||
default_codec, settings.max_compress_block_size,
|
||||
marks_compression_codec, settings.marks_compress_block_size,
|
||||
settings.query_write_settings));
|
||||
|
||||
GinIndexStorePtr store = nullptr;
|
||||
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
|
||||
{
|
||||
store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
|
||||
gin_index_stores[stream_name] = store;
|
||||
}
|
||||
skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store));
|
||||
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store));
|
||||
skip_index_accumulated_marks.push_back(0);
|
||||
}
|
||||
}
|
||||
@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
|
||||
WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;
|
||||
|
||||
GinIndexStorePtr store;
|
||||
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
{
|
||||
String stream_name = index_helper->getFileName();
|
||||
auto it = gin_index_stores.find(stream_name);
|
||||
@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
|
||||
auto & stream = *skip_indices_streams[i];
|
||||
if (!skip_indices_aggregators[i]->empty())
|
||||
skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing);
|
||||
|
||||
/// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
|
||||
/// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and
|
||||
/// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
|
||||
{
|
||||
String filename_without_extension = skip_indices[i]->getFileName();
|
||||
checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & stream : skip_indices_streams)
|
||||
|
@ -11,6 +11,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
enum class MovePartsOutcome
|
||||
{
|
||||
PartsMoved,
|
||||
NothingToMove,
|
||||
MovesAreCancelled,
|
||||
MoveWasPostponedBecauseOfZeroCopy,
|
||||
};
|
||||
|
||||
/// Active part from storage and destination reservation where it has to be moved
|
||||
struct MergeTreeMoveEntry
|
||||
|
@ -43,7 +43,7 @@ struct Settings;
|
||||
M(UInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \
|
||||
M(UInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).", 0) \
|
||||
M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \
|
||||
M(UInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
|
||||
M(UInt64, max_replicated_merges_in_queue, 1000, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
|
||||
M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
|
||||
M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
|
||||
M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \
|
||||
|
@ -157,25 +157,29 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
||||
}
|
||||
|
||||
NameSet projections_on_disk;
|
||||
const auto & checksum_files_txt = checksums_txt.files;
|
||||
const auto & checksums_txt_files = checksums_txt.files;
|
||||
for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
|
||||
{
|
||||
auto file_name = it->name();
|
||||
|
||||
/// We will check projections later.
|
||||
if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj"))
|
||||
if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
|
||||
{
|
||||
projections_on_disk.insert(file_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
|
||||
if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid"))
|
||||
continue;
|
||||
|
||||
auto checksum_it = checksums_data.files.find(file_name);
|
||||
|
||||
/// Skip files that we already calculated. Also skip metadata files that are not checksummed.
|
||||
if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name))
|
||||
{
|
||||
auto txt_checksum_it = checksum_files_txt.find(file_name);
|
||||
if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0)
|
||||
auto txt_checksum_it = checksums_txt_files.find(file_name);
|
||||
if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0)
|
||||
{
|
||||
/// The file is not compressed.
|
||||
checksum_file(file_name);
|
||||
|
@ -8,10 +8,251 @@
|
||||
#include <Storages/System/StorageSystemPartsBase.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
#include <IO/IOThreadPool.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size)
|
||||
{
|
||||
/// Files or directories of detached part may not exist. Only count the size of existing files.
|
||||
if (disk->isFile(from))
|
||||
{
|
||||
total_size += disk->getFileSize(from);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = disk->iterateDirectory(from); it->isValid(); it->next())
|
||||
calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size);
|
||||
}
|
||||
}
|
||||
|
||||
UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from)
|
||||
{
|
||||
UInt64 total_size = 0;
|
||||
try
|
||||
{
|
||||
calculateTotalSizeOnDiskImpl(disk, from, total_size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
class SourceState
|
||||
{
|
||||
std::mutex mutex;
|
||||
StoragesInfoStream stream;
|
||||
|
||||
public:
|
||||
explicit SourceState(StoragesInfoStream && stream_)
|
||||
: stream(std::move(stream_))
|
||||
{}
|
||||
|
||||
StoragesInfo next()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return stream.next();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct WorkerState
|
||||
{
|
||||
struct Task
|
||||
{
|
||||
DiskPtr disk;
|
||||
String path;
|
||||
std::atomic<size_t> * counter = nullptr;
|
||||
};
|
||||
|
||||
std::vector<Task> tasks;
|
||||
std::atomic<size_t> next_task = {0};
|
||||
};
|
||||
|
||||
class DetachedPartsSource : public ISource
|
||||
{
|
||||
public:
|
||||
DetachedPartsSource(Block header_, std::shared_ptr<SourceState> state_, std::vector<UInt8> columns_mask_, UInt64 block_size_,
|
||||
bool has_bytes_on_disk_column_)
|
||||
: ISource(std::move(header_))
|
||||
, state(state_)
|
||||
, columns_mask(std::move(columns_mask_))
|
||||
, block_size(block_size_)
|
||||
, has_bytes_on_disk_column(has_bytes_on_disk_column_)
|
||||
{}
|
||||
|
||||
String getName() const override { return "DataPartsSource"; }
|
||||
|
||||
protected:
|
||||
static Chunk nullWhenNoRows(MutableColumns && new_columns)
|
||||
{
|
||||
chassert(!new_columns.empty());
|
||||
const auto rows = new_columns[0]->size();
|
||||
|
||||
if (!rows)
|
||||
return {};
|
||||
|
||||
return {std::move(new_columns), rows};
|
||||
}
|
||||
|
||||
Chunk generate() override
|
||||
{
|
||||
MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns();
|
||||
chassert(!new_columns.empty());
|
||||
|
||||
while (new_columns[0]->size() < block_size)
|
||||
{
|
||||
if (detached_parts.empty())
|
||||
getMoreParts();
|
||||
|
||||
if (detached_parts.empty())
|
||||
return nullWhenNoRows(std::move(new_columns));
|
||||
|
||||
generateRows(new_columns, block_size - new_columns[0]->size());
|
||||
}
|
||||
|
||||
return nullWhenNoRows(std::move(new_columns));
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<SourceState> state;
|
||||
const std::vector<UInt8> columns_mask;
|
||||
const UInt64 block_size;
|
||||
const bool has_bytes_on_disk_column;
|
||||
const size_t support_threads = 35;
|
||||
|
||||
StoragesInfo current_info;
|
||||
DetachedPartsInfo detached_parts;
|
||||
|
||||
void getMoreParts()
|
||||
{
|
||||
chassert(detached_parts.empty());
|
||||
|
||||
while (detached_parts.empty())
|
||||
{
|
||||
current_info = state->next();
|
||||
if (!current_info)
|
||||
return;
|
||||
|
||||
detached_parts = current_info.data->getDetachedParts();
|
||||
}
|
||||
}
|
||||
|
||||
void calculatePartSizeOnDisk(size_t begin, std::vector<std::atomic<size_t>> & parts_sizes)
|
||||
{
|
||||
if (!has_bytes_on_disk_column)
|
||||
return;
|
||||
|
||||
WorkerState worker_state;
|
||||
|
||||
for (auto p_id = begin; p_id < detached_parts.size(); ++p_id)
|
||||
{
|
||||
auto & part = detached_parts[p_id];
|
||||
auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name;
|
||||
auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path;
|
||||
worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)});
|
||||
}
|
||||
|
||||
std::vector<std::future<void>> futures;
|
||||
SCOPE_EXIT_SAFE({
|
||||
/// Cancel all workers
|
||||
worker_state.next_task.store(worker_state.tasks.size());
|
||||
/// Exceptions are not propagated
|
||||
for (auto & future : futures)
|
||||
if (future.valid())
|
||||
future.wait();
|
||||
futures.clear();
|
||||
});
|
||||
|
||||
auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10));
|
||||
futures.reserve(max_thread_to_run);
|
||||
|
||||
for (size_t i = 0; i < max_thread_to_run; ++i)
|
||||
{
|
||||
if (worker_state.next_task.load() >= worker_state.tasks.size())
|
||||
break;
|
||||
|
||||
auto worker = [&worker_state] ()
|
||||
{
|
||||
for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++)
|
||||
{
|
||||
auto & task = worker_state.tasks.at(id);
|
||||
size_t size = calculateTotalSizeOnDisk(task.disk, task.path);
|
||||
task.counter->store(size);
|
||||
}
|
||||
};
|
||||
|
||||
futures.push_back(
|
||||
scheduleFromThreadPool<void>(
|
||||
std::move(worker),
|
||||
IOThreadPool::get(),
|
||||
"DP_BytesOnDisk"));
|
||||
}
|
||||
|
||||
/// Exceptions are propagated
|
||||
for (auto & future : futures)
|
||||
future.get();
|
||||
}
|
||||
|
||||
void generateRows(MutableColumns & new_columns, size_t max_rows)
|
||||
{
|
||||
chassert(current_info);
|
||||
|
||||
auto rows = std::min(max_rows, detached_parts.size());
|
||||
auto begin = detached_parts.size() - rows;
|
||||
|
||||
std::vector<std::atomic<size_t>> parts_sizes(rows);
|
||||
calculatePartSizeOnDisk(begin, parts_sizes);
|
||||
|
||||
for (auto p_id = begin; p_id < detached_parts.size(); ++p_id)
|
||||
{
|
||||
auto & p = detached_parts.at(p_id);
|
||||
|
||||
size_t src_index = 0;
|
||||
size_t res_index = 0;
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(current_info.database);
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(current_info.table);
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.dir_name);
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
chassert(has_bytes_on_disk_column);
|
||||
size_t bytes_on_disk = parts_sizes.at(p_id - begin).load();
|
||||
new_columns[res_index++]->insert(bytes_on_disk);
|
||||
}
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.disk->getName());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / MergeTreeData::DETACHED_DIR_NAME / p.dir_name).string());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.level : Field());
|
||||
}
|
||||
|
||||
detached_parts.resize(begin);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_id_)
|
||||
: IStorage(table_id_)
|
||||
{
|
||||
@ -31,33 +272,6 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i
|
||||
}});
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
}
|
||||
static void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size)
|
||||
{
|
||||
/// Files or directories of detached part may not exist. Only count the size of existing files.
|
||||
if (disk->isFile(from))
|
||||
{
|
||||
total_size += disk->getFileSize(from);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = disk->iterateDirectory(from); it->isValid(); it->next())
|
||||
calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size);
|
||||
}
|
||||
}
|
||||
|
||||
static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from)
|
||||
{
|
||||
UInt64 total_size = 0;
|
||||
try
|
||||
{
|
||||
calculateTotalSizeOnDiskImpl(disk, from, total_size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
Pipe StorageSystemDetachedParts::read(
|
||||
const Names & column_names,
|
||||
@ -65,66 +279,39 @@ Pipe StorageSystemDetachedParts::read(
|
||||
SelectQueryInfo & query_info,
|
||||
ContextPtr context,
|
||||
QueryProcessingStage::Enum /*processed_stage*/,
|
||||
const size_t /*max_block_size*/,
|
||||
const size_t /*num_streams*/)
|
||||
const size_t max_block_size,
|
||||
const size_t num_streams)
|
||||
{
|
||||
storage_snapshot->check(column_names);
|
||||
|
||||
StoragesInfoStream stream(query_info, context);
|
||||
|
||||
/// Create the result.
|
||||
Block block = storage_snapshot->metadata->getSampleBlock();
|
||||
Block sample_block = storage_snapshot->metadata->getSampleBlock();
|
||||
|
||||
NameSet names_set(column_names.begin(), column_names.end());
|
||||
std::vector<UInt8> columns_mask(block.columns());
|
||||
Block header;
|
||||
|
||||
for (size_t i = 0; i < block.columns(); ++i)
|
||||
Block header;
|
||||
std::vector<UInt8> columns_mask(sample_block.columns());
|
||||
|
||||
for (size_t i = 0; i < columns_mask.size(); ++i)
|
||||
{
|
||||
if (names_set.contains(block.getByPosition(i).name))
|
||||
if (names_set.contains(sample_block.getByPosition(i).name))
|
||||
{
|
||||
columns_mask[i] = 1;
|
||||
header.insert(block.getByPosition(i));
|
||||
header.insert(sample_block.getByPosition(i));
|
||||
}
|
||||
}
|
||||
|
||||
MutableColumns new_columns = header.cloneEmptyColumns();
|
||||
while (StoragesInfo info = stream.next())
|
||||
bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk");
|
||||
|
||||
auto state = std::make_shared<SourceState>(StoragesInfoStream(query_info, context));
|
||||
|
||||
Pipe pipe;
|
||||
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
{
|
||||
const auto parts = info.data->getDetachedParts();
|
||||
for (const auto & p : parts)
|
||||
{
|
||||
size_t src_index = 0, res_index = 0;
|
||||
String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name;
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(info.database);
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(info.table);
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.dir_name);
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path));
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.disk->getName());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field());
|
||||
if (columns_mask[src_index++])
|
||||
new_columns[res_index++]->insert(p.valid_name ? p.level : Field());
|
||||
}
|
||||
auto source = std::make_shared<DetachedPartsSource>(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column);
|
||||
pipe.addSource(std::move(source));
|
||||
}
|
||||
|
||||
UInt64 num_rows = new_columns.at(0)->size();
|
||||
Chunk chunk(std::move(new_columns), num_rows);
|
||||
|
||||
return Pipe(std::make_shared<SourceFromSingleChunk>(std::move(header), std::move(chunk)));
|
||||
return pipe;
|
||||
}
|
||||
|
||||
}
|
||||
|
39
src/Storages/System/StorageSystemServerSettings.cpp
Normal file
39
src/Storages/System/StorageSystemServerSettings.cpp
Normal file
@ -0,0 +1,39 @@
|
||||
#include <Storages/System/StorageSystemServerSettings.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes()
|
||||
{
|
||||
return {
|
||||
{"name", std::make_shared<DataTypeString>()},
|
||||
{"value", std::make_shared<DataTypeString>()},
|
||||
{"default", std::make_shared<DataTypeString>()},
|
||||
{"changed", std::make_shared<DataTypeUInt8>()},
|
||||
{"description", std::make_shared<DataTypeString>()},
|
||||
{"type", std::make_shared<DataTypeString>()},
|
||||
};
|
||||
}
|
||||
|
||||
void StorageSystemServerSettings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
|
||||
{
|
||||
const auto & config = context->getConfigRef();
|
||||
ServerSettings settings;
|
||||
settings.loadSettingsFromConfig(config);
|
||||
|
||||
for (const auto & setting : settings.all())
|
||||
{
|
||||
const auto & setting_name = setting.getName();
|
||||
res_columns[0]->insert(setting_name);
|
||||
res_columns[1]->insert(setting.getValueString());
|
||||
res_columns[2]->insert(setting.getDefaultValueString());
|
||||
res_columns[3]->insert(setting.isValueChanged());
|
||||
res_columns[4]->insert(setting.getDescription());
|
||||
res_columns[5]->insert(setting.getTypeName());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
27
src/Storages/System/StorageSystemServerSettings.h
Normal file
27
src/Storages/System/StorageSystemServerSettings.h
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/System/IStorageSystemOneBlock.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
|
||||
/** Implements system table "server_settings", which allows getting information about the current server settings.
|
||||
*/
|
||||
class StorageSystemServerSettings final : public IStorageSystemOneBlock<StorageSystemServerSettings>
|
||||
{
|
||||
public:
|
||||
std::string getName() const override { return "SystemServerSettings"; }
|
||||
|
||||
static NamesAndTypesList getNamesAndTypes();
|
||||
|
||||
protected:
|
||||
using IStorageSystemOneBlock::IStorageSystemOneBlock;
|
||||
|
||||
void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override;
|
||||
};
|
||||
|
||||
}
|
@ -19,6 +19,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes()
|
||||
{"max", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
|
||||
{"readonly", std::make_shared<DataTypeUInt8>()},
|
||||
{"type", std::make_shared<DataTypeString>()},
|
||||
{"default", std::make_shared<DataTypeString>()},
|
||||
{"alias_for", std::make_shared<DataTypeString>()},
|
||||
};
|
||||
}
|
||||
@ -53,6 +54,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co
|
||||
res_columns[5]->insert(max);
|
||||
res_columns[6]->insert(writability == SettingConstraintWritability::CONST);
|
||||
res_columns[7]->insert(setting.getTypeName());
|
||||
res_columns[8]->insert(setting.getDefaultValueString());
|
||||
};
|
||||
|
||||
const auto & settings_to_aliases = Settings::Traits::settingsToAliases();
|
||||
@ -62,7 +64,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co
|
||||
res_columns[0]->insert(setting_name);
|
||||
|
||||
fill_data_for_setting(setting_name, setting);
|
||||
res_columns[8]->insert("");
|
||||
res_columns[9]->insert("");
|
||||
|
||||
if (auto it = settings_to_aliases.find(setting_name); it != settings_to_aliases.end())
|
||||
{
|
||||
@ -70,7 +72,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co
|
||||
{
|
||||
res_columns[0]->insert(alias);
|
||||
fill_data_for_setting(alias, setting);
|
||||
res_columns[8]->insert(setting_name);
|
||||
res_columns[9]->insert(setting_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <Storages/System/StorageSystemReplicas.h>
|
||||
#include <Storages/System/StorageSystemReplicationQueue.h>
|
||||
#include <Storages/System/StorageSystemDistributionQueue.h>
|
||||
#include <Storages/System/StorageSystemServerSettings.h>
|
||||
#include <Storages/System/StorageSystemSettings.h>
|
||||
#include <Storages/System/StorageSystemSettingsChanges.h>
|
||||
#include <Storages/System/StorageSystemMergeTreeSettings.h>
|
||||
@ -105,6 +106,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database)
|
||||
attach<StorageSystemFunctions>(context, system_database, "functions");
|
||||
attach<StorageSystemEvents>(context, system_database, "events");
|
||||
attach<StorageSystemSettings>(context, system_database, "settings");
|
||||
attach<StorageSystemServerSettings>(context, system_database, "server_settings");
|
||||
attach<StorageSystemSettingsChanges>(context, system_database, "settings_changes");
|
||||
attach<SystemMergeTreeSettings<false>>(context, system_database, "merge_tree_settings");
|
||||
attach<SystemMergeTreeSettings<true>>(context, system_database, "replicated_merge_tree_settings");
|
||||
|
@ -92,7 +92,8 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr
|
||||
|
||||
const auto & literal = args[0]->as<const ASTLiteral>();
|
||||
String value;
|
||||
if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context))
|
||||
String error;
|
||||
if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context, error))
|
||||
{
|
||||
has_structure_in_arguments = true;
|
||||
return;
|
||||
|
@ -52,7 +52,7 @@ class Labels:
|
||||
|
||||
|
||||
class ReleaseBranch:
|
||||
CHERRYPICK_DESCRIPTION = """This pull-request is a first step of an automated \
|
||||
CHERRYPICK_DESCRIPTION = f"""This pull-request is a first step of an automated \
|
||||
backporting.
|
||||
It contains changes like after calling a local command `git cherry-pick`.
|
||||
If you intend to continue backporting this changes, then resolve all conflicts if any.
|
||||
@ -60,13 +60,16 @@ Otherwise, if you do not want to backport them, then just close this pull-reques
|
||||
|
||||
The check results does not matter at this step - you can safely ignore them.
|
||||
Also this pull-request will be merged automatically as it reaches the mergeable state, \
|
||||
but you always can merge it manually.
|
||||
**do not merge it manually**.
|
||||
|
||||
If it stuck, check the original PR for `{Labels.BACKPORTS_CREATED}` and delete it if \
|
||||
necessary.
|
||||
"""
|
||||
BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
|
||||
backporting.
|
||||
Treat it as a standard pull-request: look at the checks and resolve conflicts.
|
||||
Merge it only if you intend to backport changes to the target branch, otherwise just \
|
||||
close it.
|
||||
close it.
|
||||
"""
|
||||
REMOTE = ""
|
||||
|
||||
|
@ -289,6 +289,18 @@ CI_CONFIG = {
|
||||
"Stress test (debug)": {
|
||||
"required_build": "package_debug",
|
||||
},
|
||||
"Upgrade check (asan)": {
|
||||
"required_build": "package_asan",
|
||||
},
|
||||
"Upgrade check (tsan)": {
|
||||
"required_build": "package_tsan",
|
||||
},
|
||||
"Upgrade check (msan)": {
|
||||
"required_build": "package_msan",
|
||||
},
|
||||
"Upgrade check (debug)": {
|
||||
"required_build": "package_debug",
|
||||
},
|
||||
"Integration tests (asan)": {
|
||||
"required_build": "package_asan",
|
||||
},
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user