Merge branch 'master' into revert-46909-revert-45911-mutations_rename_hang

alesapin 2023-02-28 18:07:54 +01:00
commit c7c6dff2f1
179 changed files with 2142 additions and 1360 deletions

View File

@ -3105,10 +3105,10 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
TEMP_PATH=${{runner.temp}}/stress_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (asan)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
@ -3267,6 +3267,142 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### UPGRADE CHECK ######################################
##############################################################################################
UpgradeCheckAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (asan)
REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckTsan:
needs: [BuilderDebTsan]
# same as for stress test with tsan
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (tsan)
REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (msan)
REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (debug)
REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
##################################### AST FUZZERS ############################################
##############################################################################################

View File

@ -54,6 +54,10 @@
"name": "clickhouse/stress-test",
"dependent": []
},
"docker/test/upgrade": {
"name": "clickhouse/upgrade-check",
"dependent": []
},
"docker/test/codebrowser": {
"name": "clickhouse/codebrowser",
"dependent": []

View File

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:22.04
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
@ -9,13 +9,14 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
&& apt-get upgrade -yq \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \
ca-certificates \
dirmngr \
gnupg \
locales \
gnupg2 \
wget \
locales \
tzdata \
&& apt-get clean
@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# Remove as much of Ubuntu as possible.
# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution.
# ClickHouse does not need Docker at all. ClickHouse is above all that.
# It does not care about Ubuntu, Docker, or other cruft and you should neither.
# The fact that this Docker image is based on Ubuntu is just a misconception.
# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all.
# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners.
RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y
RUN apt-get autoremove --purge -yq libksba8 && \
apt-get autoremove -yq
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)

View File

@ -21,10 +21,9 @@ RUN apt-get update -y \
openssl \
netcat-openbsd \
telnet \
llvm-9 \
brotli
brotli \
&& apt-get clean
COPY ./stress /stress
COPY run.sh /
ENV DATASETS="hits visits"

View File

@ -8,229 +8,13 @@ dmesg --clear
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
function escaped()
{
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
# Also limit lines width just in case (too long lines are not really useful usually)
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
function unts()
{
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
function install_packages()
{
dpkg -i $1/clickhouse-common-static_*.deb
dpkg -i $1/clickhouse-common-static-dbg_*.deb
dpkg -i $1/clickhouse-server_*.deb
dpkg -i $1/clickhouse-client_*.deb
}
function configure()
{
# install test configs
export USE_DATABASE_ORDINARY=1
export EXPORT_S3_STORAGE_POLICIES=1
/usr/share/clickhouse-test/config/install.sh
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# avoid too slow startup
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
# since we run clickhouse from root
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
# so that we can override this setting and execute service queries, like:
# - hung check
# - show/drop database
# - ...
#
# So max_memory_usage_for_user will be a soft limit, and
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
<profiles>
<default>
<max_memory_usage>10G</max_memory_usage>
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
</default>
</profiles>
</clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Let OOM killer terminate other processes before clickhouse-server:
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
<clickhouse>
<oom_score>-1000</oom_score>
</clickhouse>
EOL
# Analyzer is not yet ready for testing
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
<clickhouse>
<profiles>
<default>
<constraints>
<allow_experimental_analyzer>
<readonly/>
</allow_experimental_analyzer>
</constraints>
</default>
</profiles>
</clickhouse>
EOL
}
function stop()
{
local max_tries="${1:-90}"
local pid
# Preserve the pid, since the server can hang after the PID file is deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hangs; let's collect stack traces.
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
}
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt ${1:-120} ]
then
echo "Cannot start clickhouse-server"
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
cat /var/log/clickhouse-server/stdout.log
tail -n100 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
break
fi
# use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5
counter=$((counter + 1))
done
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
# explicitly ignore non-fatal signals that are used by server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
thread apply all backtrace full
info registers
disassemble /s
up
disassemble /s
up
disassemble /s
p \"done\"
detach
quit
" > script.gdb
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}
# Stress tests and the upgrade check use similar code placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@ -396,7 +180,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
start
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
@ -413,316 +197,27 @@ unset "${!THREAD_@}"
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
check_server_start
stop
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
rg -Fa "########################################" /test_output/* > /dev/null \
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
function get_gdb_log_context()
{
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
}
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo -e "Backward compatibility check\n"
echo "Get previous release tag"
previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
else
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Uninstall current packages
dpkg --remove clickhouse-client
dpkg --remove clickhouse-server
dpkg --remove clickhouse-common-static-dbg
dpkg --remove clickhouse-common-static
rm -rf /var/lib/clickhouse/*
# Make BC check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
# Remove s3 related configs to avoid "there is no disk type `cache`"
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
# Turn on after 22.12
rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
# it uses recently introduced settings which previous versions may not have
rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
# Turn on after 23.1
rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||:
start
clickhouse-client --query="SELECT 'Server version: ', version()"
# Install new package before running stress test because we should use new
# clickhouse-client and new clickhouse-test.
#
# But we should leave old binary in /usr/bin/ and debug symbols in
# /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
# will print sane stacktraces and also to avoid possible crashes.
#
# FIXME: those files can be extracted directly from debian package, but
# actually better solution will be to use different PATH instead of playing
# games with files from packages.
mv /usr/bin/clickhouse previous_release_package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
install_packages package_folder
mv /usr/bin/clickhouse package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
mv previous_release_package_folder/clickhouse /usr/bin/
mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
mkdir tmp_stress_output
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version
stop 300
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server
mv package_folder/clickhouse /usr/bin/
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
echo "Check for Logical errors in server log:"
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
done
fi
fi
dmesg -T > /test_output/dmesg.log
# OOM in dmesg -- those are real
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
collect_query_and_trace_logs
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like 'Backward compatibility check%'), -- BC check goes last
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
@ -732,14 +227,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
done
collect_core_dumps

View File

@ -0,0 +1,31 @@
# rebuild in #33610
# docker build -t clickhouse/upgrade-check .
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:$FROM_TAG
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
bash \
tzdata \
fakeroot \
debhelper \
parallel \
expect \
python3 \
python3-lxml \
python3-termcolor \
python3-requests \
curl \
sudo \
openssl \
netcat-openbsd \
telnet \
brotli \
&& apt-get clean
COPY run.sh /
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]

200
docker/test/upgrade/run.sh Normal file
View File

@ -0,0 +1,200 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024
# Avoid overlaps with previous runs
dmesg --clear
set -x
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# Stress tests and the upgrade check use similar code placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
echo "Get previous release tag"
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv
exit
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv
exit
fi
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Make upgrade check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"
mkdir tmp_stress_output
stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version and disable additional hang check
stop 300 false
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
# Remove file application_errors.txt if it's empty
[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from the stress test may not be finished yet when we restart the server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Authentication failed" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
collect_query_and_trace_logs
check_oom_in_dmesg
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
(test ilike '%possible deadlock%') DESC,
(test like '%start%') DESC,
(test like '%dmesg%') DESC,
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
collect_core_dumps

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
## Arrow {#data-format-arrow}

View File

@ -229,7 +229,7 @@ To prevent inferring the same schema every time ClickHouse read the data from th
There are special settings that control this cache:
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries.
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
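As a hedged illustration of the per-query cache switch described above (the file name is invented; the setting name is the renamed one shown in this change):

```sql
-- Schema is inferred once and cached, keyed by source, format name,
-- format settings and the file's last modification time.
DESC file('hits.tsv', 'TSVWithNames');

-- Bypass the cache lookup for this query only.
DESC file('hits.tsv', 'TSVWithNames') SETTINGS schema_inference_use_cache_for_file = 0;
```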
@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter
**Example**
```sql
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}'
SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐

View File

@ -142,6 +142,10 @@ y Nullable(String)
z IPv4
```
:::warning
If `schema_inference_hints` is not formatted properly, or if there is a typo or a wrong datatype, the whole `schema_inference_hints` will be ignored.
:::
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
@ -507,7 +511,7 @@ Enabled by default.
Ignore unknown keys in json object for named tuples.
Disabled by default.
Enabled by default.
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
@ -1102,6 +1106,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin
Enabled by default.
### output_format_parquet_version {#output_format_parquet_version}
The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`.
Default value: `2.latest`.
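A hedged usage sketch (the table name is invented; `SET` applies the setting for the session):

```sql
-- Write Parquet using an explicit format version instead of the default 2.latest.
SET output_format_parquet_version = '2.6';
SELECT * FROM my_table INTO OUTFILE 'out.parquet' FORMAT Parquet;
```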
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. ');
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
**Example**
``` sql
SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
```
```text
┌─DateString──────────┐
│ 12/05/2021 12:50:00 │
└─────────────────────┘
```
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
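An illustrative query for the function described above (the result is shown as expected from the description, not copied from the docs):

```sql
SELECT alphaTokens('abca1abc');
```

```text
┌─alphaTokens('abca1abc')─┐
│ ['abca','abc']          │
└─────────────────────────┘
```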
@ -364,4 +375,4 @@ Result:
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
```

View File

@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
:::tip
Here is a step by step guide on using [Materialized views](docs/en/guides/developer/cascading-materialized-views.md).
:::
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` the table engine for storing data.
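A hedged sketch of the syntax above (table, view, and column names are invented for the example; the view declares its own `ENGINE` because no `TO [db].[table]` target is given):

```sql
CREATE MATERIALIZED VIEW downloads_daily_mv
ENGINE = SummingMergeTree
ORDER BY (day, user_id)
AS SELECT
    toDate(when) AS day,
    user_id,
    count() AS downloads
FROM downloads
GROUP BY day, user_id;
```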

View File

@ -23,23 +23,3 @@ You can use table functions in:
:::warning
You can't use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
:::
| Function | Description |
|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
:::note
Only these table functions are enabled in readonly mode :
null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
:::

View File

@ -24,6 +24,8 @@ RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
# Bring back /etc/default/clickhouse for backward compatibility
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE

View File

@ -696,7 +696,7 @@ try
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
setenv("OPENSSL_CONF", config_dir.string(), true);
setenv("OPENSSL_CONF", config_dir.c_str(), true);
}
#endif

View File

@ -13,7 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -129,7 +129,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
for (size_t i = begin; i < end; ++i)

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
@ -197,7 +197,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena);

View File

@ -233,11 +233,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
auto select_settings = select_query_typed.settings();
SettingsChanges settings_changes;
/// We are going to remove settings LIMIT and OFFSET and
/// further replace them with corresponding expression nodes
UInt64 limit = 0;
UInt64 offset = 0;
/// Remove global settings limit and offset
if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset)
{
Settings settings = updated_context->getSettings();
limit = settings.limit;
offset = settings.offset;
settings.limit = 0;
settings.offset = 0;
updated_context->setSettings(settings);
}
if (select_settings)
{
auto & set_query = select_settings->as<ASTSetQuery &>();
updated_context->applySettingsChanges(set_query.changes);
settings_changes = set_query.changes;
/// Remove expression settings limit and offset
if (auto * limit_field = set_query.changes.tryGet("limit"))
{
limit = limit_field->safeGet<UInt64>();
set_query.changes.removeSetting("limit");
}
if (auto * offset_field = set_query.changes.tryGet("offset"))
{
offset = offset_field->safeGet<UInt64>();
set_query.changes.removeSetting("offset");
}
if (!set_query.changes.empty())
{
updated_context->applySettingsChanges(set_query.changes);
settings_changes = set_query.changes;
}
}
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
@ -323,12 +355,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
/// Combine limit expression with limit setting
auto select_limit = select_query_typed.limitLength();
if (select_limit)
if (select_limit && limit)
{
auto function_node = std::make_shared<FunctionNode>("least");
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
current_query_tree->getLimit() = std::move(function_node);
}
else if (limit)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
else if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
/// Combine offset expression with offset setting
auto select_offset = select_query_typed.limitOffset();
if (select_offset)
if (select_offset && offset)
{
auto function_node = std::make_shared<FunctionNode>("plus");
function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
current_query_tree->getOffset() = std::move(function_node);
}
else if (offset)
current_query_tree->getOffset() = std::make_shared<ConstantNode>(offset);
else if (select_offset)
current_query_tree->getOffset() = buildExpression(select_offset, current_context);
return current_query_tree;
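The two hunks above fold the global `limit` and `offset` settings into the query tree: a settings limit caps the query's own LIMIT through a `least` node, and a settings offset is added to the query's OFFSET through a `plus` node. A minimal standalone sketch of that combination rule follows; the helper `combineLimitOffset` is illustrative only and not part of ClickHouse.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>

// Hypothetical helper mirroring the rule built above: the settings limit caps
// the query LIMIT via least(), the settings offset is added via plus().
static std::pair<std::optional<uint64_t>, uint64_t> combineLimitOffset(
    std::optional<uint64_t> query_limit, uint64_t query_offset,
    uint64_t settings_limit, uint64_t settings_offset)
{
    std::optional<uint64_t> limit = query_limit;
    if (settings_limit)
        limit = query_limit ? std::min(*query_limit, settings_limit) : settings_limit;
    return {limit, query_offset + settings_offset};
}

int main()
{
    // SELECT ... LIMIT 100 OFFSET 5 executed with SETTINGS limit = 10, offset = 2
    auto [limit, offset] = combineLimitOffset(100, 5, 10, 2);
    std::cout << *limit << " " << offset << "\n";  // prints "10 7"
}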

View File

@ -577,8 +577,8 @@ if (TARGET ch_contrib::annoy)
endif()
if (TARGET ch_rust::skim)
# Add only -I, library is needed only for clickhouse-client/clickhouse-local
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
dbms_target_link_libraries(PUBLIC ch_rust::skim)
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")

View File

@ -197,7 +197,7 @@
M(187, COLLATION_COMPARISON_FAILED) \
M(188, UNKNOWN_ACTION) \
M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \
M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \
M(190, SIZES_OF_ARRAYS_DONT_MATCH) \
M(191, SET_SIZE_LIMIT_EXCEEDED) \
M(192, UNKNOWN_USER) \
M(193, WRONG_PASSWORD) \

View File

@ -100,8 +100,8 @@ private:
bool required_substring_is_prefix;
bool is_case_insensitive;
std::string required_substring;
std::optional<DB::StringSearcher<true, true>> case_sensitive_substring_searcher;
std::optional<DB::StringSearcher<false, true>> case_insensitive_substring_searcher;
std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
std::unique_ptr<RegexType> re2;
unsigned number_of_subpatterns;

View File

@ -46,4 +46,30 @@ Field * SettingsChanges::tryGet(std::string_view name)
return &change->value;
}
bool SettingsChanges::insertSetting(std::string_view name, const Field & value)
{
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
if (it != end())
return false;
emplace_back(name, value);
return true;
}
void SettingsChanges::setSetting(std::string_view name, const Field & value)
{
if (auto * setting_value = tryGet(name))
*setting_value = value;
else
insertSetting(name, value);
}
bool SettingsChanges::removeSetting(std::string_view name)
{
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
if (it == end())
return false;
erase(it);
return true;
}
}

View File

@ -28,6 +28,13 @@ public:
bool tryGet(std::string_view name, Field & out_value) const;
const Field * tryGet(std::string_view name) const;
Field * tryGet(std::string_view name);
/// Inserts the element if it doesn't exist and returns true, otherwise just returns false
bool insertSetting(std::string_view name, const Field & value);
/// Sets the element to the value, inserting it if it doesn't exist
void setSetting(std::string_view name, const Field & value);
/// If the element exists, removes it and returns true, otherwise returns false
bool removeSetting(std::string_view name);
};
}
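The new `insertSetting`/`setSetting`/`removeSetting` helpers give `SettingsChanges` map-like semantics over its underlying vector, which the query tree builder above uses to strip `limit` and `offset`. Below is a self-contained sketch of those semantics with stand-in `Change`/`Changes` types instead of the real `SettingChange`/`Field`; it only illustrates the behaviour.

#include <algorithm>
#include <cassert>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

// Stand-ins for DB::SettingChange / DB::SettingsChanges.
struct Change { std::string name; std::string value; };

struct Changes : std::vector<Change>
{
    Change * tryGet(std::string_view name)
    {
        auto it = std::find_if(begin(), end(), [&](const Change & c) { return c.name == name; });
        return it == end() ? nullptr : &*it;
    }
    bool insertSetting(std::string_view name, std::string value)   /// no-op if already present
    {
        if (tryGet(name))
            return false;
        push_back({std::string(name), std::move(value)});
        return true;
    }
    void setSetting(std::string_view name, std::string value)      /// insert-or-update
    {
        if (auto * c = tryGet(name))
            c->value = std::move(value);
        else
            insertSetting(name, std::move(value));
    }
    bool removeSetting(std::string_view name)                      /// erase if present
    {
        auto it = std::find_if(begin(), end(), [&](const Change & c) { return c.name == name; });
        if (it == end())
            return false;
        erase(it);
        return true;
    }
};

int main()
{
    Changes changes;
    assert(changes.insertSetting("limit", "10"));
    assert(!changes.insertSetting("limit", "20"));  /// already present
    changes.setSetting("limit", "20");              /// updates in place
    assert(changes.removeSetting("limit"));
    assert(!changes.removeSetting("limit"));        /// already gone
}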

File diff suppressed because it is too large

View File

@ -809,7 +809,7 @@ class IColumn;
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
@ -856,6 +856,7 @@ class IColumn;
M(UInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.", 0) \
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \

View File

@ -80,6 +80,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},

View File

@ -171,4 +171,12 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
{{"mmap", LocalFSReadMethod::mmap},
{"pread", LocalFSReadMethod::pread},
{"read", LocalFSReadMethod::read}})
IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS,
{{"1.0", FormatSettings::ParquetVersion::V1_0},
{"2.4", FormatSettings::ParquetVersion::V2_4},
{"2.6", FormatSettings::ParquetVersion::V2_6},
{"2.latest", FormatSettings::ParquetVersion::V2_LATEST}})
}

View File

@ -72,6 +72,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeIn
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, FormatSettings::DateTimeOutputFormat)
DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion)
enum class LogsLevel
{
none = 0, /// Disable

View File

@ -25,7 +25,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace Nested
@ -242,7 +242,7 @@ void validateArraySizes(const Block & block)
const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
if (!first_array_column.hasEqualOffsets(another_array_column))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Elements '{}' and '{}' "
"of Nested data structure '{}' (Array columns) have different array sizes.",
block.getByPosition(it->second).name, elem.name, split.first);

View File

@ -99,6 +99,17 @@ struct RegExpTreeDictionary::RegexTreeNode
return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0);
}
/// check if this node can cover all the attributes from the query.
bool containsAll(const std::unordered_map<String, const DictionaryAttribute &> & matching_attributes) const
{
for (const auto & [key, value] : matching_attributes)
{
if (!attributes.contains(key))
return false;
}
return true;
}
struct AttributeValue
{
Field field;
@ -498,6 +509,9 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
if (node_ptr->match(reinterpret_cast<const char *>(keys_data.data()) + offset, length))
{
match_result.insertNodeID(node_ptr->id);
/// When this node is a leaf and contains all the required attributes, it means a match.
if (node_ptr->containsAll(attributes) && node_ptr->children.empty())
break;
}
}
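The `containsAll` check enables an early exit during matching: once a matched node is a leaf and its attributes cover everything the query requested, the remaining regexps can be skipped. A standalone sketch of that rule follows; `Node` and `matchIds` are illustrative stand-ins, not the dictionary's real types.

#include <cstddef>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node
{
    std::unordered_set<std::string> attributes;  /// attributes this node can set
    bool is_leaf = false;

    bool containsAll(const std::unordered_map<std::string, int> & requested) const
    {
        for (const auto & [key, value] : requested)
            if (!attributes.contains(key))
                return false;
        return true;
    }
};

/// Collects the ids of matching nodes, stopping early on a covering leaf.
std::vector<std::size_t> matchIds(const std::vector<Node> & nodes,
                                  const std::unordered_map<std::string, int> & requested)
{
    std::vector<std::size_t> ids;
    for (std::size_t i = 0; i < nodes.size(); ++i)
    {
        ids.push_back(i);  /// pretend the regexp of node i matched
        if (nodes[i].is_leaf && nodes[i].containsAll(requested))
            break;         /// nothing left to gain from later nodes
    }
    return ids;
}

int main()
{
    std::vector<Node> nodes{Node{{"tag"}, false}, Node{{"tag", "version"}, true}, Node{{"os"}, true}};
    auto ids = matchIds(nodes, {{"tag", 0}, {"version", 0}});
    /// ids == {0, 1}: node 2 is never inspected.
    return ids.size() == 2 ? 0 : 1;
}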

View File

@ -110,6 +110,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.parquet.output_version = settings.output_format_parquet_version;
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;

View File

@ -175,6 +175,14 @@ struct FormatSettings
String column_for_object_name;
} json_object_each_row;
enum class ParquetVersion
{
V1_0,
V2_4,
V2_6,
V2_LATEST,
};
struct
{
UInt64 row_group_size = 1000000;
@ -186,6 +194,7 @@ struct FormatSettings
bool output_string_as_string = false;
bool output_fixed_string_as_fixed_byte_array = true;
UInt64 max_block_size = 8192;
ParquetVersion output_version;
} parquet;
struct Pretty

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
}
return {nested_columns, offsets->data()};
}

View File

@ -23,9 +23,10 @@ namespace ErrorCodes
namespace impl
{
/// Is the [I]LIKE expression reduced to finding a substring in a string?
/// Is the [I]LIKE expression equivalent to a substring search?
inline bool likePatternIsSubstring(std::string_view pattern, String & res)
{
/// TODO: ignore multiple leading or trailing %
if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
return false;
@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
case '\\':
++pos;
if (pos == end)
/// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
return false;
else
res += *pos;
{
switch (*pos)
{
/// Known LIKE escape sequences:
case '%':
case '_':
case '\\':
res += *pos;
break;
/// For all other escape sequences, the backslash loses its special meaning
default:
res += '\\';
res += *pos;
break;
}
}
break;
default:
res += *pos;
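With the extended escape handling, `\%`, `\_` and `\\` lose the backslash while any other escaped character keeps it, so more LIKE patterns still qualify for a plain substring search. The following self-contained restatement of the rule is close to, but not literally, the function above.

#include <cassert>
#include <string>
#include <string_view>

/// A '%...%' pattern is a plain substring search only if the middle part has no
/// unescaped metacharacters; '\%', '\_' and '\\' unescape to the literal
/// character, any other backslash sequence keeps the backslash.
bool likePatternIsSubstring(std::string_view pattern, std::string & res)
{
    if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
        return false;

    res.clear();
    const char * pos = pattern.data() + 1;
    const char * const end = pattern.data() + pattern.size() - 1;
    while (pos < end)
    {
        switch (*pos)
        {
            case '%':
            case '_':
                return false;               /// unescaped metacharacter
            case '\\':
                if (++pos == end)
                    return false;           /// pattern ends with \%, trailing % is literal
                if (*pos == '%' || *pos == '_' || *pos == '\\')
                    res += *pos;            /// known escape: drop the backslash
                else
                {
                    res += '\\';            /// unknown escape: keep the backslash
                    res += *pos;
                }
                break;
            default:
                res += *pos;
        }
        ++pos;
    }
    return true;
}

int main()
{
    std::string res;
    assert(likePatternIsSubstring("%ab\\%c%", res) && res == "ab%c");
    assert(likePatternIsSubstring("%a\\nc%", res) && res == "a\\nc");
    assert(!likePatternIsSubstring("%a_c%", res));
}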

View File

@ -37,7 +37,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -361,7 +361,7 @@ public:
if (getOffsetsPtr(*column_array) != offsets_column
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"{}s passed to {} must have equal size",
argument_type_name,
getName());

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}
@ -356,7 +356,7 @@ private:
{
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[row] - prev_offset,
@ -423,7 +423,7 @@ private:
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[0],

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
class FunctionArrayEnumerateUniq;
@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended<Derived>::executeImpl(const ColumnsWith
offsets_column = array->getOffsetsPtr();
}
else if (offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
const auto * array_data = &array->getData();

View File

@ -60,7 +60,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
class FunctionArrayEnumerateUniqRanked;
@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
{
if (*offsets_by_depth[0] != array->getOffsets())
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
}
}
@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
{
if (*offsets_by_depth[col_depth] != array->getOffsets())
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
}
}
@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
if (col_depth < arrays_depths.depths[array_num])
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"{}: Passed array number {} depth ({}) is more than the actual array depth ({}).",
getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth);
}

View File

@ -19,7 +19,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
}
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

View File

@ -21,7 +21,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
}
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

View File

@ -18,7 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument
if (i == 0)
offsets = &offsets_i;
else if (offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
const auto * array_data = &array->getData();

View File

@ -13,7 +13,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}
@ -81,7 +81,7 @@ public:
}
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array sizes",
i + 1, getName());
}

View File

@ -55,14 +55,19 @@ private:
getName(), arguments.size());
}
for (const auto & arg : arguments)
DataTypes arg_types;
for (size_t i = 0, size = arguments.size(); i < size; ++i)
{
if (!isInteger(arg))
if (i < 2 && WhichDataType(arguments[i]).isIPv4())
arg_types.emplace_back(std::make_shared<DataTypeUInt32>());
else if (isInteger(arguments[i]))
arg_types.push_back(arguments[i]);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
arg->getName(), getName());
arguments[i]->getName(), getName());
}
DataTypePtr common_type = getLeastSupertype(arguments);
DataTypePtr common_type = getLeastSupertype(arg_types);
return std::make_shared<DataTypeArray>(common_type);
}

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace
@ -118,7 +118,7 @@ public:
const auto * rhs_array = assert_cast<const ColumnArray *>(arguments[i].column.get());
if (!lhs_array->hasEqualOffsets(*rhs_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array offsets",
i + 1,
getName());

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace
@ -200,7 +200,7 @@ private:
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());
if (!array_x.hasEqualOffsets(array_y))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument 3 of function {} have different array sizes", getName());
}
}
@ -222,7 +222,7 @@ private:
{
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument 3 of function {} have different array sizes", getName());
}
prev_offset = offsets_y[row];

View File

@ -12,7 +12,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
/** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns's offsets when Update
@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl(
else if (first_length != length)
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Elements '{}' and '{}' of Nested data structure (Array columns) "
"have different array sizes ({} and {} respectively) on row {}",
arguments[1].name, arguments[args_idx].name, first_length, length, i);

View File

@ -14,7 +14,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TYPE_MISMATCH;
}
@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block)
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
if (!is_unaligned && !array.hasEqualOffsets(*any_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
current.type = type->getNestedType();

View File

@ -92,12 +92,11 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con
return columns;
}
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context)
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error)
{
ParserColumnDeclarationList parser(true, true);
const Settings & settings = context->getSettingsRef();
String error;
const char * start = structure.data();
const char * end = structure.data() + structure.size();
ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth);
@ -106,7 +105,10 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip
auto * columns_list = dynamic_cast<ASTExpressionList *>(columns_list_raw.get());
if (!columns_list)
{
error = fmt::format("Invalid columns declaration list: \"{}\"", structure);
return false;
}
try
{
@ -118,6 +120,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip
}
catch (...)
{
error = getCurrentExceptionMessage(false);
return false;
}
}

View File

@ -33,6 +33,6 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings
/// Parses a common argument for table functions such as table structure given in string
ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context);
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context);
bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, const ContextPtr & context, String & error);
}

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
ASTPtr ASTColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
clone->children.clear();
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
@ -91,6 +92,7 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
ASTPtr ASTColumnsListMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
clone->children.clear();
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
@ -150,6 +152,7 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt
ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTQualifiedColumnsRegexpMatcher>(*this);
clone->children.clear();
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
@ -216,6 +219,7 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
ASTPtr ASTQualifiedColumnsListMatcher::clone() const
{
auto clone = std::make_shared<ASTQualifiedColumnsListMatcher>(*this);
clone->children.clear();
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
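The added `clone->children.clear()` calls matter because the copy constructor duplicates the `children` vector, which still points at the original's sub-nodes; without clearing it, the clone both aliases the original's children and appends its own copies. A minimal sketch of the pattern with a generic node type (illustrative only, not the ClickHouse AST classes):

#include <cassert>
#include <memory>
#include <vector>

/// Stand-in AST node: the copy constructor copies the children vector, which
/// still references the *original* sub-nodes, so clone() must clear it first.
struct Node
{
    std::shared_ptr<Node> expression;
    std::vector<std::shared_ptr<Node>> children;

    std::shared_ptr<Node> clone() const
    {
        auto copy = std::make_shared<Node>(*this);
        copy->children.clear();  /// without this, copy->children aliases ours
        if (expression)
        {
            copy->expression = expression->clone();
            copy->children.push_back(copy->expression);
        }
        return copy;
    }
};

int main()
{
    auto node = std::make_shared<Node>();
    node->expression = std::make_shared<Node>();
    node->children.push_back(node->expression);

    auto copy = node->clone();
    assert(copy->children.size() == 1);              /// not 2
    assert(copy->children[0] != node->children[0]);  /// a real deep copy
}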

View File

@ -1,6 +1,7 @@
#include <Processors/Formats/ISchemaReader.h>
#include <Formats/SchemaInferenceUtils.h>
#include <DataTypes/DataTypeString.h>
#include <Common/logger_useful.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <boost/algorithm/string.hpp>
@ -15,20 +16,38 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read)
void checkFinalInferredType(
DataTypePtr & type,
const String & name,
const FormatSettings & settings,
const DataTypePtr & default_type,
size_t rows_read,
const String & hints_parsing_error)
{
if (!checkIfTypeIsComplete(type))
{
if (!default_type)
throw Exception(
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
"Cannot determine type for column '{}' by first {} rows "
"of data, most likely this column contains only Nulls or empty "
"Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. "
"If your data contains complex JSON objects, try enabling one "
"of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings",
name,
rows_read);
{
if (hints_parsing_error.empty())
throw Exception(
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
"Cannot determine type for column '{}' by first {} rows "
"of data, most likely this column contains only Nulls or empty "
"Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. "
"If your data contains complex JSON objects, try enabling one "
"of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings",
name,
rows_read);
else
throw Exception(
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
"Cannot determine type for column '{}' by first {} rows "
"of data, most likely this column contains only Nulls or empty Arrays/Maps. "
"Column types from setting schema_inference_hints couldn't be parsed because of error: {}",
name,
rows_read,
hints_parsing_error);
}
type = default_type;
}
@ -46,11 +65,15 @@ IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & fo
void IIRowSchemaReader::setContext(ContextPtr & context)
{
ColumnsDescription columns;
if (tryParseColumnsListFromString(hints_str, columns, context))
if (tryParseColumnsListFromString(hints_str, columns, context, hints_parsing_error))
{
for (const auto & [name, type] : columns.getAll())
hints[name] = type;
}
else
{
LOG_WARNING(&Poco::Logger::get("IIRowSchemaReader"), "Couldn't parse schema inference hints: {}. This setting will be ignored", hints_parsing_error);
}
}
void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
@ -137,7 +160,14 @@ NamesAndTypesList IRowSchemaReader::readSchema()
if (!new_data_types[field_index] || hints.contains(column_names[field_index]))
continue;
chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read);
chooseResultColumnType(
*this,
data_types[field_index],
new_data_types[field_index],
getDefaultType(field_index),
std::to_string(field_index + 1),
rows_read,
hints_parsing_error);
}
}
@ -149,7 +179,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
{
transformFinalTypeIfNeeded(data_types[field_index]);
/// Check that we could determine the type of this column.
checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read);
checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read, hints_parsing_error);
}
result.emplace_back(column_names[field_index], data_types[field_index]);
}
@ -246,7 +276,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
continue;
auto & type = it->second;
chooseResultColumnType(*this, type, new_type, default_type, name, rows_read);
chooseResultColumnType(*this, type, new_type, default_type, name, rows_read, hints_parsing_error);
}
}
@ -263,7 +293,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
{
transformFinalTypeIfNeeded(type);
/// Check that we could determine the type of this column.
checkFinalInferredType(type, name, format_settings, default_type, rows_read);
checkFinalInferredType(type, name, format_settings, default_type, rows_read, hints_parsing_error);
}
result.emplace_back(name, type);
}

View File

@ -65,6 +65,7 @@ protected:
String hints_str;
FormatSettings format_settings;
std::unordered_map<String, DataTypePtr> hints;
String hints_parsing_error;
};
/// Base class for schema inference for formats that read data row by row.
@ -145,7 +146,8 @@ void chooseResultColumnType(
DataTypePtr & new_type,
const DataTypePtr & default_type,
const String & column_name,
size_t row)
size_t row,
const String & hints_parsing_error = "")
{
if (!type)
{
@ -166,14 +168,25 @@ void chooseResultColumnType(
type = default_type;
else
{
throw Exception(
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
"You can specify the type for this column using setting schema_inference_hints",
type->getName(),
column_name,
row,
new_type->getName());
if (hints_parsing_error.empty())
throw Exception(
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
"You can specify the type for this column using setting schema_inference_hints",
type->getName(),
column_name,
row,
new_type->getName());
else
throw Exception(
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. "
"Column types from setting schema_inference_hints couldn't be parsed because of error: {}",
type->getName(),
column_name,
row,
new_type->getName(),
hints_parsing_error);
}
}
@ -196,7 +209,13 @@ void chooseResultColumnTypes(
chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row);
}
void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read);
void checkFinalInferredType(
DataTypePtr & type,
const String & name,
const FormatSettings & settings,
const DataTypePtr & default_type,
size_t rows_read,
const String & hints_parsing_error);
Strings splitColumnNames(const String & column_names_str);

View File

@ -182,7 +182,7 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase(
void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx)
{
ColumnsDescription columns;
if (tryParseColumnsListFromString(hints_str, columns, ctx))
if (tryParseColumnsListFromString(hints_str, columns, ctx, hints_parsing_error))
{
for (const auto & [name, type] : columns.getAll())
hints[name] = type;
@ -238,7 +238,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
rows_in_block = 0;
auto column_type = readColumnAndGetDataType(
column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read);
chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1);
chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1, hints_parsing_error);
}
++iteration;
@ -260,7 +260,7 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
{
transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info);
/// Check that we could determine the type of this column.
checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference);
checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference, hints_parsing_error);
}
result.emplace_back(name, type);
}

View File

@ -91,6 +91,7 @@ private:
const FormatSettings format_settings;
String hints_str;
std::unordered_map<String, DataTypePtr> hints;
String hints_parsing_error;
std::unique_ptr<JSONColumnsReaderBase> reader;
Names column_names_from_settings;
JSONInferenceInfo inference_info;

View File

@ -16,6 +16,21 @@ namespace ErrorCodes
extern const int UNKNOWN_EXCEPTION;
}
static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings)
{
switch (settings.parquet.output_version)
{
case FormatSettings::ParquetVersion::V1_0:
return parquet::ParquetVersion::PARQUET_1_0;
case FormatSettings::ParquetVersion::V2_4:
return parquet::ParquetVersion::PARQUET_2_4;
case FormatSettings::ParquetVersion::V2_6:
return parquet::ParquetVersion::PARQUET_2_6;
case FormatSettings::ParquetVersion::V2_LATEST:
return parquet::ParquetVersion::PARQUET_2_LATEST;
}
}
ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_), format_settings{format_settings_}
{
@ -44,6 +59,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
auto sink = std::make_shared<ArrowBufferedOutputStream>(out);
parquet::WriterProperties::Builder builder;
builder.version(getParquetVersion(format_settings));
#if USE_SNAPPY
builder.compression(parquet::Compression::SNAPPY);
#endif

View File

@ -387,7 +387,8 @@ Chain buildPushingToViewsChain(
chains.emplace_back(std::move(out));
/// Add the view to the query access info so it can appear in system.query_log
if (!no_destination)
/// hasQueryContext - for materialized tables with a background replication process, the query context is not added
if (!no_destination && context->hasQueryContext())
{
context->getQueryContext()->addQueryAccessInfo(
backQuoteIfNeed(view_id.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", view_id.getFullTableName());
@ -757,7 +758,6 @@ IProcessor::Status FinalizingViewsTransform::prepare()
output.finish();
return Status::Finished;
}
return Status::NeedData;
}

View File

@ -690,12 +690,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
request.emplace_back(fs::path(dir) / "metadata_version.txt", true);
/// Inverted index
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true);
disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
disk->removeDirectory(dir);
}

View File

@ -498,12 +498,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
if (!disk)
{
LOG_TRACE(log, "Disk for fetch is not provided, reserving space using storage balanced reservation");
LOG_TEST(log, "Disk for fetch is not provided, reserving space using storage balanced reservation");
reservation
= data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true);
if (!reservation)
{
LOG_TRACE(log, "Disk for fetch is not provided, reserving space using TTL rules");
LOG_TEST(log, "Disk for fetch is not provided, reserving space using TTL rules");
reservation
= data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true);
}
@ -511,18 +511,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
}
else if (!disk)
{
LOG_TRACE(log, "Making balanced reservation");
LOG_TEST(log, "Making balanced reservation");
reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr);
if (!reservation)
{
LOG_TRACE(log, "Making simple reservation");
LOG_TEST(log, "Making simple reservation");
reservation = data.reserveSpace(sum_files_size);
}
}
}
else if (!disk)
{
LOG_TRACE(log, "Making reservation on the largest disk");
LOG_TEST(log, "Making reservation on the largest disk");
/// We don't know the real size of the part because the sender server version is too old
reservation = data.makeEmptyReservationOnLargestDisk();
}
@ -530,11 +530,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
if (!disk)
{
disk = reservation->getDisk();
LOG_INFO(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
}
else
{
LOG_INFO(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type));
}
UInt64 revision = parse<UInt64>(in->getResponseCookie("disk_revision", "0"));

View File

@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
{
const String & name = it.first;
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid"))
continue;
auto jt = rhs.files.find(name);
if (jt == rhs.files.end())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);

View File

@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
for (const auto & index_helper : skip_indices)
for (const auto & skip_index : skip_indices)
{
String stream_name = index_helper->getFileName();
String stream_name = skip_index->getFileName();
skip_indices_streams.emplace_back(
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
stream_name,
data_part->getDataPartStoragePtr(),
stream_name, index_helper->getSerializedFileExtension(),
stream_name, skip_index->getSerializedFileExtension(),
stream_name, marks_file_extension,
default_codec, settings.max_compress_block_size,
marks_compression_codec, settings.marks_compress_block_size,
settings.query_write_settings));
GinIndexStorePtr store = nullptr;
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
{
store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store;
}
skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store));
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store));
skip_index_accumulated_marks.push_back(0);
}
}
@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;
GinIndexStorePtr store;
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
if (typeid_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
{
String stream_name = index_helper->getFileName();
auto it = gin_index_stores.find(stream_name);
@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
auto & stream = *skip_indices_streams[i];
if (!skip_indices_aggregators[i]->empty())
skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing);
/// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
/// files. Note that the checksums provided here are bogus. The problem is that at this point the file writes have already happened and
/// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
{
String filename_without_extension = skip_indices[i]->getFileName();
checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum();
}
}
for (auto & stream : skip_indices_streams)

View File

@ -157,25 +157,29 @@ IMergeTreeDataPart::Checksums checkDataPart(
}
NameSet projections_on_disk;
const auto & checksum_files_txt = checksums_txt.files;
const auto & checksums_txt_files = checksums_txt.files;
for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
{
auto file_name = it->name();
/// We will check projections later.
if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj"))
if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
{
projections_on_disk.insert(file_name);
continue;
}
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid"))
continue;
auto checksum_it = checksums_data.files.find(file_name);
/// Skip files that we already calculated. Also skip metadata files that are not checksummed.
if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name))
{
auto txt_checksum_it = checksum_files_txt.find(file_name);
if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0)
auto txt_checksum_it = checksums_txt_files.find(file_name);
if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0)
{
/// The file is not compressed.
checksum_file(file_name);

View File

@ -8,10 +8,251 @@
#include <Storages/System/StorageSystemPartsBase.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <QueryPipeline/Pipe.h>
#include <IO/IOThreadPool.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <mutex>
namespace DB
{
namespace
{
void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size)
{
/// Files or directories of detached part may not exist. Only count the size of existing files.
if (disk->isFile(from))
{
total_size += disk->getFileSize(from);
}
else
{
for (auto it = disk->iterateDirectory(from); it->isValid(); it->next())
calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size);
}
}
UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from)
{
UInt64 total_size = 0;
try
{
calculateTotalSizeOnDiskImpl(disk, from, total_size);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return total_size;
}
class SourceState
{
std::mutex mutex;
StoragesInfoStream stream;
public:
explicit SourceState(StoragesInfoStream && stream_)
: stream(std::move(stream_))
{}
StoragesInfo next()
{
std::lock_guard lock(mutex);
return stream.next();
}
};
struct WorkerState
{
struct Task
{
DiskPtr disk;
String path;
std::atomic<size_t> * counter = nullptr;
};
std::vector<Task> tasks;
std::atomic<size_t> next_task = {0};
};
class DetachedPartsSource : public ISource
{
public:
DetachedPartsSource(Block header_, std::shared_ptr<SourceState> state_, std::vector<UInt8> columns_mask_, UInt64 block_size_,
bool has_bytes_on_disk_column_)
: ISource(std::move(header_))
, state(state_)
, columns_mask(std::move(columns_mask_))
, block_size(block_size_)
, has_bytes_on_disk_column(has_bytes_on_disk_column_)
{}
String getName() const override { return "DataPartsSource"; }
protected:
static Chunk nullWhenNoRows(MutableColumns && new_columns)
{
chassert(!new_columns.empty());
const auto rows = new_columns[0]->size();
if (!rows)
return {};
return {std::move(new_columns), rows};
}
Chunk generate() override
{
MutableColumns new_columns = getPort().getHeader().cloneEmptyColumns();
chassert(!new_columns.empty());
while (new_columns[0]->size() < block_size)
{
if (detached_parts.empty())
getMoreParts();
if (detached_parts.empty())
return nullWhenNoRows(std::move(new_columns));
generateRows(new_columns, block_size - new_columns[0]->size());
}
return nullWhenNoRows(std::move(new_columns));
}
private:
std::shared_ptr<SourceState> state;
const std::vector<UInt8> columns_mask;
const UInt64 block_size;
const bool has_bytes_on_disk_column;
const size_t support_threads = 35;
StoragesInfo current_info;
DetachedPartsInfo detached_parts;
void getMoreParts()
{
chassert(detached_parts.empty());
while (detached_parts.empty())
{
current_info = state->next();
if (!current_info)
return;
detached_parts = current_info.data->getDetachedParts();
}
}
void calculatePartSizeOnDisk(size_t begin, std::vector<std::atomic<size_t>> & parts_sizes)
{
if (!has_bytes_on_disk_column)
return;
WorkerState worker_state;
for (auto p_id = begin; p_id < detached_parts.size(); ++p_id)
{
auto & part = detached_parts[p_id];
auto part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / part.dir_name;
auto relative_path = fs::path(current_info.data->getRelativeDataPath()) / part_path;
worker_state.tasks.push_back({part.disk, relative_path, &parts_sizes.at(p_id - begin)});
}
std::vector<std::future<void>> futures;
SCOPE_EXIT_SAFE({
/// Cancel all workers
worker_state.next_task.store(worker_state.tasks.size());
/// Exceptions are not propagated
for (auto & future : futures)
if (future.valid())
future.wait();
futures.clear();
});
auto max_thread_to_run = std::max(size_t(1), std::min(support_threads, worker_state.tasks.size() / 10));
futures.reserve(max_thread_to_run);
for (size_t i = 0; i < max_thread_to_run; ++i)
{
if (worker_state.next_task.load() >= worker_state.tasks.size())
break;
auto worker = [&worker_state] ()
{
for (auto id = worker_state.next_task++; id < worker_state.tasks.size(); id = worker_state.next_task++)
{
auto & task = worker_state.tasks.at(id);
size_t size = calculateTotalSizeOnDisk(task.disk, task.path);
task.counter->store(size);
}
};
futures.push_back(
scheduleFromThreadPool<void>(
std::move(worker),
IOThreadPool::get(),
"DP_BytesOnDisk"));
}
/// Exceptions are propagated
for (auto & future : futures)
future.get();
}
void generateRows(MutableColumns & new_columns, size_t max_rows)
{
chassert(current_info);
auto rows = std::min(max_rows, detached_parts.size());
auto begin = detached_parts.size() - rows;
std::vector<std::atomic<size_t>> parts_sizes(rows);
calculatePartSizeOnDisk(begin, parts_sizes);
for (auto p_id = begin; p_id < detached_parts.size(); ++p_id)
{
auto & p = detached_parts.at(p_id);
size_t src_index = 0;
size_t res_index = 0;
if (columns_mask[src_index++])
new_columns[res_index++]->insert(current_info.database);
if (columns_mask[src_index++])
new_columns[res_index++]->insert(current_info.table);
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.dir_name);
if (columns_mask[src_index++])
{
chassert(has_bytes_on_disk_column);
size_t bytes_on_disk = parts_sizes.at(p_id - begin).load();
new_columns[res_index++]->insert(bytes_on_disk);
}
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.disk->getName());
if (columns_mask[src_index++])
new_columns[res_index++]->insert((fs::path(current_info.data->getFullPathOnDisk(p.disk)) / MergeTreeData::DETACHED_DIR_NAME / p.dir_name).string());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.level : Field());
}
detached_parts.resize(begin);
}
};
}
StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_id_)
: IStorage(table_id_)
{
@ -31,33 +272,6 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i
}});
setInMemoryMetadata(storage_metadata);
}
static void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size)
{
/// Files or directories of detached part may not exist. Only count the size of existing files.
if (disk->isFile(from))
{
total_size += disk->getFileSize(from);
}
else
{
for (auto it = disk->iterateDirectory(from); it->isValid(); it->next())
calculateTotalSizeOnDiskImpl(disk, fs::path(from) / it->name(), total_size);
}
}
static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from)
{
UInt64 total_size = 0;
try
{
calculateTotalSizeOnDiskImpl(disk, from, total_size);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return total_size;
}
Pipe StorageSystemDetachedParts::read(
const Names & column_names,
@ -65,66 +279,39 @@ Pipe StorageSystemDetachedParts::read(
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum /*processed_stage*/,
const size_t /*max_block_size*/,
const size_t /*num_streams*/)
const size_t max_block_size,
const size_t num_streams)
{
storage_snapshot->check(column_names);
StoragesInfoStream stream(query_info, context);
/// Create the result.
Block block = storage_snapshot->metadata->getSampleBlock();
Block sample_block = storage_snapshot->metadata->getSampleBlock();
NameSet names_set(column_names.begin(), column_names.end());
std::vector<UInt8> columns_mask(block.columns());
Block header;
for (size_t i = 0; i < block.columns(); ++i)
Block header;
std::vector<UInt8> columns_mask(sample_block.columns());
for (size_t i = 0; i < columns_mask.size(); ++i)
{
if (names_set.contains(block.getByPosition(i).name))
if (names_set.contains(sample_block.getByPosition(i).name))
{
columns_mask[i] = 1;
header.insert(block.getByPosition(i));
header.insert(sample_block.getByPosition(i));
}
}
MutableColumns new_columns = header.cloneEmptyColumns();
while (StoragesInfo info = stream.next())
bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk");
auto state = std::make_shared<SourceState>(StoragesInfoStream(query_info, context));
Pipe pipe;
for (size_t i = 0; i < num_streams; ++i)
{
const auto parts = info.data->getDetachedParts();
for (const auto & p : parts)
{
size_t src_index = 0, res_index = 0;
String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name;
if (columns_mask[src_index++])
new_columns[res_index++]->insert(info.database);
if (columns_mask[src_index++])
new_columns[res_index++]->insert(info.table);
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.partition_id : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.dir_name);
if (columns_mask[src_index++])
new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path));
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.disk->getName());
if (columns_mask[src_index++])
new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field());
if (columns_mask[src_index++])
new_columns[res_index++]->insert(p.valid_name ? p.level : Field());
}
auto source = std::make_shared<DetachedPartsSource>(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column);
pipe.addSource(std::move(source));
}
UInt64 num_rows = new_columns.at(0)->size();
Chunk chunk(std::move(new_columns), num_rows);
return Pipe(std::make_shared<SourceFromSingleChunk>(std::move(header), std::move(chunk)));
return pipe;
}
}
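The rewritten source computes `bytes_on_disk` for detached parts in parallel: workers claim tasks by incrementing a shared atomic index, and exceptions are propagated by waiting on the futures. Below is a standalone sketch of that claiming scheme, using `std::async` in place of ClickHouse's `IOThreadPool`; the `Task` type and the doubling stand in for `calculateTotalSizeOnDisk`.

#include <atomic>
#include <cstddef>
#include <future>
#include <vector>

struct Task { std::size_t input = 0; std::atomic<std::size_t> result{0}; };

/// Workers share an atomic counter; each fetch-and-increment claims the next
/// task, so no task is processed twice and no extra locking is needed.
void runWorkers(std::vector<Task> & tasks, std::size_t num_workers)
{
    std::atomic<std::size_t> next_task{0};
    std::vector<std::future<void>> futures;
    futures.reserve(num_workers);
    for (std::size_t i = 0; i < num_workers; ++i)
        futures.push_back(std::async(std::launch::async, [&]
        {
            for (std::size_t id = next_task++; id < tasks.size(); id = next_task++)
                tasks[id].result.store(tasks[id].input * 2);  /// stand-in for the size calculation
        }));
    for (auto & future : futures)
        future.get();  /// propagates exceptions, as the original does
}

int main()
{
    std::vector<Task> tasks(100);
    for (std::size_t i = 0; i < tasks.size(); ++i)
        tasks[i].input = i;
    runWorkers(tasks, 8);

    std::size_t sum = 0;
    for (const auto & task : tasks)
        sum += task.result.load();
    return sum == 99 * 100 ? 0 : 1;  /// 2 * (0 + 1 + ... + 99)
}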

View File

@ -92,7 +92,8 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr
const auto & literal = args[0]->as<const ASTLiteral>();
String value;
if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context))
String error;
if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, structure, context, error))
{
has_structure_in_arguments = true;
return;

View File

@ -52,7 +52,7 @@ class Labels:
class ReleaseBranch:
CHERRYPICK_DESCRIPTION = """This pull-request is a first step of an automated \
CHERRYPICK_DESCRIPTION = f"""This pull-request is a first step of an automated \
backporting.
It contains the same changes as a local `git cherry-pick` would produce.
If you intend to continue backporting these changes, then resolve all conflicts if any.
@ -60,13 +60,16 @@ Otherwise, if you do not want to backport them, then just close this pull-reques
The check results do not matter at this step - you can safely ignore them.
Also this pull-request will be merged automatically as it reaches the mergeable state, \
but you always can merge it manually.
**do not merge it manually**.
If it gets stuck, check the original PR for `{Labels.BACKPORTS_CREATED}` and delete it if \
necessary.
"""
BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
backporting.
Treat it as a standard pull-request: look at the checks and resolve conflicts.
Merge it only if you intend to backport changes to the target branch, otherwise just \
close it.
close it.
"""
REMOTE = ""

View File

@ -289,6 +289,18 @@ CI_CONFIG = {
"Stress test (debug)": {
"required_build": "package_debug",
},
"Upgrade check (asan)": {
"required_build": "package_asan",
},
"Upgrade check (tsan)": {
"required_build": "package_tsan",
},
"Upgrade check (msan)": {
"required_build": "package_msan",
},
"Upgrade check (debug)": {
"required_build": "package_debug",
},
"Integration tests (asan)": {
"required_build": "package_asan",
},

View File

@ -218,7 +218,7 @@ def main():
else:
description = "Nothing to update"
format_description(description)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -369,7 +369,7 @@ def main():
description = f"Processed tags: {', '.join(tags)}"
format_description(description)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -345,7 +345,7 @@ def main():
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, args.check_name, test_results)
format_description(description)
description = format_description(description)
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)

View File

@ -273,7 +273,9 @@ class Release:
self, version: ClickHouseVersion, reset_tweak: bool = True
) -> None:
if reset_tweak:
desc = version.description
version = version.reset_tweak()
version.with_description(desc)
update_cmake_version(version)
update_contributors(raise_error=True)
if self.dry_run:

View File

@ -200,6 +200,7 @@ if __name__ == "__main__":
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)

View File

@ -176,7 +176,7 @@ def main():
# status = "failure"
description = "Task failed: $?=" + str(retcode)
format_description(description)
description = format_description(description)
report_url = upload_results(
s3_helper,

View File

@ -8,13 +8,13 @@ import logging
import time
def get_options(i, backward_compatibility_check):
def get_options(i, upgrade_check):
options = []
client_options = []
if 0 < i:
if i > 0:
options.append("--order=random")
if i % 3 == 2 and not backward_compatibility_check:
if i % 3 == 2 and not upgrade_check:
options.append(
'''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
)
@ -30,26 +30,24 @@ def get_options(i, backward_compatibility_check):
if i % 2 == 1:
join_alg_num = i // 2
if join_alg_num % 5 == 0:
if join_alg_num % 4 == 0:
client_options.append("join_algorithm='parallel_hash'")
if join_alg_num % 5 == 1:
if join_alg_num % 4 == 1:
client_options.append("join_algorithm='partial_merge'")
if join_alg_num % 5 == 2:
if join_alg_num % 4 == 2:
client_options.append("join_algorithm='full_sorting_merge'")
if join_alg_num % 5 == 3:
client_options.append("join_algorithm='grace_hash'")
if join_alg_num % 5 == 4:
if join_alg_num % 4 == 3:
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
client_options.append("max_rows_in_join=1000")
if i % 5 == 1:
client_options.append("memory_tracker_fault_probability=0.001")
if i % 2 == 1 and not backward_compatibility_check:
if i % 2 == 1 and not upgrade_check:
client_options.append("group_by_use_nulls=1")
# 12 % 3 == 0, so it's Atomic database
if i == 12 and not backward_compatibility_check:
if i == 12 and not upgrade_check:
client_options.append("implicit_transaction=1")
client_options.append("throw_on_unsupported_query_inside_transaction=0")
@ -65,11 +63,9 @@ def run_func_test(
num_processes,
skip_tests_option,
global_time_limit,
backward_compatibility_check,
upgrade_check,
):
backward_compatibility_check_option = (
"--backward-compatibility-check" if backward_compatibility_check else ""
)
upgrade_check_option = "--upgrade-check" if upgrade_check else ""
global_time_limit_option = ""
if global_time_limit:
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
@ -79,14 +75,14 @@ def run_func_test(
for i in range(num_processes)
]
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], "w")
for i, path in enumerate(output_paths):
f = open(path, "w")
full_command = "{} {} {} {} {}".format(
cmd,
get_options(i, backward_compatibility_check),
get_options(i, upgrade_check),
global_time_limit_option,
skip_tests_option,
backward_compatibility_check_option,
upgrade_check_option,
)
logging.info("Run func tests '%s'", full_command)
p = Popen(full_command, shell=True, stdout=f, stderr=f)
@ -178,7 +174,7 @@ def prepare_for_hung_check(drop_databases):
for db in databases:
if db == "system":
continue
command = make_query_command(f'DETACH DATABASE {db}')
command = make_query_command(f"DETACH DATABASE {db}")
# we don't wait for drop
Popen(command, shell=True)
break
@ -237,7 +233,7 @@ if __name__ == "__main__":
parser.add_argument("--output-folder")
parser.add_argument("--global-time-limit", type=int, default=1800)
parser.add_argument("--num-parallel", type=int, default=cpu_count())
parser.add_argument("--backward-compatibility-check", action="store_true")
parser.add_argument("--upgrade-check", action="store_true")
parser.add_argument("--hung-check", action="store_true", default=False)
# make sense only for hung check
parser.add_argument("--drop-databases", action="store_true", default=False)
@ -252,7 +248,7 @@ if __name__ == "__main__":
args.num_parallel,
args.skip_func_tests,
args.global_time_limit,
args.backward_compatibility_check,
args.upgrade_check,
)
logging.info("Will wait functests to finish")
@ -305,11 +301,12 @@ if __name__ == "__main__":
]
)
hung_check_log = os.path.join(args.output_folder, "hung_check.log")
tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE)
tee = Popen(["/usr/bin/tee", hung_check_log], stdin=PIPE)
res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT)
tee.stdin.close()
if tee.stdin is not None:
tee.stdin.close()
if res != 0 and have_long_running_queries:
logging.info("Hung check failed with exit code {}".format(res))
logging.info("Hung check failed with exit code %d", res)
else:
hung_check_status = "No queries hung\tOK\t\\N\t\n"
with open(
@ -318,5 +315,4 @@ if __name__ == "__main__":
results.write(hung_check_status)
os.remove(hung_check_log)
logging.info("Stress test finished")

View File

@ -36,7 +36,6 @@ def get_run_command(
"docker run --cap-add=SYS_PTRACE "
# a static link, don't use S3_URL or S3_DOWNLOAD
"-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
f"-e DISABLE_BC_CHECK={os.environ.get('DISABLE_BC_CHECK', '0')} "
# For dmesg and sysctl
"--privileged "
f"--volume={build_path}:/package_folder "
@ -109,7 +108,7 @@ def process_results(
return state, description, test_results, additional_files
def main():
def run_stress_test(docker_image_name):
logging.basicConfig(level=logging.INFO)
stopwatch = Stopwatch()
@ -132,7 +131,7 @@ def main():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_image = get_image_with_version(reports_path, "clickhouse/stress-test")
docker_image = get_image_with_version(reports_path, docker_image_name)
packages_path = os.path.join(temp_path, "packages")
if not os.path.exists(packages_path):
@ -199,4 +198,4 @@ def main():
if __name__ == "__main__":
main()
run_stress_test("clickhouse/stress-test")

tests/ci/stress_tests.lib Normal file (309 lines)
View File

@ -0,0 +1,309 @@
#!/bin/bash
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
function escaped()
{
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
# Also limit lines width just in case (too long lines are not really useful usually)
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
function unts()
{
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
function install_packages()
{
dpkg -i $1/clickhouse-common-static_*.deb
dpkg -i $1/clickhouse-common-static-dbg_*.deb
dpkg -i $1/clickhouse-server_*.deb
dpkg -i $1/clickhouse-client_*.deb
}
function configure()
{
# install test configs
export USE_DATABASE_ORDINARY=1
export EXPORT_S3_STORAGE_POLICIES=1
/usr/share/clickhouse-test/config/install.sh
# avoid too slow startup
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
# since we run clickhouse from root
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
# so that we can override this setting and execute service queries, like:
# - hung check
# - show/drop database
# - ...
#
# So max_memory_usage_for_user will be a soft limit, and
# max_server_memory_usage will be a hard limit, and queries that should be
# executed regardless of memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
<profiles>
<default>
<max_memory_usage>10G</max_memory_usage>
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
</default>
</profiles>
</clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Analyzer is not yet ready for testing
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
<clickhouse>
<profiles>
<default>
<constraints>
<allow_experimental_analyzer>
<readonly/>
</allow_experimental_analyzer>
</constraints>
</default>
</profiles>
</clickhouse>
EOL
}
function stop()
{
local max_tries="${1:-90}"
local check_hang="${2:-true}"
local pid
# Preserve the pid, since the server can hang after the PID file has been deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
if [ $check_hang == true ]
then
# We failed to stop the server with SIGTERM. Maybe it hung; let's collect stacktraces.
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
fi
}
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt ${1:-120} ]
then
echo "Cannot start clickhouse-server"
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
cat /var/log/clickhouse-server/stdout.log
tail -n100 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
break
fi
# use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5
counter=$((counter + 1))
done
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because for some of them it may cause gdb to hang;
# explicitly ignore non-fatal signals that are used by the server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
thread apply all backtrace full
info registers
disassemble /s
up
disassemble /s
up
disassemble /s
p \"done\"
detach
quit
" > script.gdb
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}
function check_server_start()
{
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
# Remove file application_errors.txt if it's empty
[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
}
function check_logs_for_critical_errors()
{
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
rg -Fa "########################################" /test_output/* > /dev/null \
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
function get_gdb_log_context()
{
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
}
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
dmesg -T > /test_output/dmesg.log
# OOM in dmesg -- those are real
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
}
function collect_query_and_trace_logs()
{
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
done
}
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
done
}
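A worked example of the memory sizing performed in configure() above, assuming a hypothetical 32 GiB host; the formulas mirror the shell arithmetic in the library, while the host size is made up for illustration.

# Soft per-user cap (30% of RAM) vs. hard server-wide cap (50% of RAM).
total_mem_kib = 32 * 1024 * 1024             # as read from /proc/meminfo (KiB)
total_mem = total_mem_kib * 1024             # bytes, ~34.4e9
max_server_memory_usage_to_ram_ratio = 0.5   # hard cap: ~16 GiB for the server
max_users_mem = total_mem * 30 // 100        # soft cap: ~9.6 GiB per user
print(max_server_memory_usage_to_ram_ratio, max_users_mem)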

View File

@ -0,0 +1,4 @@
import stress_check
if __name__ == "__main__":
stress_check.run_stress_test("clickhouse/upgrade-check")

View File

@ -58,6 +58,7 @@ class ClickHouseVersion:
elif self._git is not None:
self._tweak = self._git.tweak
self._describe = ""
self._description = ""
def update(self, part: Literal["major", "minor", "patch"]) -> "ClickHouseVersion":
"""If part is valid, returns a new version"""
@ -125,6 +126,10 @@ class ClickHouseVersion:
def describe(self):
return self._describe
@property
def description(self) -> str:
return self._description
@property
def string(self):
return ".".join(
@ -149,6 +154,7 @@ class ClickHouseVersion:
def with_description(self, version_type):
if version_type not in VersionType.VALID:
raise ValueError(f"version type {version_type} not in {VersionType.VALID}")
self._description = version_type
self._describe = f"v{self.string}-{version_type}"
def __eq__(self, other: Any) -> bool:
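The new description property exists so that release.py (see the hunk earlier in this diff) can save and re-apply the version description around reset_tweak(), which returns a fresh ClickHouseVersion. A simplified sketch of that flow follows, with stand-in method bodies and an arbitrary version string; the real class also carries major/minor/patch/tweak state.

class ClickHouseVersion:
    def __init__(self, string: str, description: str = ""):
        self.string = string
        self._description = description

    @property
    def description(self) -> str:
        return self._description

    def with_description(self, version_type: str) -> None:
        self._description = version_type

    def reset_tweak(self) -> "ClickHouseVersion":
        return ClickHouseVersion(self.string)  # fresh object, empty description

version = ClickHouseVersion("23.3.1.1")
version.with_description("stable")
desc = version.description          # save it, as the release.py fix does
version = version.reset_tweak()     # would otherwise drop the description
version.with_description(desc)      # re-apply after the reset
assert version.description == "stable"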

View File

@ -123,6 +123,7 @@ TRUSTED_CONTRIBUTORS = {
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse Employee
"myrrc", # Mike Kot, DoubleCloud
"thevar1able", # ClickHouse Employee
]
}

View File

@ -450,7 +450,7 @@ class FailureReason(enum.Enum):
REPLICATED_DB = "replicated-database"
S3_STORAGE = "s3-storage"
BUILD = "not running for current build"
BACKWARD_INCOMPATIBLE = "test is backward incompatible"
NO_UPGRADE_CHECK = "not running for upgrade check"
NO_PARALLEL_REPLICAS = "smth is not supported with parallel replicas"
# UNKNOWN reasons
@ -773,35 +773,6 @@ class TestCase:
else ""
)
# Check if test contains tag "no-backward-compatibility-check" and we should skip it
def check_backward_incompatible_tag(self) -> bool:
for tag in self.tags:
if tag.startswith("no-backward-compatibility-check"):
split = tag.split(":")
# If version is not specified in tag, always skip this test.
if len(split) == 1:
return True
version_from_tag = split[1]
# Check if extracted string from tag is a real ClickHouse version, if not - always skip test.
if re.match(VERSION_PATTERN, version_from_tag) is None:
return True
server_version = str(
clickhouse_execute(args, "SELECT version()").decode()
)
# If server version is less or equal from the version specified in tag, we should skip this test.
version_from_tag_split = list(map(int, version_from_tag.split(".")))
server_version_split = list(map(int, server_version.split(".")))
if (
server_version_split[: len(version_from_tag_split)]
<= version_from_tag_split
):
return True
return False
# should skip test, should increment skipped_total, skip reason
def should_skip_test(self, suite) -> Optional[FailureReason]:
tags = self.tags
@ -852,10 +823,10 @@ class TestCase:
elif tags and ("no-replicated-database" in tags) and args.replicated_database:
return FailureReason.REPLICATED_DB
elif (
args.backward_compatibility_check and self.check_backward_incompatible_tag()
):
return FailureReason.BACKWARD_INCOMPATIBLE
# TODO: remove checking "no-backward-compatibility-check" after 23.1
elif args.upgrade_check and (
"no-backward-compatibility-check" in tags or "no-upgrade-check" in tags):
return FailureReason.NO_UPGRADE_CHECK
elif tags and ("no-s3-storage" in tags) and args.s3_storage:
return FailureReason.S3_STORAGE
@ -2461,9 +2432,9 @@ def parse_args():
)
group.add_argument(
"--backward-compatibility-check",
"--upgrade-check",
action="store_true",
help="Run tests for further backward compatibility testing by ignoring all"
help="Run tests for further server upgrade testing by ignoring all"
"drop queries in tests for collecting data from new version of server",
)
parser.add_argument(
@ -2614,7 +2585,7 @@ if __name__ == "__main__":
else:
args.client_database = "default"
if args.backward_compatibility_check:
if args.upgrade_check:
args.client += " --fake-drop"
if args.client_option or args.secure:

View File

@ -628,6 +628,27 @@ def test_table_override(started_cluster):
assert_eq_with_retry(instance, query, expected)
def test_materialized_view(started_cluster):
cursor = pg_manager.get_db_cursor()
cursor.execute(f"DROP TABLE IF EXISTS test_table")
cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)")
cursor.execute(f"INSERT INTO test_table SELECT 1, 2")
instance.query("DROP DATABASE IF EXISTS test_database")
instance.query(
"CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'"
)
check_tables_are_synchronized(instance, "test_table")
instance.query("DROP TABLE IF EXISTS mv")
instance.query(
"CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table"
)
assert "1\t2" == instance.query("SELECT * FROM mv").strip()
cursor.execute(f"INSERT INTO test_table SELECT 3, 4")
check_tables_are_synchronized(instance, "test_table")
assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip()
pg_manager.drop_materialized_db()
if __name__ == "__main__":
cluster.start()
input("Cluster created, press any key to destroy...")

View File

@ -1,9 +1,8 @@
<test>
<settings>
<!-- will enable in a subsequent PR -->
<!-- <allow_aggregate_partitions_independently>1</allow_aggregate_partitions_independently> -->
<!-- <force_aggregate_partitions_independently>1</force_aggregate_partitions_independently> -->
<!-- <max_number_of_partitions_for_independent_aggregation>256</max_number_of_partitions_for_independent_aggregation> -->
<allow_aggregate_partitions_independently>1</allow_aggregate_partitions_independently>
<force_aggregate_partitions_independently>1</force_aggregate_partitions_independently>
<max_number_of_partitions_for_independent_aggregation>256</max_number_of_partitions_for_independent_aggregation>
<max_memory_usage>0</max_memory_usage>
<max_partitions_per_insert_block>256</max_partitions_per_insert_block>
</settings>

View File

@ -1,4 +1,4 @@
-- Tags: no-backward-compatibility-check
-- Tags: no-upgrade-check
DROP TABLE IF EXISTS alter_00061;
set allow_deprecated_syntax_for_merge_tree=1;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: zookeeper, no-parallel, no-s3-storage, no-backward-compatibility-check
# Tags: zookeeper, no-parallel, no-s3-storage, no-upgrade-check
# Because REPLACE PARTITION does not force immediate removal of replaced data parts from the local filesystem
# (it tries to do it as quickly as possible, but it is still performed asynchronously in a separate thread)

View File

@ -1,4 +1,4 @@
-- Tags: long, zookeeper, no-replicated-database, no-backward-compatibility-check
-- Tags: long, zookeeper, no-replicated-database, no-upgrade-check
-- Tag no-replicated-database: Fails due to additional replicas or shards
SET send_logs_level = 'fatal';

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-backward-compatibility-check
# Tags: no-upgrade-check
# Test fix for issue #5066
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race, zookeeper, no-parallel, no-backward-compatibility-check
# Tags: race, zookeeper, no-parallel, no-upgrade-check
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: zookeeper, no-parallel, no-fasttest, no-backward-compatibility-check
# Tags: zookeeper, no-parallel, no-fasttest, no-upgrade-check
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race, zookeeper, no-backward-compatibility-check
# Tags: race, zookeeper, no-upgrade-check
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,4 @@
-- Tags: no-parallel, no-backward-compatibility-check
-- Tags: no-parallel, no-upgrade-check
DROP DATABASE IF EXISTS test_01191;
CREATE DATABASE test_01191 ENGINE=Atomic;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, zookeeper, no-parallel, no-backward-compatibility-check
# Tags: long, zookeeper, no-parallel, no-upgrade-check
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,4 @@
-- Tags: zookeeper, no-backward-compatibility-check
-- Tags: zookeeper, no-upgrade-check
DROP TABLE IF EXISTS table_rename_with_ttl;

View File

@ -1,4 +1,4 @@
-- Tags: no-parallel, no-backward-compatibility-check
-- Tags: no-parallel, no-upgrade-check
DROP DATABASE IF EXISTS db_01391;
CREATE DATABASE db_01391;

View File

@ -1,4 +1,4 @@
-- Tags: no-backward-compatibility-check
-- Tags: no-upgrade-check
-- force data path with the user/pass in it
set use_compact_format_in_distributed_parts_names=0;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-backward-compatibility-check
# Tags: no-upgrade-check
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,4 @@
-- Tags: long, no-backward-compatibility-check
-- Tags: long, no-upgrade-check
DROP TABLE IF EXISTS test_01640;
DROP TABLE IF EXISTS restore_01640;

View File

@ -1,4 +1,4 @@
-- Tags: no-backward-compatibility-check
-- Tags: no-upgrade-check
SET mutations_sync = 2;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-backward-compatibility-check
# Tags: no-upgrade-check
set -eu

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-backward-compatibility-check
# Tags: no-upgrade-check
set -eu

View File

@ -1,4 +1,4 @@
-- Tags: no-backward-compatibility-check, no-fasttest
-- Tags: no-upgrade-check, no-fasttest
DROP TABLE IF EXISTS partslost_0;
DROP TABLE IF EXISTS partslost_1;

View File

@ -72,7 +72,7 @@ uint8 Nullable(UInt8)
int16 Nullable(Int16)
uint16 Nullable(UInt16)
int32 Nullable(Int32)
uint32 Nullable(Int64)
uint32 Nullable(UInt32)
int64 Nullable(Int64)
uint64 Nullable(UInt64)
0 0 0 0 0 0 0 0

Some files were not shown because too many files have changed in this diff Show More