Merge remote-tracking branch 'origin/master' into analyzer_trivial_count_optimization

This commit is contained in:
Igor Nikonov 2023-03-03 17:44:08 +00:00
commit b6f05a6399
550 changed files with 6720 additions and 3179 deletions

View File

@ -209,3 +209,5 @@ CheckOptions:
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true

View File

@ -3105,10 +3105,10 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
TEMP_PATH=${{runner.temp}}/stress_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (asan)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
@ -3267,6 +3267,142 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### UPGRADE CHECK ######################################
##############################################################################################
UpgradeCheckAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (asan)
REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckTsan:
needs: [BuilderDebTsan]
# same as for stress test with tsan
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (tsan)
REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (msan)
REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (debug)
REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
##################################### AST FUZZERS ############################################
##############################################################################################

View File

@ -391,10 +391,12 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it

View File

@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un
#include <sys/stat.h>
#include <stdint.h>
#if !defined(__aarch64__)
struct statx {
uint32_t stx_mask;
uint32_t stx_blksize;
@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag,
{
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
}
#endif
#include <syscall.h>

View File

@ -8,3 +8,8 @@ int fallocate(int fd, int mode, off_t base, off_t len)
{
return syscall(SYS_fallocate, fd, mode, base, len);
}
/// `fallocate64` entry point: forwards all four arguments unchanged to
/// fallocate() and returns its result.
/// NOTE(review): presumably provided so that code referencing the `*64`
/// symbol name still links against this compatibility layer - confirm.
int fallocate64(int fd, int mode, off_t base, off_t len)
{
return fallocate(fd, mode, base, len);
}

View File

@ -9,3 +9,8 @@ ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
/// There was cancellable syscall (syscall_cp), but I don't care.
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
}
/// `pwritev64` entry point: forwards all four arguments unchanged to
/// pwritev() and returns its result.
/// NOTE(review): presumably provided so that code referencing the `*64`
/// symbol name still links against this compatibility layer - confirm.
ssize_t pwritev64(int fd, const struct iovec *iov, int count, off_t ofs)
{
return pwritev(fd, iov, count, ofs);
}

View File

@ -67,19 +67,7 @@ public:
void swap(Timespan & timespan);
/// Swaps the Timespan with another one.
bool operator==(const Timespan & ts) const;
bool operator!=(const Timespan & ts) const;
bool operator>(const Timespan & ts) const;
bool operator>=(const Timespan & ts) const;
bool operator<(const Timespan & ts) const;
bool operator<=(const Timespan & ts) const;
bool operator==(TimeDiff microSeconds) const;
bool operator!=(TimeDiff microSeconds) const;
bool operator>(TimeDiff microSeconds) const;
bool operator>=(TimeDiff microSeconds) const;
bool operator<(TimeDiff microSeconds) const;
bool operator<=(TimeDiff microSeconds) const;
auto operator<=>(const Timespan & ts) const = default;
Timespan operator+(const Timespan & d) const;
Timespan operator-(const Timespan & d) const;
@ -215,78 +203,6 @@ inline Timespan::TimeDiff Timespan::totalMicroseconds() const
}
/// Relational operators taking another Timespan.
/// Each one applies the same-named built-in operator to the _span members
/// of the two objects and returns the result.
inline bool Timespan::operator==(const Timespan & ts) const
{
return _span == ts._span;
}
inline bool Timespan::operator!=(const Timespan & ts) const
{
return _span != ts._span;
}
inline bool Timespan::operator>(const Timespan & ts) const
{
return _span > ts._span;
}
inline bool Timespan::operator>=(const Timespan & ts) const
{
return _span >= ts._span;
}
inline bool Timespan::operator<(const Timespan & ts) const
{
return _span < ts._span;
}
inline bool Timespan::operator<=(const Timespan & ts) const
{
return _span <= ts._span;
}
/// Relational operators taking a raw TimeDiff value.
/// Each one compares this object's _span member directly with the
/// microSeconds argument using the same-named built-in operator.
inline bool Timespan::operator==(TimeDiff microSeconds) const
{
return _span == microSeconds;
}
inline bool Timespan::operator!=(TimeDiff microSeconds) const
{
return _span != microSeconds;
}
inline bool Timespan::operator>(TimeDiff microSeconds) const
{
return _span > microSeconds;
}
inline bool Timespan::operator>=(TimeDiff microSeconds) const
{
return _span >= microSeconds;
}
inline bool Timespan::operator<(TimeDiff microSeconds) const
{
return _span < microSeconds;
}
inline bool Timespan::operator<=(TimeDiff microSeconds) const
{
return _span <= microSeconds;
}
inline void swap(Timespan & s1, Timespan & s2)
{
s1.swap(s2);

View File

@ -30,7 +30,7 @@ elseif (ARCH_AARCH64)
# support it.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
else ()
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1, 10]. In particular, it
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
# old OSs.
@ -45,19 +45,20 @@ elseif (ARCH_AARCH64)
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits
# but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
# ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15.
# rcpc: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
# [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc")
endif ()
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM

View File

@ -45,6 +45,7 @@ if (COMPILER_CLANG)
no_warning(weak-vtables)
no_warning(thread-safety-negative) # experimental flag, too many false positives
no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
no_warning(unsafe-buffer-usage) # too aggressive
# TODO Enable conversion, sign-conversion, double-promotion warnings.
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler

2
contrib/capnproto vendored

@ -1 +1 @@
Subproject commit e19cd661e49dd9022d3f920b69d843333b896451
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441

View File

@ -98,6 +98,16 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# Since we always use toolchain files to generate hermetic builds, cmake will
# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM
# targets with all tests enabled, which will slow down cmake configuration and
# compilation (You'll see Building native llvm-tblgen...). Let's disable the
# cross compiling indicator for now.
#
# TODO We should let cmake know whether it's indeed a cross compilation in the
# first place.
set (CMAKE_CROSSCOMPILING 0)
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES

View File

@ -43,7 +43,8 @@
"docker/test/stateful": {
"name": "clickhouse/stateful-test",
"dependent": [
"docker/test/stress"
"docker/test/stress",
"docker/test/upgrade"
]
},
"docker/test/unit": {
@ -54,6 +55,10 @@
"name": "clickhouse/stress-test",
"dependent": []
},
"docker/test/upgrade": {
"name": "clickhouse/upgrade-check",
"dependent": []
},
"docker/test/codebrowser": {
"name": "clickhouse/codebrowser",
"dependent": []

View File

@ -1,4 +1,4 @@
# rebuild in #33610
# rebuild in #47031
# docker build -t clickhouse/stateful-test .
ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG

View File

@ -21,10 +21,9 @@ RUN apt-get update -y \
openssl \
netcat-openbsd \
telnet \
llvm-9 \
brotli
brotli \
&& apt-get clean
COPY ./stress /stress
COPY run.sh /
ENV DATASETS="hits visits"

View File

@ -8,229 +8,13 @@ dmesg --clear
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
# Escapes text so that it can be appended as a single field of a row in
# test_results.tsv (callers pipe log excerpts into this function).
# Each input line is read as one String value (LineAsString), truncated with
# substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH) and printed in CustomSeparated
# format with a custom row delimiter instead of a real newline.
# NOTE(review): the exact delimiter that remains after bash and SQL
# unescaping of the backslashes below should be confirmed before changing it.
function escaped()
{
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
# Also limit the line width just in case (overly long lines are usually not useful)
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
# Prints the first $FAILURE_CONTEXT_LINES lines of file $1, passed through escaped().
# $1 is expanded unquoted: when the function is called without an argument
# (get_gdb_log_context pipes into it that way) it expands to nothing and
# head reads stdin instead. Quoting it would break that usage.
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
# Filters stdin: for every line containing "DD:DD " (two digits, a colon, two
# digits, a space) prints only the text that follows the match (\K discards
# the matched prefix); lines without such a match are dropped (grep -o).
# NOTE(review): presumably meant to strip timestamps from log lines - no
# caller is visible in this part of the file.
function unts()
{
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
# Prints an escaped excerpt of the file /test_output/$1.
# Takes the first $FAILURE_CONTEXT_LINES lines, keeps from each of them only
# the part starting at " [ <digits> ] {" (lines without that pattern are
# dropped by grep -o) and pipes the result through escaped().
# NOTE(review): the bracketed number looks like the thread id column of the
# server log - confirm.
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
# Installs the ClickHouse .deb packages found in a directory.
#   $1 - directory that contains the clickhouse-*.deb files
# The packages are installed one by one in this order: common-static,
# common-static-dbg, server, client.
# The directory is quoted so that a path containing spaces or glob characters
# is passed to dpkg intact; the file-name glob stays outside the quotes so it
# is still expanded by the shell.
function install_packages()
{
    dpkg -i "$1"/clickhouse-common-static_*.deb
    dpkg -i "$1"/clickhouse-common-static-dbg_*.deb
    dpkg -i "$1"/clickhouse-server_*.deb
    dpkg -i "$1"/clickhouse-client_*.deb
}
# Prepares the server and client configuration for the test run. Takes no arguments.
# Steps, in order:
#   - runs the test config installer with USE_DATABASE_ORDINARY=1 and
#     EXPORT_S3_STORAGE_POLICIES=1 exported;
#   - symlinks clickhouse-test and two CI helper scripts into /usr/bin;
#   - rewrites keeper_port.xml, replacing snapshot_distance 100000 with 10000;
#   - sets ASAN_OPTIONS for the server (/etc/environment) and for the client (export);
#   - makes root the owner of /var/lib/clickhouse;
#   - writes config.d / users.d overrides: asynchronous metrics period,
#     server memory ratio, per-user and per-query memory limits, core dump
#     settings, oom_score, and a readonly constraint on allow_experimental_analyzer.
# NOTE(review): this function is called more than once in this script, and
# `ln -s` (without -f) fails when the link already exists - confirm that such
# a failure is harmless here.
function configure()
{
# install test configs
export USE_DATABASE_ORDINARY=1
export EXPORT_S3_STORAGE_POLICIES=1
/usr/share/clickhouse-test/config/install.sh
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# avoid too slow startup
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
# since we run clickhouse from root
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
# so that we can override this setting and execute service queries, like:
# - hung check
# - show/drop database
# - ...
#
# So max_memory_usage_for_user will be a soft limit, and
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
<profiles>
<default>
<max_memory_usage>10G</max_memory_usage>
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
</default>
</profiles>
</clickhouse>
EOL
# Core dump settings; $PWD is expanded at the time this file is written
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Let OOM killer terminate other processes before clickhouse-server:
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
<clickhouse>
<oom_score>-1000</oom_score>
</clickhouse>
EOL
# Analyzer is not yet ready for testing
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
<clickhouse>
<profiles>
<default>
<constraints>
<allow_experimental_analyzer>
<readonly/>
</allow_experimental_analyzer>
</constraints>
</default>
</profiles>
</clickhouse>
EOL
}
# Stops clickhouse-server gracefully and falls back to a forced stop.
#   $1 - value passed to `clickhouse stop --max-tries` (default: 90)
# If the graceful stop succeeds the function returns immediately. Otherwise it
# appends a FAIL row to /test_output/test_results.tsv, dumps backtraces of all
# threads of the server into /test_output/gdb.log and runs `clickhouse stop --force`.
function stop()
{
local max_tries="${1:-90}"
local pid
# Remember the pid up front: the server may still hang after its pid file has been deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hung, so let's collect stacktraces.
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
# Terminate any running gdb first (start() leaves one attached in the background); errors are ignored.
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
}
# Starts clickhouse-server and attaches gdb to it in the background.
#   $1 - limit for the retry counter (default: 120); each retry runs
#        `clickhouse start --user root` and sleeps 0.5 seconds
# The loop ends as soon as `clickhouse-client --query "SELECT 1"` succeeds.
# When the counter exceeds the limit, a FAIL row is appended to
# /test_output/test_results.tsv, the tails of the server logs are printed, and
# the loop is left with `break`.
# NOTE(review): `counter` is not declared local, so it is a global variable;
# after the `break` on failure the function still continues to the gdb part.
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt ${1:-120} ]
then
echo "Cannot start clickhouse-server"
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
cat /var/log/clickhouse-server/stdout.log
tail -n100 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
break
fi
# use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5
counter=$((counter + 1))
done
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because it may cause gdb to hang;
# explicitly ignore only the non-fatal signals that are used by the server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
thread apply all backtrace full
info registers
disassemble /s
up
disassemble /s
up
disassemble /s
p \"done\"
detach
quit
" > script.gdb
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}
# Stress tests and the upgrade check use similar code, which was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@ -396,7 +180,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
start
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
@ -413,316 +197,27 @@ unset "${!THREAD_@}"
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
check_server_start
stop
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
rg -Fa "########################################" /test_output/* > /dev/null \
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
# Prints an escaped excerpt of /test_output/gdb.log around received signals:
# every line containing " received signal " plus the 50 lines after it
# (rg -A50), cut to the first $FAILURE_CONTEXT_LINES lines and escaped by
# head_escaped (called without a file argument, so it reads the pipe).
function get_gdb_log_context()
{
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
}
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo -e "Backward compatibility check\n"
echo "Get previous release tag"
previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
else
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Uninstall current packages
dpkg --remove clickhouse-client
dpkg --remove clickhouse-server
dpkg --remove clickhouse-common-static-dbg
dpkg --remove clickhouse-common-static
rm -rf /var/lib/clickhouse/*
# Make BC check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
# Remove s3 related configs to avoid "there is no disk type `cache`"
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
# Turn on after 22.12
rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
# it uses recently introduced settings which previous versions may not have
rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
# Turn on after 23.1
rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||:
start
clickhouse-client --query="SELECT 'Server version: ', version()"
# Install new package before running stress test because we should use new
# clickhouse-client and new clickhouse-test.
#
# But we should leave old binary in /usr/bin/ and debug symbols in
# /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
# will print sane stacktraces and also to avoid possible crashes.
#
# FIXME: those files can be extracted directly from debian package, but
# actually better solution will be to use different PATH instead of playing
# games with files from packages.
mv /usr/bin/clickhouse previous_release_package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
install_packages package_folder
mv /usr/bin/clickhouse package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
mv previous_release_package_folder/clickhouse /usr/bin/
mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
mkdir tmp_stress_output
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version
stop 300
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server
mv package_folder/clickhouse /usr/bin/
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
echo "Check for Logical errors in server log:"
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
done
fi
fi
dmesg -T > /test_output/dmesg.log
# OOM in dmesg -- those are real
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
collect_query_and_trace_logs
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like 'Backward compatibility check%'), -- BC check goes last
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
@ -732,14 +227,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
done
collect_core_dumps

View File

@ -0,0 +1,31 @@
# rebuild in #33610
# docker build -t clickhouse/upgrade-check .
# Tag of the clickhouse/stateful-test base image; defaults to "latest" and can be
# overridden at build time with --build-arg FROM_TAG=<tag>.
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:$FROM_TAG
# Install the extra tools in a single layer. DEBIAN_FRONTEND=noninteractive keeps
# package configuration from prompting; --no-install-recommends skips optional deps.
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
bash \
tzdata \
fakeroot \
debhelper \
parallel \
expect \
python3 \
python3-lxml \
python3-termcolor \
python3-requests \
curl \
sudo \
openssl \
netcat-openbsd \
telnet \
brotli \
&& apt-get clean
# run.sh is the script executed by CMD below.
COPY run.sh /
# NOTE(review): presumably read by the configuration scripts to export S3 storage policies — confirm against the consumer.
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]

201
docker/test/upgrade/run.sh Normal file
View File

@ -0,0 +1,201 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024
#
# Upgrade check: installs the previous release, runs the stress workload against it,
# then installs the new packages, starts the new server and inspects its log.
#
# Avoid overlaps with previous runs
dmesg --clear
# Trace every executed command
set -x
# The tests folder from the repo is mounted to /usr/share; link the scripts used below into /usr/bin
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
# Start azurite-blob in the background (0.0.0.0:10000, debug log in /azurite_log)
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
echo "Get previous release tag"
# Take the "Version: " field of the client package under test, cut everything from the
# first '+', and feed the result to get_previous_release_tag on stdin.
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
# Shallow clone of just that tag, without submodules
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
# Record the outcome of the download script as a row in test_results.tsv.
# NOTE(review): $OK and $FAIL are not defined in this script — presumably they come from stress_tests.lib; confirm.
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv
exit
# Check that both the common-static and the server packages were downloaded
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv
exit
fi
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Make upgrade check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# But we still need default disk because some tables loaded only into it.
# Rewrite the policy into a temporary file first, then move it over the original.
# The `mv` must be a separate command: if it is left on the redirection line, its words
# become extra file operands of `sed` and the rewritten config is never installed.
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
    | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
    > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
# The replaced file was created by this shell, so hand it back to the clickhouse user and group
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"
# Run the stress script with the previous release's queries; record its outcome in test_results.tsv
mkdir tmp_stress_output
stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
# if the query does not finish within 10 minutes, dump all thread backtraces
# (timestamped by `ts`) into gdb.log and force-stop the server.
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version and disable additional hang check
stop 300 false
# Keep the previous release's log under its own name
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
# If the new server answers a query, append an OK row to test_results.tsv. Otherwise collect
# the Application errors from its log and, if any were found, append a failure row with them.
# The server log is renamed to clickhouse-server.upgrade.log further below, so the message points there.
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
    || (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
    && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.upgrade.log)$FAIL$(trim_server_logs application_errors.txt)" \
    >> /test_output/test_results.tsv)
# Remove file application_errors.txt if it's empty.
# -f: the file does not exist at all when the server started successfully.
[ -s /test_output/application_errors.txt ] || rm -f /test_output/application_errors.txt
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from the stress test may not be finished yet when we are restarting the server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
#
# How the pipeline works: the first rg (-F fixed strings, -a binary as text, -v invert) drops every
# line containing one of the ignored patterns; the second stage keeps only the remaining "<Error>" lines.
# If anything is left, a failure row is appended to test_results.tsv, otherwise an OK row.
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
# Grep logs for sanitizer asserts, crashes and other critical errors
# NOTE(review): this and the other helpers called below are not defined in this script —
# presumably they come from the sourced stress_tests.lib; confirm.
check_logs_for_critical_errors
# Archive the coordination directory (-h follows symlinks); "||:" ignores a tar failure
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
collect_query_and_trace_logs
check_oom_in_dmesg
# Ship the server's stderr log together with the other test outputs
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status:
# the ORDER BY keys rank failed rows so that rows matching earlier patterns come first,
# rowNumberInAllBlocks() breaks ties by position in test_results.tsv, and LIMIT 1 keeps
# a single row as the status of the whole check.
# The fallback uses `echo -e` so that "\t" is written as a real tab, like in the other status lines.
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
(test ilike '%possible deadlock%') DESC,
(test like '%start%') DESC,
(test like '%dmesg%') DESC,
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
# An empty status file means no failed rows were found
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
collect_core_dumps

View File

@ -60,12 +60,21 @@ fi
clickhouse_download_filename_prefix="clickhouse"
clickhouse="$clickhouse_download_filename_prefix"

# If a file with the default name already exists, ask whether to overwrite it.
# The search for a free "clickhouse.N" name must only run when the user declines:
# running it unconditionally first would make this prompt unreachable.
if [ -f "$clickhouse" ]
then
    # NOTE(review): `read -p` is not available in every sh implementation — confirm the interpreter this script runs under.
    read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer
    # Two separate tests joined with ||: the "-o" operator of `[` is marked obsolescent by POSIX
    if [ "$answer" = "y" ] || [ "$answer" = "Y" ]
    then
        rm -f "$clickhouse"
    else
        # Keep the existing file and pick the first unused name clickhouse.0, clickhouse.1, ...
        i=0
        while [ -f "$clickhouse" ]
        do
            clickhouse="${clickhouse_download_filename_prefix}.${i}"
            i=$(($i+1))
        done
    fi
fi

URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo

View File

@ -39,12 +39,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
### Restricting test runs
A test can have zero or more _test tags_ specifying restrictions for test runs.
For `.sh` tests tags are written as a comment on the second line:
```bash
#!/usr/bin/env bash
# Tags: no-fasttest
```
For `.sql` tests tags are placed in the first line as a SQL comment:
```sql
-- Tags: no-fasttest
SELECT 1
```
|Tag name | What it does | Usage example |
|---|---|---|
| `disabled`| Test is not run ||
| `long` | Test's execution time is extended from 1 to 10 minutes ||
| `deadlock` | Test is run in a loop for a long time ||
| `race` | Same as `deadlock`. Prefer `deadlock` ||
| `shard` | Server is required to listen to `127.0.0.*` ||
| `distributed` | Same as `shard`. Prefer `shard` ||
| `global` | Same as `shard`. Prefer `shard` ||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
| `no-replicated-database` |||
| `no-ordinary-database` |||
| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken|
| `no-parallel-replicas` |||
| `no-debug` |||
| `no-stress` |||
| `no-polymorphic-parts` |||
| `no-random-settings` |||
| `no-random-merge-tree-settings` |||
| `no-backward-compatibility-check` |||
| `no-cpu-x86_64` |||
| `no-cpu-aarch64` |||
| `no-cpu-ppc64le` |||
| `no-s3-storage` |||
In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
### Choosing the Test Name
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
### Checking for an Error that Must Occur
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:

View File

@ -1,6 +1,6 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](/docs/en/sql-reference/functions/index.md/#executable-user-defined-functions).
The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
``` sql
@ -39,7 +39,7 @@ Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip i
LIMIT N
```
In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
In these queries, `DistanceFunction` is selected from [distance functions](/docs/en/sql-reference/functions/distance-functions.md). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
:::note
An ANN index can't speed up a query that uses both types at once (`WHERE` + `ORDER BY`); only one of them can be used. All queries must have a `LIMIT`, as the algorithms are used to find nearest neighbors and need a specific number of them.
@ -85,13 +85,13 @@ As the indexes are built only during insertions into table, `INSERT` and `OPTIMI
You can create your table with index which uses certain algorithm. Now only indices based on the following algorithms are supported:
# Index list
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
# Annoy {#annoy}
Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).
Short description of the algorithm:
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D e.t.c.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest.
The algorithm recursively divides the whole space in half by random linear surfaces (lines in 2D, planes in 3D, etc.). Thus it builds a tree of polyhedrons and the points they contain. Repeating this operation several times for greater accuracy creates a forest.
To find the K nearest neighbours, it goes down through the trees and fills a buffer of the closest points using a priority queue of polyhedrons. Next, it sorts the buffer and returns the nearest K points.
__Examples__:
@ -118,7 +118,7 @@ ORDER BY id;
```
:::note
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
:::
Parameter `NumTrees` is the number of trees which the algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. It can be set without changing first parameter, for example

View File

@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
## Arrow {#data-format-arrow}

View File

@ -26,6 +26,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnt don
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com/kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type):
- `readonly=0`: No read-only restrictions.
- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set.
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.

View File

@ -1106,6 +1106,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin
Enabled by default.
### output_format_parquet_version {#output_format_parquet_version}
The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`.
Default value: `2.latest`.
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -0,0 +1,52 @@
---
slug: /en/operations/system-tables/server_settings
---
# server_settings
Contains information about global settings for the server, which were specified in `config.xml`.
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value.
- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`.
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
**Example**
The following example shows how to get information about server settings whose names contain `thread_pool`.
``` sql
SELECT *
FROM system.server_settings
WHERE name LIKE '%thread_pool%'
```
``` text
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
```
Using `WHERE changed` can be useful, for example, when you want to check
whether settings in configuration files are loaded correctly and are in use.
<!-- -->
``` sql
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
```
**See also**
- [Settings](../../operations/system-tables/settings.md)
- [Configuration Files](../../operations/configuration-files.md)
- [Server Settings](../../operations/server-configuration-parameters/settings.md)

View File

@ -16,6 +16,7 @@ Columns:
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
- `0` — Current user can change the setting.
- `1` — Current user can't change the setting.
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
**Example**

View File

@ -7,8 +7,8 @@ sidebar_position: 37
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
Returns Float64. When `n <= 1`, returns +∞.
Returns Float64. When `n <= 1`, returns `nan`.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
:::
:::

View File

@ -48,7 +48,35 @@ When dividing by zero you get inf, -inf, or nan.
## intDiv(a, b)
Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value).
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
Returns an integer of the type of the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
**Example**
Query:
```sql
SELECT
intDiv(toFloat64(1), 0.001) AS res,
toTypeName(res)
```
```response
┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
│ 1000 │ Int64 │
└──────┴─────────────────────────────────────────┘
```
```sql
SELECT
intDiv(1, 0.001) AS res,
toTypeName(res)
```
```response
Received exception from server (version 23.2.1):
Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
```
## intDivOrZero(a, b)

View File

@ -1126,15 +1126,48 @@ Rounds the time to the half hour.
## toYYYYMM
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM).
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMM(now(), 'US/Eastern')
```
```response
┌─toYYYYMM(now(), 'US/Eastern')─┐
│ 202303 │
└───────────────────────────────┘
```
## toYYYYMMDD
Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 10000 + MM \* 100 + DD).
Converts a date or date with time to a UInt32 number containing the year, month and day number (YYYY \* 10000 + MM \* 100 + DD). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMMDD(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDD(now(), 'US/Eastern')─┐
│ 20230302 │
└─────────────────────────────────┘
```
## toYYYYMMDDhhmmss
Converts a date or date with time to a UInt64 number containing the year and month number (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss).
Converts a date or date with time to a UInt64 number containing the year, month, day, hour, minute and second (YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss). Accepts a second optional timezone argument. If provided, the timezone must be a string constant.
### example
```sql
SELECT
toYYYYMMDDhhmmss(now(), 'US/Eastern')
```
```response
┌─toYYYYMMDDhhmmss(now(), 'US/Eastern')─┐
│ 20230302112209 │
└───────────────────────────────────────┘
```
## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters
@ -1231,8 +1264,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %e | day of the month, space-padded (1-31) | &nbsp; 2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %h | hour in 12h format (01-12) | 09 |
| %H | hour in 24h format (00-23) | 22 |
| %i | minute (00-59) | 33 |

View File

@ -579,3 +579,33 @@ Result:
│ 3628800 │
└───────────────┘
```
## widthBucket(operand, low, high, count)
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
`operand`, `low`, `high` can be any native number type. `count` can only be an unsigned native integer and its value cannot be zero.
**Syntax**
```sql
widthBucket(operand, low, high, count)
```
There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
**Example**
Query:
``` sql
SELECT widthBucket(10.15, -8.6, 23, 18);
```
Result:
``` text
┌─widthBucket(10.15, -8.6, 23, 18)─┐
│ 11 │
└──────────────────────────────────┘
```

View File

@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. ');
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
**Example**
``` sql
SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
```
```text
┌─DateString──────────┐
│ 12/05/2021 12:50:00 │
└─────────────────────┘
```
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
@ -364,4 +375,4 @@ Result:
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
```

View File

@ -66,6 +66,42 @@ Result:
- [Map(key, value)](../../sql-reference/data-types/map.md) data type
## mapFromArrays
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).
The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
**Syntax**
```sql
mapFromArrays(keys, values)
```
Alias: `MAP_FROM_ARRAYS(keys, values)`
**Arguments**
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
- `values` - Given value array to create a map from.
**Returned value**
- A map whose keys and values are constructed from the key and value arrays
**Example**
Query:
```sql
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
```
```text
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
│ {'a':1,'b':2,'c':3} │
└───────────────────────────────────────────┘
```
## mapAdd
Collect all the keys and sum corresponding values.
@ -235,7 +271,7 @@ Determines whether the `map` contains the `key` parameter.
mapContains(map, key)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `key` — Key. Type matches the type of keys of `map` parameter.
@ -280,7 +316,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
mapKeys(map)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
@ -323,7 +359,7 @@ Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operat
mapValues(map)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
@ -362,7 +398,7 @@ Result:
mapContainsKeyLike(map, pattern)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
@ -400,7 +436,7 @@ Result:
mapExtractKeyLike(map, pattern)
```
**Parameters**
**Arguments**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
@ -438,7 +474,7 @@ Result:
mapApply(func, map)
```
**Parameters**
**Arguments**
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
@ -478,7 +514,7 @@ Result:
mapFilter(func, map)
```
**Parameters**
**Arguments**
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
@ -520,7 +556,7 @@ Result:
mapUpdate(map1, map2)
```
**Parameters**
**Arguments**
- `map1` [Map](../../sql-reference/data-types/map.md).
- `map2` [Map](../../sql-reference/data-types/map.md).

View File

@ -6,22 +6,23 @@ sidebar_label: Type Conversion
# Type Conversion Functions
## Common Issues of Numeric Conversions
## Common Issues with Data Conversion
When you convert a value from one to another data type, you should remember that if you try to fit a value from a larger data type to a smaller one (for example Int64 to Int32), or convert from one data type to another (for example `String` to `Int`), you could have data loss. Test beforehand.
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected.
ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
## toInt(8\|16\|32\|64\|128\|256)
Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
Converts an input value to a value of the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toInt8(expr)`Results in the `Int8` data type.
- `toInt16(expr)`Results in the `Int16` data type.
- `toInt32(expr)`Results in the `Int32` data type.
- `toInt64(expr)`Results in the `Int64` data type.
- `toInt128(expr)`Results in the `Int128` data type.
- `toInt256(expr)`Results in the `Int256` data type.
- `toInt8(expr)` — Converts to a value of data type `Int8`.
- `toInt16(expr)` — Converts to a value of data type `Int16`.
- `toInt32(expr)` — Converts to a value of data type `Int32`.
- `toInt64(expr)` — Converts to a value of data type `Int64`.
- `toInt128(expr)` — Converts to a value of data type `Int128`.
- `toInt256(expr)` — Converts to a value of data type `Int256`.
**Arguments**
@ -53,7 +54,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrZero
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns 0.
Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.
**Example**
@ -73,7 +74,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrNull
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns NULL.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`.
**Example**
@ -93,7 +94,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrDefault
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value.
**Example**
@ -116,11 +117,11 @@ Result:
Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toUInt8(expr)`Results in the `UInt8` data type.
- `toUInt16(expr)`Results in the `UInt16` data type.
- `toUInt32(expr)`Results in the `UInt32` data type.
- `toUInt64(expr)`Results in the `UInt64` data type.
- `toUInt256(expr)`Results in the `UInt256` data type.
- `toUInt8(expr)` — Converts to a value of data type `UInt8`.
- `toUInt16(expr)` — Converts to a value of data type `UInt16`.
- `toUInt32(expr)` — Converts to a value of data type `UInt32`.
- `toUInt64(expr)` — Converts to a value of data type `UInt64`.
- `toUInt256(expr)` — Converts to a value of data type `UInt256`.
**Arguments**
@ -128,7 +129,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint
**Returned value**
Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
@ -166,26 +167,30 @@ Result:
## toDate
Converts the argument to `Date` data type.
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime:
```sql
SELECT
now() AS x,
toDate(x)
```
```response
┌───────────────────x─┬─toDate(now())─┐
│ 2022-12-30 13:44:17 │ 2022-12-30 │
└─────────────────────┴───────────────┘
```
If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used:
If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is used:
```sql
SELECT
toDate('2022-12-30') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐
│ 2022-12-30 │ Date │
@ -193,18 +198,20 @@ SELECT
1 row in set. Elapsed: 0.001 sec.
```
```sql
SELECT
toDate('2022-12-30 01:02:03') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐
│ 2022-12-30 │ Date │
└────────────┴───────────────────────────────────────────┘
```
If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone:
If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone:
```sql
SELECT
@ -217,6 +224,7 @@ SELECT
toDate(ts) AS date_Amsterdam_2,
toDate(ts, 'Pacific/Apia') AS date_Samoa_2
```
```response
Row 1:
──────
@ -232,7 +240,7 @@ date_Samoa_2: 2022-12-31
The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones.
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example:
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example:
```sql
SELECT toDate(12345)
@ -270,8 +278,6 @@ SELECT
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
```
Have a nice day working with dates and times.
## toDateOrZero
## toDateOrNull
@ -288,7 +294,7 @@ Have a nice day working with dates and times.
## toDate32
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account.
**Syntax**
@ -302,9 +308,7 @@ toDate32(expr)
**Returned value**
- A calendar date.
Type: [Date32](/docs/en/sql-reference/data-types/date32.md).
- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md).
**Example**
@ -332,7 +336,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value);
└────────────┴────────────────────────────────────┘
```
3. With `Date`-type argument:
3. With [Date](/docs/en/sql-reference/data-types/date.md) argument:
``` sql
SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
@ -386,7 +390,7 @@ Result:
## toDate32OrDefault
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account. Returns default value if an invalid argument is received.
**Example**
@ -666,7 +670,7 @@ YYYY-MM-DD
YYYY-MM-DD hh:mm:ss
```
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing toDate(unix_timestamp), which otherwise would be an error and would require writing the more cumbersome toDate(toDateTime(unix_timestamp)).
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing `toDate(unix_timestamp)`, which otherwise would be an error and would require writing the more cumbersome `toDate(toDateTime(unix_timestamp))`.
Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time.
@ -696,7 +700,7 @@ Also see the `toUnixTimestamp` function.
## toFixedString(s, N)
Converts a String type argument to a FixedString(N) type (a string with fixed length N). N must be a constant.
Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N).
If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown.
## toStringCutToZero(s)
@ -914,7 +918,7 @@ Result:
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
```
Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Conversion to [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported.
@ -1174,7 +1178,7 @@ For all of the formats with separator the function parses months names expressed
**Returned value**
- `time_string` converted to the `DateTime` data type.
- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Examples**
@ -1254,10 +1258,10 @@ Result:
**See Also**
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
## parseDateTimeBestEffortUS

View File

@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n
## Example
Create a user:
```sql
CREATE USER robin IDENTIFIED BY 'password';
```
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
``` sql
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
CREATE
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
TO robin
```

View File

@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2],
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
...
) ENGINE = engine
COMMENT 'comment for table'
```
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below).
If necessary, primary key can be specified, with one or more key expressions.
Comments can be added for columns and for the table.
### With a Schema Similar to Other Table
``` sql
@ -267,7 +270,7 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m
CREATE TABLE db.table_name
(
name1 type1, name2 type2, ...,
PRIMARY KEY(expr1[, expr2,...])]
PRIMARY KEY(expr1[, expr2,...])
)
ENGINE = engine;
```

View File

@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
:::tip
Here is a step-by-step guide on using [Materialized views](/docs/en/guides/developer/cascading-materialized-views.md).
:::
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` the table engine for storing data.

View File

@ -6,21 +6,22 @@ sidebar_label: file
# file
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
Creates a table from a file. This table function is similar to [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones.
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.
**Syntax**
``` sql
file(path [,format] [,structure])
file(path [,format] [,structure] [,compression])
```
**Parameters**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
**Returned value**
@ -53,7 +54,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
└─────────┴─────────┴─────────┘
```
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
@ -143,4 +144,4 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -23,23 +23,3 @@ You can use table functions in:
:::warning
You can't use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
:::
| Function | Description |
|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
:::note
Only these table functions are enabled in readonly mode :
null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
:::

View File

@ -24,6 +24,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com/kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -0,0 +1,53 @@
---
slug: /ru/operations/system-tables/server_settings
---
# system.server_settings
Содержит информацию о конфигурации сервера.
В настоящий момент таблица содержит только верхнеуровневые параметры из файла `config.xml` и не поддерживает вложенные конфигурации
(например [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
Столбцы:
- `name` ([String](../../sql-reference/data-types/string.md)) — имя настройки.
- `value` ([String](../../sql-reference/data-types/string.md)) — значение настройки.
- `default` ([String](../../sql-reference/data-types/string.md)) — значение настройки по умолчанию.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — показывает, была ли настройка указана в `config.xml` или является значением по умолчанию.
- `description` ([String](../../sql-reference/data-types/string.md)) — краткое описание настройки.
- `type` ([String](../../sql-reference/data-types/string.md)) — тип настройки.
**Пример**
Пример показывает, как получить информацию о настройках, имена которых содержат `thread_pool`.
``` sql
SELECT *
FROM system.server_settings
WHERE name LIKE '%thread_pool%'
```
``` text
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
```
Использование `WHERE changed` может быть полезно, например, если необходимо проверить,
что настройки корректно загрузились из конфигурационного файла и используются.
<!-- -->
``` sql
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
```
**Cм. также**
- [Настройки](../../operations/system-tables/settings.md)
- [Конфигурационные файлы](../../operations/configuration-files.md)
- [Настройки сервера](../../operations/server-configuration-parameters/settings.md)

View File

@ -16,6 +16,7 @@ slug: /ru/operations/system-tables/settings
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку:
- `0` — Текущий пользователь может изменять настройку.
- `1` — Текущий пользователь не может изменять настройку.
- `default` ([String](../../sql-reference/data-types/string.md)) — значение настройки по умолчанию.
**Пример**

View File

@ -301,7 +301,7 @@ ClickHouse поддерживает временные таблицы со сл
- Временные таблицы исчезают после завершения сессии, в том числе при обрыве соединения.
- Временная таблица использует только модуль памяти.
- Невозможно указать базу данных для временной таблицы. Она создается вне баз данных.
- Невозможно создать временную таблицу распределнным DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
- Невозможно создать временную таблицу распределённым DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
- Если временная таблица имеет то же имя, что и некоторая другая, то, при упоминании в запросе без указания БД, будет использована временная таблица.
- При распределённой обработке запроса, используемые в запросе временные таблицы, передаются на удалённые серверы.
@ -344,7 +344,9 @@ REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
### Синтаксис
```sql
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
```
Для данного запроса можно использовать любые варианты синтаксиса запроса `CREATE`. Запрос `REPLACE` для несуществующей таблицы вызовет ошибку.

View File

@ -108,7 +108,7 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
## Модификатор WITH CUBE {#with-cube-modifier}
Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.

View File

@ -16,7 +16,7 @@ FROM <left_table>
(ON <expr_list>)|(USING <column_list>) ...
```
Выражения из секции `ON` и столбцы из секции `USING` называется «ключами соединения». Если не указано иное, при присоединение создаётся [Декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.
Выражения из секции `ON` и столбцы из секции `USING` называются «ключами соединения». Если не указано иное, при присоединении создаётся [Декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.
## Поддерживаемые типы соединения {#select-join-types}
@ -28,7 +28,7 @@ FROM <left_table>
- `FULL OUTER JOIN`, не совпадающие строки из обеих таблиц возвращаются в дополнение к совпадающим строкам.
- `CROSS JOIN`, производит декартово произведение таблиц целиком, ключи соединения не указываются.
Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является ли указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).
Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).
Дополнительные типы соединений, доступные в ClickHouse:
@ -62,7 +62,7 @@ FROM <left_table>
Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попадут в результат.
Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый агрумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.
Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.
:::note "Примечание"
Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).
@ -280,7 +280,7 @@ SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b);
Каждый раз для выполнения запроса с одинаковым `JOIN`, подзапрос выполняется заново — результат не кэшируется. Этого можно избежать, используя специальный движок таблиц [Join](../../../engines/table-engines/special/join.md), представляющий собой подготовленное множество для соединения, которое всегда находится в оперативке.
В некоторых случаях это более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
В некоторых случаях более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
Если `JOIN` необходим для соединения с таблицами измерений (dimension tables - сравнительно небольшие таблицы, которые содержат свойства измерений - например, имена для рекламных кампаний), то использование `JOIN` может быть не очень удобным из-за громоздкости синтаксиса, а также из-за того, что правая таблица читается заново при каждом запросе. Специально для таких случаев существует функциональность «Внешние словари», которую следует использовать вместо `JOIN`. Дополнительные сведения смотрите в разделе «Внешние словари».

View File

@ -67,7 +67,7 @@ sidebar_label: ORDER BY
## Примеры с использованием сравнения {#collation-examples}
Пример с значениями типа [String](../../../sql-reference/data-types/string.md):
Пример со значениями типа [String](../../../sql-reference/data-types/string.md):
Входная таблица:
@ -241,13 +241,13 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
└───┴─────────┘
```
## Деталь реализации {#implementation-details}
## Детали реализации {#implementation-details}
Если кроме `ORDER BY` указан также не слишком большой [LIMIT](limit.md), то расходуется меньше оперативки. Иначе расходуется количество памяти, пропорциональное количеству данных для сортировки. При распределённой обработке запроса, если отсутствует [GROUP BY](group-by.md), сортировка частично делается на удалённых серверах, а на сервере-инициаторе запроса производится слияние результатов. Таким образом, при распределённой сортировке, может сортироваться объём данных, превышающий размер памяти на одном сервере.
Существует возможность выполнять сортировку во внешней памяти (с созданием временных файлов на диске), если оперативной памяти не хватает. Для этого предназначена настройка `max_bytes_before_external_sort`. Если она выставлена в 0 (по умолчанию), то внешняя сортировка выключена. Если она включена, то при достижении объёмом данных для сортировки указанного количества байт, накопленные данные будут отсортированы и сброшены во временный файл. После того, как все данные будут прочитаны, будет произведено слияние всех сортированных файлов и выдача результата. Файлы записываются в директорию `/var/lib/clickhouse/tmp/` (по умолчанию, может быть изменено с помощью параметра `tmp_path`) в конфиге.
На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому, значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставите `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB.
На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставьте `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB.
Внешняя сортировка работает существенно менее эффективно, чем сортировка в оперативке.
@ -366,9 +366,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последователность заполнения для `d1`.
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последовательность заполнения для `d1`.
едующий запрос (с измененым порядком в ORDER BY):
Следующий запрос (с измененным порядком в ORDER BY):
```sql
SELECT
toDate((number * 10) * 86400) AS d1,

View File

@ -13,7 +13,7 @@ Prewhere — это оптимизация для более эффективн
`PREWHERE` имеет смысл использовать, если есть условия фильтрации, которые используют меньшинство столбцов из тех, что есть в запросе, но достаточно сильно фильтруют данные. Таким образом, сокращается количество читаемых данных.
В запросе может быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.
В запросе могут быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.
Если значение параметра [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) равно 0, эвристика по автоматическому перемещению части выражений из `WHERE` к `PREWHERE` отключается.

View File

@ -10,7 +10,7 @@ sidebar_label: SAMPLE
Сэмплирование имеет смысл, когда:
1. Точность результата не важна, например, для оценочных расчетов.
2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть \&lt;100 мс. При этом точность расчета имеет более низкий приоритет.
2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть &lt;100 мс. При этом точность расчета имеет более низкий приоритет.
3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум подпиской.
:::note "Внимание"

View File

@ -26,7 +26,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c
Результирующие столбцы сопоставляются по их индексу (порядку внутри `SELECT`). Если имена столбцов не совпадают, то имена для конечного результата берутся из первого запроса.
При объединении выполняет приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип.
При объединении выполняется приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип.
Запросы, которые являются частью `UNION`, могут быть заключены в круглые скобки. [ORDER BY](order-by.md) и [LIMIT](limit.md) применяются к отдельным запросам, а не к конечному результату. Если вам нужно применить преобразование к конечному результату, вы можете разместить все объединенные с помощью `UNION` запросы в подзапрос в секции [FROM](from.md).

View File

@ -5,7 +5,7 @@ sidebar_label: WITH
# Секция WITH {#with-clause}
Clickhouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивныеапросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.
ClickHouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивные_запросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.
## Синтаксис

View File

@ -19,7 +19,6 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
- `shard_name` — 分片的名字。数据库副本按`shard_name`分组到分片中。
- `replica_name` — 副本的名字。同一分片的所有副本的副本名称必须不同。
!!! note "警告"
对于[ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication)表,如果没有提供参数,则使用默认参数:`/clickhouse/tables/{uuid}/{shard}`和`{replica}`。这些可以在服务器设置[default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path)和[default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name)中更改。宏`{uuid}`被展开到表的uuid `{shard}`和`{replica}`被展开到服务器配置的值而不是数据库引擎参数。但是在将来可以使用Replicated数据库的`shard_name`和`replica_name`。
## 使用方式 {#specifics-and-recommendations}
@ -52,8 +51,8 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
```
``` text
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
│ shard1|replica1 │ 0 │ │ 2 │ 0 │
│ shard1|other_replica │ 0 │ │ 1 │ 0 │
│ other_shard|r1 │ 0 │ │ 0 │ 0 │
└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
@ -62,13 +61,13 @@ CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
显示系统表:
``` sql
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
FROM system.clusters WHERE cluster='r';
```
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
@ -83,9 +82,9 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
```
``` text
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
┌─hosts─┬─groupArray(n)─┐
│ node1 │ [1,3,5,7,9] │
│ node2 │ [0,2,4,6,8] │
└───────┴───────────────┘
```
@ -98,8 +97,8 @@ node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
集群配置如下所示:
``` text
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │
│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │
│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │
│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │
@ -113,8 +112,8 @@ node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY
```
```text
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
┌─hosts─┬─groupArray(n)─┐
│ node2 │ [1,3,5,7,9] │
│ node4 │ [0,2,4,6,8] │
└───────┴───────────────┘
```

View File

@ -1,6 +1,6 @@
---
slug: /zh/engines/table-engines/mergetree-family/mergetree
---
---
slug: /zh/engines/table-engines/mergetree-family/mergetree
---
# MergeTree {#table_engines-mergetree}
ClickHouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。
@ -25,8 +25,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
需要的话,您可以给表设置一个采样方法。
!!! note "注意"
[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。
:::info
[合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。
:::
## 建表 {#table_engine-mergetree-creating-a-table}
@ -364,7 +365,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。
!!! note "注意"
:::note
布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如:
- 可以用来优化的场景
@ -379,6 +380,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
- `NOT s = 1`
- `s != 1`
- `NOT startsWith(s, 'test')`
:::
## 并发数据访问 {#concurrent-data-access}

View File

@ -45,7 +45,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 查看示例
**分布式设置**
- `fsync_after_insert` - 对异步插入到分布式的文件数据执行`fsync`。确保操作系统将所有插入的数据刷新到启动节点**磁盘上的一个文件**中。
@ -66,19 +66,20 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
- `monitor_max_sleep_time_ms` - 等同于 [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms)
!!! note "备注"
:::note
**稳定性设置** (`fsync_...`):
**稳定性设置** (`fsync_...`):
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上然后再异步发送到shard。
— 可能会显著降低`insert`的性能
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
- 只影响异步插入(例如:`insert_distributed_sync=false`), 当数据首先存储在启动节点磁盘上然后再异步发送到shard。
- 可能会显著降低`insert`的性能
- 影响将存储在分布式表文件夹中的数据写入 **接受您插入的节点** 。如果你需要保证写入数据到底层的MergeTree表中请参阅 `system.merge_tree_settings` 中的持久性设置(`...fsync...`)
**插入限制设置** (`..._insert`) 请见:
**插入限制设置** (`..._insert`) 请见:
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
- `bytes_to_throw_insert``bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
:::
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) 设置
- [prefer_localhost_replica](../../../operations/settings/settings.md#settings-prefer-localhost-replica) 设置
- `bytes_to_throw_insert` 在 `bytes_to_delay_insert` 之前处理,所以你不应该设置它的值小于 `bytes_to_delay_insert`
**示例**
``` sql
@ -214,7 +215,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
## 读取数据 {#distributed-reading-data}
当查询一个`Distributed`表时,`SELECT`查询被发送到所有的分片,不管数据是如何分布在分片上的(它们可以完全随机分布)。当您添加一个新分片时,您不必将旧数据传输到它。相反,您可以使用更重的权重向其写入新数据——数据的分布会稍微不均匀,但查询将正确有效地工作。
当启用`max_parallel_replicas`选项时,查询处理将在单个分片中的所有副本之间并行化。更多信息,请参见[max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas)。
@ -225,8 +226,9 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
- `_shard_num` — 表`system.clusters` 中的 `shard_num` 值 . 数据类型: [UInt32](../../../sql-reference/data-types/int-uint.md).
!!! note "备注"
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
:::note
因为 [remote](../../../sql-reference/table-functions/remote.md) 和 [cluster](../../../sql-reference/table-functions/cluster.mdx) 表方法内部创建了分布式表, `_shard_num` 对他们都有效.
:::
**详见**
- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) 描述

View File

@ -617,8 +617,9 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
启用本机复制[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上其中没有其他进程包括运行的ClickHouse
!!! note "注意"
ZooKeeper不是一个严格的要求在某些简单的情况下您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
:::note
ZooKeeper不是一个严格的要求在某些简单的情况下您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
:::
ZooKeeper位置在配置文件中指定:

View File

@ -685,8 +685,9 @@ CREATE TABLE IF NOT EXISTS example_table
- 如果`input_format_defaults_for_omitted_fields = 0`, 那么`x`和`a`的默认值等于`0`(作为`UInt32`数据类型的默认值)。
- 如果`input_format_defaults_for_omitted_fields = 1`, 那么`x`的默认值为`0`,但`a`的默认值为`x * 2`。
!!! note "注意"
:::warning
当使用`input_format_defaults_for_omitted_fields = 1`插入数据时,与使用`input_format_defaults_for_omitted_fields = 0`相比ClickHouse消耗更多的计算资源。
:::
### Selecting Data {#selecting-data}
@ -708,8 +709,9 @@ CREATE TABLE IF NOT EXISTS example_table
与[JSON](#json)格式不同没有替换无效的UTF-8序列。值以与`JSON`相同的方式转义。
!!! note "提示"
:::info
字符串中可以输出任意一组字节。如果您确信表中的数据可以被格式化为JSON而不会丢失任何信息那么就使用`JSONEachRow`格式。
:::
### Nested Structures {#jsoneachrow-nested}
@ -1216,9 +1218,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry';
SELECT * FROM topic1_stream;
```
!!! note "警告"
设置 `format_avro_schema_registry_url` 需要写入配置文件`users.xml`以在Clickhouse重启后该设置仍为您的设定值。您也可以在使用Kafka引擎的时候指定该设置。
:::warning
设置 `format_avro_schema_registry_url` 需要写入配置文件`users.xml`以在Clickhouse重启后该设置仍为您的设定值。您也可以在使用Kafka引擎的时候指定该设置。
:::
## Parquet {#data-format-parquet}

View File

@ -188,8 +188,9 @@ $ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
!!! note "警告"
一些HTTP客户端可能会在默认情况下从服务器解压数据(使用`gzip`和`deflate`),即使您未正确地使用了压缩设置,您也可能会得到解压数据。
:::warning
一些HTTP客户端可能会在默认情况下从服务器解压数据(使用`gzip`和`deflate`),即使您未正确地使用了压缩设置,您也可能会得到解压数据。
:::
您可以使用`database`URL参数或`X-ClickHouse-Database`头来指定默认数据库。
@ -447,8 +448,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:
max_final_threads 2
```
!!! note "警告"
在一个`predefined_query_handler`中只支持insert类型的一个`查询`。
:::warning
在一个`predefined_query_handler`中只支持insert类型的一个`查询`。
:::
### 动态查询 {#dynamic_query_handler}

View File

@ -24,6 +24,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -13,5 +13,6 @@ sidebar_position: 24
- [GUI](../../interfaces/third-party/gui.md)
- [Proxies](../../interfaces/third-party/proxy.md)
!!! note "注意"
:::note
支持通用API的通用工具[ODBC](../../interfaces/odbc.md)或[JDBC](../../interfaces/jdbc.md)通常也适用于ClickHouse但这里没有列出因为它们实在太多了。
:::

View File

@ -24,9 +24,9 @@ ClickHouse权限实体包括
我们建议你使用SQL工作流的方式。当然配置的方式也可以同时起作用, 所以如果你正在用服务端配置的方式来管理权限和账户你可以平滑的切换到SQL驱动的工作流方式。
!!! note "警告"
你无法同时使用两个配置的方式来管理同一个权限实体。
:::warning
你无法同时使用两个配置的方式来管理同一个权限实体。
:::
## 用法 {#access-control-usage}

View File

@ -12,8 +12,9 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"
不同公司有不同的可用资源和业务需求因此不存在一个通用的解决方案可以应对各种情况下的ClickHouse备份和恢复。 适用于 1GB 数据的方案可能并不适用于几十 PB 数据的情况。 有多种具备各自优缺点的可能方法,将在下面对其进行讨论。最好使用几种方法而不是仅仅使用一种方法来弥补它们的各种缺点。。
!!! note "注"
需要注意的是,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时可能无法正常恢复(或者至少需要的时间比业务能够容忍的时间更长)。 因此无论您选择哪种备份方法请确保自动还原过程并定期在备用ClickHouse群集上演练。
:::note
需要注意的是,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时可能无法正常恢复(或者至少需要的时间比业务能够容忍的时间更长)。 因此无论您选择哪种备份方法请确保自动还原过程并定期在备用ClickHouse群集上演练。
:::
## 将源数据复制到其它地方 {#duplicating-source-data-somewhere-else}

View File

@ -528,8 +528,9 @@ SSL客户端/服务器配置。
包含数据的目录的路径。
!!! note "注"
尾部斜杠是强制性的。
:::note
尾部斜杠是强制性的。
:::
**示例**
@ -714,8 +715,9 @@ TCP端口用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
用于处理大型查询的临时数据的路径。
!!! note "注"
尾部斜杠是强制性的。
:::note
尾部斜杠是强制性的。
:::
**示例**
@ -728,11 +730,12 @@ TCP端口用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
从政策 [`storage_configuration`](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) 存储临时文件。
如果没有设置 [`tmp_path`](#server-settings-tmp_path) 被使用,否则被忽略。
!!! note "注"
- `move_factor` 被忽略
:::note
- `move_factor` 被忽略
- `keep_free_space_bytes` 被忽略
- `max_data_part_size_bytes` 被忽略
- 您必须在该政策中只有一个卷
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}

View File

@ -8,8 +8,9 @@ sidebar_label: "\u8BBE\u7F6E\u914D\u7F6E"
设置配置是设置的集合,并按照相同的名称进行分组。
!!! note "信息"
ClickHouse 还支持用 [SQL驱动的工作流](../../operations/access-rights.md#access-control) 管理设置配置。我们建议使用它。
:::info
ClickHouse 还支持用 [SQL驱动的工作流](../../operations/access-rights.md#access-control) 管理设置配置。我们建议使用它。
:::
设置配置可以任意命名。你可以为不同的用户指定相同的设置配置。您可以在设置配置中写入的最重要的内容是 `readonly=1`,这将确保只读访问。

View File

@ -10,8 +10,9 @@ sidebar_label: "\u7528\u6237\u8BBE\u7F6E"
`user.xml` 中的 `users` 配置段包含了用户配置
!!! note "提示"
ClickHouse还支持 [SQL驱动的工作流](../access-rights.md#access-control) 用于管理用户。 我们建议使用它。
:::note
ClickHouse还支持 [SQL驱动的工作流](../access-rights.md#access-control) 用于管理用户。 我们建议使用它。
:::
`users` 配置段的结构:

View File

@ -266,8 +266,9 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
执行时 `INSERT` 查询时,将省略的输入列值替换为相应列的默认值。 此选项仅适用于 [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) 和 [TabSeparated](../../interfaces/formats.md#tabseparated) 格式。
!!! note "注"
启用此选项后,扩展表元数据将从服务器发送到客户端。 它会消耗服务器上的额外计算资源,并可能降低性能。
:::note
启用此选项后,扩展表元数据将从服务器发送到客户端。 它会消耗服务器上的额外计算资源,并可能降低性能。
:::
可能的值:

View File

@ -99,8 +99,9 @@ slug: /zh/operations/system-tables/parts
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 表达式的数组。 每个表达式定义一个 [TTL MOVE 规则](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
!!! note "警告"
保留 `move_ttl_info.expression` 数组主要是为了向后兼容,现在检查 `TTL MOVE` 规则最简单的方法是使用 `move_ttl_info.min``move_ttl_info.max` 字段。
:::warning
保留 `move_ttl_info.expression` 数组主要是为了向后兼容,现在检查 `TTL MOVE` 规则最简单的方法是使用 `move_ttl_info.min` 和 `move_ttl_info.max` 字段。
:::
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 日期值和时间值的数组。数组中的每个元素都描述了一个 [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的最小键值。

View File

@ -8,8 +8,9 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
包含已执行查询的相关信息,例如:开始时间、处理持续时间、错误消息。
!!! note "注"
此表不包含以下内容的摄取数据 `INSERT` 查询。
:::note
此表不包含以下内容的摄取数据 `INSERT` 查询。
:::
您可以更改query_log的设置在服务器配置的 [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 部分。

View File

@ -12,5 +12,6 @@ sidebar_position: 107
计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -12,5 +12,6 @@ covarPop(x, y)
计算 `Σ((x - x̅)(y - y̅)) / n` 的值。
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 函数。 它的工作速度较慢,但提供了较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 函数。 它的工作速度较慢,但提供了较低的计算错误。
:::

View File

@ -14,5 +14,6 @@ covarSamp(x, y)
返回Float64。 当 `n <= 1`, 返回 +∞。
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -37,8 +37,9 @@ quantileTiming(level)(expr)
否则计算结果将四舍五入到16毫秒的最接近倍数。
!!! note "注"
对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。
:::note
对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。
:::
**返回值**
@ -46,8 +47,9 @@ quantileTiming(level)(expr)
类型: `Float32`
!!! note "注"
如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。
:::note
如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。
:::
**示例**

View File

@ -39,8 +39,9 @@ quantileTimingWeighted(level)(expr, weight)
否则计算结果将四舍五入到16毫秒的最接近倍数。
!!! note "注"
对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。
:::note
对于计算页面加载时间分位数, 此函数比[quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile)更有效和准确。
:::
**返回值**
@ -48,8 +49,9 @@ quantileTimingWeighted(level)(expr, weight)
类型: `Float32`
!!! note "注"
如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。
:::note
如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf)被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 参见 [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。
:::
**示例**

View File

@ -7,5 +7,6 @@ sidebar_position: 30
结果等于 [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md)的平方根。
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -7,5 +7,6 @@ sidebar_position: 31
结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -36,8 +36,9 @@ uniqCombined(HLL_precision)(x[, ...])
- 确定性地提供结果(它不依赖于查询处理顺序)。
!!! note "注"
由于它对非 `String` 类型使用32位哈希对于基数显著大于`UINT_MAX` ,结果将有非常高的误差(误差将在几百亿不同值之后迅速提高), 因此这种情况,你应该使用 [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
:::note
由于它对非 `String` 类型使用32位哈希对于基数显著大于`UINT_MAX` ,结果将有非常高的误差(误差将在几百亿不同值之后迅速提高), 因此这种情况,你应该使用 [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
:::
相比于 [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) 函数, 该 `uniqCombined`:

View File

@ -9,5 +9,6 @@ sidebar_position: 32
换句话说,计算一组数据的离差。 返回 `Float64`
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -11,5 +11,6 @@ sidebar_position: 33
返回 `Float64`。 当 `n <= 1`,返回 `+∞`
!!! note "注"
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::note
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
:::

View File

@ -6,8 +6,9 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027"
# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect}
!!! note "注"
本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
:::note
本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
:::
## 行为差异 {#differences-in-behaviour}

View File

@ -25,10 +25,10 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
!!! note "注"
`SimpleAggregateFunction(func, Type)` 的值外观和存储方式于 `Type` 相同, 所以你不需要应用带有 `-Merge`/`-State` 后缀的函数。
`SimpleAggregateFunction` 的性能优于具有相同聚合函数的 `AggregateFunction`
:::note
`SimpleAggregateFunction(func, Type)` 的值外观和存储方式于 `Type` 相同, 所以你不需要应用带有 `-Merge`/`-State` 后缀的函数。
`SimpleAggregateFunction` 的性能优于具有相同聚合函数的 `AggregateFunction`
:::
**参数**

View File

@ -42,8 +42,9 @@ slug: /zh/sql-reference/functions/string-search-functions
对于不区分大小写的搜索或/和UTF-8格式使用函数`multiSearchAnyCaseInsensitivemultiSearchAnyUTF8multiSearchAnyCaseInsensitiveUTF8`。
!!! note "注意"
在所有`multiSearch*`函数中由于实现规范needles的数量应小于2<sup>8</sup>
:::note
在所有`multiSearch*`函数中由于实现规范needles的数量应小于2<sup>8</sup>
:::
## 匹配(大海捞针,模式) {#matchhaystack-pattern}
@ -60,8 +61,9 @@ slug: /zh/sql-reference/functions/string-search-functions
与`match`相同但如果所有正则表达式都不匹配则返回0如果任何模式匹配则返回1。它使用[超扫描](https://github.com/intel/hyperscan)库。对于在字符串中搜索子字符串的模式最好使用«multisearchany»因为它更高效。
!!! note "注意"
任何`haystack`字符串的长度必须小于2<sup>32\</sup>字节否则抛出异常。这种限制是因为hyperscan API而产生的。
:::note
任何`haystack`字符串的长度必须小于2<sup>32\</sup>字节否则抛出异常。这种限制是因为hyperscan API而产生的。
:::
## multiMatchAnyIndex大海捞针\[模式<sub>1</sub>,模式<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
@ -75,11 +77,13 @@ slug: /zh/sql-reference/functions/string-search-functions
与`multiFuzzyMatchAny`相同,但返回匹配项的匹配能容的索引位置。
!!! note "注意"
`multiFuzzyMatch*`函数不支持UTF-8正则表达式由于hyperscan限制这些表达式被按字节解析。
:::note
`multiFuzzyMatch*`函数不支持UTF-8正则表达式由于hyperscan限制这些表达式被按字节解析。
:::
!!! note "注意"
如要关闭所有hyperscan函数的使用请设置`SET allow_hyperscan = 0;`。
:::note
如要关闭所有hyperscan函数的使用请设置`SET allow_hyperscan = 0;`。
:::
## 提取(大海捞针,图案) {#extracthaystack-pattern}
@ -119,5 +123,6 @@ slug: /zh/sql-reference/functions/string-search-functions
对于不区分大小写的搜索或/和UTF-8格式使用函数`ngramSearchCaseInsensitivengramSearchUTF8ngramSearchCaseInsensitiveUTF8`。
!!! note "注意"
对于UTF-8我们使用3-gram。所有这些都不是完全公平的n-gram距离。我们使用2字节哈希来散列n-gram然后计算这些哈希表之间的对称差异 - 可能会发生冲突。对于UTF-8不区分大小写的格式我们不使用公平的`tolower`函数 - 我们将每个Unicode字符字节的第5位从零开始和字节的第一位归零 - 这适用于拉丁语,主要用于所有西里尔字母。
:::note
对于UTF-8我们使用3-gram。所有这些都不是完全公平的n-gram距离。我们使用2字节哈希来散列n-gram然后计算这些哈希表之间的对称差异 - 可能会发生冲突。对于UTF-8不区分大小写的格式我们不使用公平的`tolower`函数 - 我们将每个Unicode字符字节的第5位从零开始和字节的第一位归零 - 这适用于拉丁语,主要用于所有西里尔字母。
:::

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
删除匹配指定过滤表达式的数据。实现为[突变](../../../sql-reference/statements/alter/index.md#mutations).
!!! note "备注"
`ALTER TABLE`前缀使得这个语法不同于大多数其他支持SQL的系统。它的目的是表示与OLTP数据库中的类似查询不同这是一个不为经常使用而设计的繁重操作。
:::note
`ALTER TABLE`前缀使得这个语法不同于大多数其他支持SQL的系统。它的目的是表示与OLTP数据库中的类似查询不同这是一个不为经常使用而设计的繁重操作。
:::
`filter_expr` 的类型必须是`UInt8`。该查询删除表中该表达式接受非零值的行。

View File

@ -17,8 +17,9 @@ sidebar_label: ALTER
- [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md)
- [TTL](../../../sql-reference/statements/alter/ttl.md)
!!! note "备注"
大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。
:::note
大多数 `ALTER TABLE` 查询只支持[\*MergeTree](../../../engines/table-engines/mergetree-family/index.md)表,以及[Merge](../../../engines/table-engines/special/merge.md)和[Distributed](../../../engines/table-engines/special/distributed.md)。
:::
这些 `ALTER` 语句操作视图:

View File

@ -14,5 +14,6 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY ORDER BY new_expression
从某种意义上说,该命令是轻量级的,它只更改元数据。要保持数据部分行按排序键表达式排序的属性,您不能向排序键添加包含现有列的表达式(仅在相同的`ALTER`查询中由`ADD COLUMN`命令添加的列,没有默认的列值)。
!!! note "备注"
它只适用于[`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)表族(包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)表)。
:::note
它只适用于[`MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md)表族(包括[replicated](../../../engines/table-engines/mergetree-family/replication.md)表)。
:::

View File

@ -14,8 +14,9 @@ sidebar_label: SETTING
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```
!!! note "注意"
这些查询只能应用于 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 表。
:::note
这些查询只能应用于 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 表。
:::
## 修改设置 {#alter_modify_setting}

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
操作与指定过滤表达式相匹配的数据。作为一个[变更 mutation](../../../sql-reference/statements/alter/index.md#mutations)来实现.
!!! note "Note"
`ALTER TABLE` 的前缀使这个语法与其他大多数支持SQL的系统不同。它的目的是表明与OLTP数据库中的类似查询不同这是一个繁重的操作不是为频繁使用而设计。
:::note
`ALTER TABLE` 的前缀使这个语法与其他大多数支持SQL的系统不同。它的目的是表明与OLTP数据库中的类似查询不同这是一个繁重的操作不是为频繁使用而设计。
:::
`filter_expr`必须是`UInt8`类型。这个查询将指定列的值更新为行中相应表达式的值,对于这些行,`filter_expr`取值为非零。使用`CAST`操作符将数值映射到列的类型上。不支持更新用于计算主键或分区键的列。

View File

@ -9,8 +9,9 @@ sidebar_label: EXCHANGE
以原子方式交换两个表或字典的名称。
此任务也可以通过使用[RENAME](./rename.md)来完成,但在这种情况下操作不是原子的。
!!! note "注意"
:::note
`EXCHANGE`仅支持[Atomic](../../engines/database-engines/atomic.md)数据库引擎.
:::
**语法**

View File

@ -9,8 +9,9 @@ sidebar_label: RENAME
重命名数据库、表或字典。 可以在单个查询中重命名多个实体。
请注意,具有多个实体的`RENAME`查询是非原子操作。 要以原子方式交换实体名称,请使用[EXCHANGE](./exchange.md)语法.
!!! note "注意"
:::note
`RENAME`仅支持[Atomic](../../engines/database-engines/atomic.md)数据库引擎.
:::
**语法**

View File

@ -11,8 +11,9 @@ sidebar_label: GROUP BY
- 在所有的表达式在 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句中 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。
- 聚合结果 `SELECT` 查询将包含尽可能多的行,因为有唯一值 “grouping key” 在源表中。 通常这会显着减少行数,通常是数量级,但不一定:如果所有行数保持不变 “grouping key” 值是不同的。
!!! note "注"
还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。
:::note
还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。
:::
## 空处理 {#null-processing}

View File

@ -39,8 +39,9 @@ ClickHouse中提供的其他联接类型:
## 严格 {#join-settings}
!!! note "注"
可以使用以下方式复盖默认的严格性值 [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) 设置。
:::note
可以使用以下方式复盖默认的严格性值 [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) 设置。
:::
Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
@ -91,8 +92,9 @@ USING (equi_column1, ... equi_columnN, asof_column)
`ASOF JOIN`会从 `table_2` 中的用户事件时间戳找出和 `table_1` 中用户事件时间戳中最近的一个时间戳,来满足最接近匹配的条件。如果有得话,则相等的时间戳值是最接近的值。在此例中,`user_id` 列可用于条件匹配,`ev_time` 列可用于最接近匹配。在此例中,`event_1_1` 可以 JOIN `event_2_1``event_1_2` 可以JOIN `event_2_3`,但是 `event_2_2` 不能被JOIN。
!!! note "注"
`ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。
:::note
`ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。
:::
## 分布式联接 {#global-join}

View File

@ -14,8 +14,9 @@ ClickHouse支持以下语法变体:
处理查询时ClickHouse首先选择经由排序键排序过后的数据。排序键可以显式地使用[ORDER BY](order-by.md#select-order-by)从句指定,或隐式地使用表引擎使用的排序键(数据的顺序仅在使用[ORDER BY](order-by.md#select-order-by)时才可以保证否则由于多线程处理数据顺序会随机化。然后ClickHouse执行`LIMIT n BY expressions`从句,将每一行按 `expressions` 的值进行分组,并对每一分组返回前`n`行。如果指定了`OFFSET`那么对于每一分组ClickHouse会跳过前`offset_value`行,接着返回前`n`行。如果`offset_value`大于某一分组的行数ClickHouse会从分组返回0行。
!!! note "注"
`LIMIT BY`与[LIMIT](../../../sql-reference/statements/select/limit.md)没有关系。它们可以在同一个查询中使用。
:::note
`LIMIT BY`与[LIMIT](../../../sql-reference/statements/select/limit.md)没有关系。它们可以在同一个查询中使用。
:::
## 例 {#examples}

View File

@ -15,8 +15,9 @@ sidebar_label: SAMPLE
- 当您的原始数据不准确时,所以近似不会明显降低质量。
- 业务需求的目标是近似结果(为了成本效益,或者向高级用户推销确切结果)。
!!! note "注"
您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 族,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::note
您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 族,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
:::
下面列出了数据采样的功能:

View File

@ -11,9 +11,10 @@ sidebar_label: WHERE
如果基础表引擎支持,`WHERE`表达式会使用索引和分区进行剪枝。
!!! note "注"
有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西.
:::note
有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西.
:::
如果需要测试一个 [NULL](../../../sql-reference/syntax.md#null-literal) 值,请使用 [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) 运算符或 [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) 和 [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) 函数。否则带有 NULL 的表达式永远不会通过。
**示例**

View File

@ -124,10 +124,9 @@ ClickHouse可以管理 [MergeTree](../../engines/table-engines/mergetree-family/
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
```
!!! note "Note"
`DETACH / ATTACH` 表操作会在后台进行表的merge操作甚至当所有MergeTree表的合并操作已经停止的情况下。
:::note
`DETACH / ATTACH` 表操作会在后台进行表的merge操作甚至当所有MergeTree表的合并操作已经停止的情况下。
:::
### START MERGES {#query_language-system-start-merges}

View File

@ -49,8 +49,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',
与原始MySQL表具有相同列的表对象。
!!! note "注意"
在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。
:::note
在`INSERT`查询中为了区分`mysql(...)`与带有列名列表的表名的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。
:::
## 用法示例 {#usage-example}

View File

@ -14,7 +14,8 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=%p # %p is resolved to the systemd unit name
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p

View File

@ -18,12 +18,14 @@ Group=clickhouse
Restart=always
RestartSec=30
# Since ClickHouse is systemd aware default 1m30sec may not be enough
TimeoutStartSec=inifinity
TimeoutStartSec=infinity
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
# Bring back /etc/default/clickhouse for backward compatibility
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE

View File

@ -158,6 +158,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
if (cloned)
{
writeStringBinary("1", out);
out.finalize();
return;
}
else
{

View File

@ -91,6 +91,7 @@
#include <Server/ProtocolServerAdapter.h>
#include <Server/HTTP/HTTPServer.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Core/ServerSettings.h>
#include <filesystem>
#include <unordered_set>
@ -662,7 +663,10 @@ try
MainThreadStatus::getInstance();
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
ServerSettings server_settings;
server_settings.loadSettingsFromConfig(config());
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
#if USE_HDFS
/// This will point libhdfs3 to the right location for its config.
@ -696,7 +700,7 @@ try
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
setenv("OPENSSL_CONF", config_dir.string(), true);
setenv("OPENSSL_CONF", config_dir.c_str(), true);
}
#endif
@ -747,9 +751,9 @@ try
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000));
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
#if USE_AZURE_BLOB_STORAGE
/// It makes sense to deinitialize libxml after joining of all threads
@ -765,9 +769,9 @@ try
#endif
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size,
server_settings.io_thread_pool_queue_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
@ -783,15 +787,15 @@ try
}
}
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
Poco::ThreadPool server_pool(3, server_settings.max_connections);
std::mutex servers_lock;
std::vector<ProtocolServerAdapter> servers;
std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
/// This object will periodically calculate some metrics.
ServerAsynchronousMetrics async_metrics(
global_context,
config().getUInt("asynchronous_metrics_update_period_s", 1),
config().getUInt("asynchronous_heavy_metrics_update_period_s", 120),
server_settings.asynchronous_metrics_update_period_s,
server_settings.asynchronous_heavy_metrics_update_period_s,
[&]() -> std::vector<ProtocolServerMetrics>
{
std::vector<ProtocolServerMetrics> metrics;
@ -806,7 +810,7 @@ try
}
);
ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10));
ConnectionCollector::init(global_context, server_settings.max_threads_for_connection_collector);
bool has_zookeeper = config().has("zookeeper");
@ -825,6 +829,9 @@ try
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
/// We need to reload server settings because config could be updated via zookeeper.
server_settings.loadSettingsFromConfig(config());
#if defined(OS_LINUX)
std::string executable_path = getExecutablePath();
@ -944,7 +951,7 @@ try
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = config().getString("default_database", "default");
std::string default_database = server_settings.default_database.toString();
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();
@ -1035,21 +1042,18 @@ try
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
/// Storage with temporary data for processing of heavy queries.
if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty())
if (!server_settings.tmp_policy.value.empty())
{
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStoragePolicy(temporary_policy, max_size);
global_context->setTemporaryStoragePolicy(server_settings.tmp_policy, server_settings.max_temporary_data_on_disk_size);
}
else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty())
else if (!server_settings.temporary_data_in_cache.value.empty())
{
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStorageInCache(temporary_cache, max_size);
global_context->setTemporaryStorageInCache(server_settings.temporary_data_in_cache, server_settings.max_temporary_data_on_disk_size);
}
else
{
std::string temporary_path = config().getString("tmp_path", path / "tmp/");
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStoragePath(temporary_path, max_size);
global_context->setTemporaryStoragePath(temporary_path, server_settings.max_temporary_data_on_disk_size);
}
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
@ -1184,10 +1188,12 @@ try
{
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
/// Limit on total memory usage
size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0);
ServerSettings server_settings;
server_settings.loadSettingsFromConfig(*config);
double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
size_t max_server_memory_usage = server_settings.max_server_memory_usage;
double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
if (max_server_memory_usage == 0)
@ -1215,8 +1221,7 @@ try
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true);
total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory);
total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
@ -1234,36 +1239,23 @@ try
global_context->setRemoteHostFilter(*config);
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
if (config->has("max_table_size_to_drop"))
global_context->setMaxTableSizeToDrop(config->getUInt64("max_table_size_to_drop"));
if (config->has("max_partition_size_to_drop"))
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
if (config->has("concurrent_threads_soft_limit_num"))
if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num;
if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
{
auto value = config->getUInt64("concurrent_threads_soft_limit_num", 0);
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}
if (config->has("concurrent_threads_soft_limit_ratio_to_cores"))
{
auto value = config->getUInt64("concurrent_threads_soft_limit_ratio_to_cores", 0) * std::thread::hardware_concurrency();
auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}
ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (config->has("max_concurrent_insert_queries"))
global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0));
if (config->has("max_concurrent_select_queries"))
global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0));
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries);
global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries);
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);
@ -1272,56 +1264,36 @@ try
/// Note: If you specified it in the top level config (not it config of default profile)
/// then ClickHouse will use it exactly.
/// This is done for backward compatibility.
if (global_context->areBackgroundExecutorsInitialized() && (config->has("background_pool_size") || config->has("background_merges_mutations_concurrency_ratio")))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_pool_size", 16);
auto new_ratio = config->getUInt64("background_merges_mutations_concurrency_ratio", 2);
auto new_pool_size = server_settings.background_pool_size;
auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio;
global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio);
auto new_scheduling_policy = config->getString("background_merges_mutations_scheduling_policy", "round_robin");
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_scheduling_policy);
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString());
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_move_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_move_pool_size");
auto new_pool_size = server_settings.background_move_pool_size;
global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_fetches_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_fetches_pool_size");
auto new_pool_size = server_settings.background_fetches_pool_size;
global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_common_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_common_pool_size");
auto new_pool_size = server_settings.background_common_pool_size;
global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (config->has("background_buffer_flush_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_buffer_flush_schedule_pool_size");
global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_schedule_pool_size");
global_context->getSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_message_broker_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_message_broker_schedule_pool_size");
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_distributed_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_distributed_schedule_pool_size");
global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size);
}
global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size);
global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size);
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size);
global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size);
if (config->has("resources"))
{
@ -1466,18 +1438,15 @@ try
});
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0));
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
/// Set up caches.
/// Lower cache size on low-memory systems.
double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
size_t max_cache_size = static_cast<size_t>(memory_amount * server_settings.cache_size_to_ram_max_ratio);
/// Size of cache for uncompressed blocks. Zero means disabled.
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU");
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -1499,9 +1468,8 @@ try
global_context,
settings.async_insert_threads));
/// Size of cache for marks (index of MergeTree family of tables).
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
String mark_cache_policy = config().getString("mark_cache_policy", "SLRU");
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -1512,20 +1480,14 @@ try
}
global_context->setMarkCache(mark_cache_size, mark_cache_policy);
/// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0);
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
if (server_settings.index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
/// Size of cache for index marks (index of MergeTree skip indices).
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0);
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
if (server_settings.index_mark_cache_size)
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
/// A cache for mmapped files.
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary.
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
if (server_settings.mmap_cache_size)
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
/// A cache for query results.
global_context->setQueryCache(config());
@ -1611,7 +1573,7 @@ try
/// context is destroyed.
/// In addition this object has to be created before the loading of the tables.
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
if (server_settings.disable_internal_dns_cache)
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
@ -1621,7 +1583,7 @@ try
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures);
}
if (dns_cache_updater)
@ -1886,7 +1848,7 @@ try
LOG_INFO(log, "Closed all listening sockets.");
/// Killing remaining queries.
if (!config().getBool("shutdown_wait_unfinished_queries", false))
/// Kill the remaining queries unless the server was asked to wait for unfinished queries on shutdown.
/// The negation is required: the config lookup this replaces was
/// `!config().getBool("shutdown_wait_unfinished_queries", false)`, i.e. queries are killed
/// when the setting is NOT enabled. Dropping the `!` would invert the behavior.
if (!server_settings.shutdown_wait_unfinished_queries)
    global_context->getProcessList().killAllQueries();
if (current_connections)

View File

@ -398,15 +398,30 @@
fill: var(--logo-color);
}
#cloud-logo
{
color: var(--background-color);
text-shadow: 0rem 0rem 2rem var(--logo-color);
font-size: 10vw;
display: block;
}
#logo:hover
{
fill: var(--logo-color-active);
color: var(--logo-color-active);
}
#cloud-logo:hover
{
filter: brightness(150%);
}
#logo-container
{
text-align: center;
margin-top: 5em;
line-height: 0.75;
}
#chart
@ -487,6 +502,7 @@
</g>
</svg>
</a>
<a id="cloud-logo" href="https://clickhouse.cloud/"></a>
</p>
</body>
@ -669,6 +685,33 @@
elem.selectionStart = selection_start + 4;
elem.selectionEnd = selection_start + 4;
e.preventDefault();
return false;
} else if (e.key === 'Enter') {
// If the user presses Enter, and the previous line starts with spaces,
// then we will insert the same number of spaces.
const elem = e.target;
if (elem.selectionStart !== elem.selectionEnd) {
// If there is a selection, then we will not insert spaces.
return;
}
const cursor_pos = elem.selectionStart;
const elem_value = elem.value;
const text_before_cursor = elem_value.substring(0, cursor_pos);
const text_after_cursor = elem_value.substring(cursor_pos);
const prev_lines = text_before_cursor.split('\n');
const prev_line = prev_lines.pop();
const lead_spaces = prev_line.match(/^\s*/)[0];
if (!lead_spaces) {
return;
}
// Add leading spaces to the current line.
elem.value = text_before_cursor + '\n' + lead_spaces + text_after_cursor;
elem.selectionStart = cursor_pos + lead_spaces.length + 1;
elem.selectionEnd = elem.selectionStart;
e.preventDefault();
return false;
}

View File

@ -1,8 +1,8 @@
#include <Access/Common/AllowedClientHosts.h>
#include <Common/Exception.h>
#include <Common/likePatternToRegexp.h>
#include <Common/logger_useful.h>
#include <base/scope_guard.h>
#include <Functions/likePatternToRegexp.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/RegularExpression.h>
#include <boost/algorithm/string/predicate.hpp>

View File

@ -5,8 +5,10 @@
#include <IO/Operators.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Analyzer/Utils.h>
#include <Analyzer/ColumnNode.h>
namespace DB
{
@ -52,8 +54,24 @@ ASTPtr ArrayJoinNode::toASTImpl() const
auto array_join_ast = std::make_shared<ASTArrayJoin>();
array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner;
const auto & join_expression_list_node = getJoinExpressionsNode();
array_join_ast->children.push_back(join_expression_list_node->toAST());
auto array_join_expressions_ast = std::make_shared<ASTExpressionList>();
const auto & array_join_expressions = getJoinExpressions().getNodes();
for (const auto & array_join_expression : array_join_expressions)
{
ASTPtr array_join_expression_ast;
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
array_join_expression_ast = column_node->getExpression()->toAST();
else
array_join_expression_ast = array_join_expression->toAST();
array_join_expression_ast->setAlias(array_join_expression->getAlias());
array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast));
}
array_join_ast->children.push_back(std::move(array_join_expressions_ast));
array_join_ast->expression_list = array_join_ast->children.back();
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();

View File

@ -110,8 +110,15 @@ ASTPtr ColumnNode::toASTImpl() const
}
else if (auto * table_node = column_source->as<TableNode>())
{
const auto & table_storage_id = table_node->getStorageID();
column_identifier_parts = {table_storage_id.getDatabaseName(), table_storage_id.getTableName()};
if (!table_node->getTemporaryTableName().empty())
{
column_identifier_parts = { table_node->getTemporaryTableName() };
}
else
{
const auto & table_storage_id = table_node->getStorageID();
column_identifier_parts = { table_storage_id.getDatabaseName(), table_storage_id.getTableName() };
}
}
}
}

Some files were not shown because too many files have changed in this diff Show More