Merge remote-tracking branch 'upstream/master' into named-collections-granular-access

This commit is contained in:
kssenii 2023-03-02 19:08:29 +01:00
commit 0c4bb3c80f
470 changed files with 6849 additions and 2710 deletions

View File

@ -209,3 +209,5 @@ CheckOptions:
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true

View File

@ -3105,10 +3105,10 @@ jobs:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
TEMP_PATH=${{runner.temp}}/stress_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (asan)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
REPO_COPY=${{runner.temp}}/stress_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
@ -3267,6 +3267,142 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### UPGRADE CHECK ######################################
##############################################################################################
UpgradeCheckAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (asan)
REPO_COPY=${{runner.temp}}/upgrade_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckTsan:
needs: [BuilderDebTsan]
# same as for stress test with tsan
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (tsan)
REPO_COPY=${{runner.temp}}/upgrade_thread/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_memory
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (msan)
REPO_COPY=${{runner.temp}}/upgrade_memory/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
UpgradeCheckDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/upgrade_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Upgrade check (debug)
REPO_COPY=${{runner.temp}}/upgrade_debug/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Upgrade check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 upgrade_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
##################################### AST FUZZERS ############################################
##############################################################################################

View File

@ -29,7 +29,7 @@
* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)).
* Support window function `ntile`.
* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)).
* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)).
* Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)).
* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)).

View File

@ -391,10 +391,12 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it

View File

@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.

View File

@ -195,7 +195,6 @@ long splice(int fd_in, off_t *off_in, int fd_out, off_t *off_out, size_t len, un
#include <sys/stat.h>
#include <stdint.h>
#if !defined(__aarch64__)
struct statx {
uint32_t stx_mask;
uint32_t stx_blksize;
@ -226,7 +225,6 @@ int statx(int fd, const char *restrict path, int flag,
{
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
}
#endif
#include <syscall.h>

View File

@ -8,3 +8,8 @@ int fallocate(int fd, int mode, off_t base, off_t len)
{
return syscall(SYS_fallocate, fd, mode, base, len);
}
int fallocate64(int fd, int mode, off_t base, off_t len)
{
return fallocate(fd, mode, base, len);
}

View File

@ -9,3 +9,8 @@ ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
/// There was cancellable syscall (syscall_cp), but I don't care.
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
}
ssize_t pwritev64(int fd, const struct iovec *iov, int count, off_t ofs)
{
return pwritev(fd, iov, count, ofs);
}

View File

@ -67,19 +67,7 @@ public:
void swap(Timespan & timespan);
/// Swaps the Timespan with another one.
bool operator==(const Timespan & ts) const;
bool operator!=(const Timespan & ts) const;
bool operator>(const Timespan & ts) const;
bool operator>=(const Timespan & ts) const;
bool operator<(const Timespan & ts) const;
bool operator<=(const Timespan & ts) const;
bool operator==(TimeDiff microSeconds) const;
bool operator!=(TimeDiff microSeconds) const;
bool operator>(TimeDiff microSeconds) const;
bool operator>=(TimeDiff microSeconds) const;
bool operator<(TimeDiff microSeconds) const;
bool operator<=(TimeDiff microSeconds) const;
auto operator<=>(const Timespan & ts) const = default;
Timespan operator+(const Timespan & d) const;
Timespan operator-(const Timespan & d) const;
@ -215,78 +203,6 @@ inline Timespan::TimeDiff Timespan::totalMicroseconds() const
}
inline bool Timespan::operator==(const Timespan & ts) const
{
return _span == ts._span;
}
inline bool Timespan::operator!=(const Timespan & ts) const
{
return _span != ts._span;
}
inline bool Timespan::operator>(const Timespan & ts) const
{
return _span > ts._span;
}
inline bool Timespan::operator>=(const Timespan & ts) const
{
return _span >= ts._span;
}
inline bool Timespan::operator<(const Timespan & ts) const
{
return _span < ts._span;
}
inline bool Timespan::operator<=(const Timespan & ts) const
{
return _span <= ts._span;
}
inline bool Timespan::operator==(TimeDiff microSeconds) const
{
return _span == microSeconds;
}
inline bool Timespan::operator!=(TimeDiff microSeconds) const
{
return _span != microSeconds;
}
inline bool Timespan::operator>(TimeDiff microSeconds) const
{
return _span > microSeconds;
}
inline bool Timespan::operator>=(TimeDiff microSeconds) const
{
return _span >= microSeconds;
}
inline bool Timespan::operator<(TimeDiff microSeconds) const
{
return _span < microSeconds;
}
inline bool Timespan::operator<=(TimeDiff microSeconds) const
{
return _span <= microSeconds;
}
inline void swap(Timespan & s1, Timespan & s2)
{
s1.swap(s2);

View File

@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep)
if (tryParse(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep)
if (tryParseUnsigned(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s)
if (tryParseHex(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s)
if (tryParseOct(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -112,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep)
if (tryParse64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -128,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep)
if (tryParseUnsigned64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -144,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s)
if (tryParseHex64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -162,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s)
if (tryParseOct64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -180,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep)
if (tryParseFloat(s, result, decSep, thSep))
return result;
else
throw SyntaxException("Not a valid floating-point number", s);
throw SyntaxException("Not a valid floating-point number", "'" + s + "'");
}
@ -196,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s)
if (tryParseBool(s, result))
return result;
else
throw SyntaxException("Not a valid bool number", s);
throw SyntaxException("Not a valid bool number", "'" + s + "'");
}

View File

@ -30,7 +30,7 @@ elseif (ARCH_AARCH64)
# support it.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
else ()
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1, 10]. In particular, it
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
# old OSs.
@ -45,21 +45,39 @@ elseif (ARCH_AARCH64)
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits
# but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
# ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15.
# rcpc: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton >=2, Azure and GCP instances.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
# [10] https://github.com/aws/aws-graviton-getting-started/blob/main/README.md
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs+rcpc")
endif ()
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
# and the build machine is too old, i.e. doesn't satisfy above modern profile, then these intermediate binaries will not run (dump
# SIGILL). Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, suggest to run the
# build with the compat profile.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER)
# CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the
# legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm
# jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm
# bf16 dgh rng")
execute_process(
COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1")
endif()
endif()
elseif (ARCH_PPC64LE)
# By Default, build for power8 and up, allow building for power9 and up
# Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
@ -102,6 +120,22 @@ elseif (ARCH_AMD64)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
# Same best-effort check for x86 as above for ARM.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER)
# Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile.
# /proc/cpuid for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse
# sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
# tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c
# rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx
# avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke""
execute_process(
COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1")
endif()
endif()
# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
# Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code

View File

@ -45,6 +45,7 @@ if (COMPILER_CLANG)
no_warning(weak-vtables)
no_warning(thread-safety-negative) # experimental flag, too many false positives
no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
no_warning(unsafe-buffer-usage) # too aggressive
# TODO Enable conversion, sign-conversion, double-promotion warnings.
elseif (COMPILER_GCC)
# Add compiler options only to c++ compiler

2
contrib/capnproto vendored

@ -1 +1 @@
Subproject commit e19cd661e49dd9022d3f920b69d843333b896451
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366
Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5

View File

@ -98,6 +98,16 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# Since we always use toolchain files to generate hermatic builds, cmake will
# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM
# targets with all tests enabled, which will slow down cmake configuration and
# compilation (You'll see Building native llvm-tblgen...). Let's disable the
# cross compiling indicator for now.
#
# TODO We should let cmake know whether it's indeed a cross compilation in the
# first place.
set (CMAKE_CROSSCOMPILING 0)
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES

View File

@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64)
add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1)
endif ()
if (ARCH_S390X)
add_compile_definitions(WORDS_BIGENDIAN)
endif ()
find_package(Threads REQUIRED)

View File

@ -30,25 +30,10 @@
# - zstd homepage : http://www.zstd.net/
# ################################################################
# Get library version based on information from input content (use regular exp)
function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
endfunction()
# Define library directory, where sources and header files are located
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common")
# Read file content
FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT)
# Parse version
GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
# cd contrib/zstd/lib
# find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ "${LIBRARY_DIR}/"'
SET(Sources

View File

@ -43,7 +43,8 @@
"docker/test/stateful": {
"name": "clickhouse/stateful-test",
"dependent": [
"docker/test/stress"
"docker/test/stress",
"docker/test/upgrade"
]
},
"docker/test/unit": {
@ -54,6 +55,10 @@
"name": "clickhouse/stress-test",
"dependent": []
},
"docker/test/upgrade": {
"name": "clickhouse/upgrade-check",
"dependent": []
},
"docker/test/codebrowser": {
"name": "clickhouse/codebrowser",
"dependent": []

View File

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:22.04
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
@ -9,13 +9,14 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
&& apt-get upgrade -yq \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \
ca-certificates \
dirmngr \
gnupg \
locales \
gnupg2 \
wget \
locales \
tzdata \
&& apt-get clean
@ -80,15 +81,8 @@ RUN arch=${TARGETARCH:-amd64} \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# Remove as much of Ubuntu as possible.
# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution.
# ClickHouse does not need Docker at all. ClickHouse is above all that.
# It does not care about Ubuntu, Docker, or other cruft and you should neither.
# The fact that this Docker image is based on Ubuntu is just a misconception.
# Some vulnerability scanners are arguing about Ubuntu, which is not relevant to ClickHouse at all.
# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners.
RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y
RUN apt-get autoremove --purge -yq libksba8 && \
apt-get autoremove -yq
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)

View File

@ -1,4 +1,4 @@
# rebuild in #33610
# rebuild in #47031
# docker build -t clickhouse/stateful-test .
ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG

View File

@ -21,10 +21,9 @@ RUN apt-get update -y \
openssl \
netcat-openbsd \
telnet \
llvm-9 \
brotli
brotli \
&& apt-get clean
COPY ./stress /stress
COPY run.sh /
ENV DATASETS="hits visits"

View File

@ -8,229 +8,13 @@ dmesg --clear
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
function escaped()
{
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
# Also limit lines width just in case (too long lines are not really useful usually)
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
function unts()
{
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
function install_packages()
{
dpkg -i $1/clickhouse-common-static_*.deb
dpkg -i $1/clickhouse-common-static-dbg_*.deb
dpkg -i $1/clickhouse-server_*.deb
dpkg -i $1/clickhouse-client_*.deb
}
function configure()
{
# install test configs
export USE_DATABASE_ORDINARY=1
export EXPORT_S3_STORAGE_POLICIES=1
/usr/share/clickhouse-test/config/install.sh
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# avoid too slow startup
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
# since we run clickhouse from root
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
# so that we can override this setting and execute service queries, like:
# - hung check
# - show/drop database
# - ...
#
# So max_memory_usage_for_user will be a soft limit, and
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
max_server_memory_usage_to_ram_ratio=0.5
echo "Setting max_server_memory_usage_to_ram_ratio to ${max_server_memory_usage_to_ram_ratio}"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage_to_ram_ratio>${max_server_memory_usage_to_ram_ratio}</max_server_memory_usage_to_ram_ratio>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*30/100)) # 30%
echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
<profiles>
<default>
<max_memory_usage>10G</max_memory_usage>
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
</default>
</profiles>
</clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
# Let OOM killer terminate other processes before clickhouse-server:
cat > /etc/clickhouse-server/config.d/oom_score.xml <<EOL
<clickhouse>
<oom_score>-1000</oom_score>
</clickhouse>
EOL
# Analyzer is not yet ready for testing
cat > /etc/clickhouse-server/users.d/no_analyzer.xml <<EOL
<clickhouse>
<profiles>
<default>
<constraints>
<allow_experimental_analyzer>
<readonly/>
</allow_experimental_analyzer>
</constraints>
</default>
</profiles>
</clickhouse>
EOL
}
function stop()
{
local max_tries="${1:-90}"
local pid
# Preserve the pid, since the server can hung after the PID will be deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
}
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt ${1:-120} ]
then
echo "Cannot start clickhouse-server"
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
cat /var/log/clickhouse-server/stdout.log
tail -n100 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
break
fi
# use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5
counter=$((counter + 1))
done
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
# explicitly ignore non-fatal signals that are used by server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
thread apply all backtrace full
info registers
disassemble /s
up
disassemble /s
up
disassemble /s
p \"done\"
detach
quit
" > script.gdb
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@ -396,7 +180,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
start
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
@ -413,316 +197,27 @@ unset "${!THREAD_@}"
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
check_server_start
stop
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.final.log
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
rg -Fa "########################################" /test_output/* > /dev/null \
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
function get_gdb_log_context()
{
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
}
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo -e "Backward compatibility check\n"
echo "Get previous release tag"
previous_release_tag=$(clickhouse-client --version | rg -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
else
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Uninstall current packages
dpkg --remove clickhouse-client
dpkg --remove clickhouse-server
dpkg --remove clickhouse-common-static-dbg
dpkg --remove clickhouse-common-static
rm -rf /var/lib/clickhouse/*
# Make BC check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..."
rm -f /etc/clickhouse-server/users.d/enable_blobs_check.xml ||:
rm -f /etc/clickhouse-server/users.d/marks.xml ||:
# Remove s3 related configs to avoid "there is no disk type `cache`"
rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||:
rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||:
# Turn on after 22.12
rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||:
# it uses recently introduced settings which previous versions may not have
rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||:
# Turn on after 23.1
rm -f /etc/clickhouse-server/users.d/prefetch_settings.xml ||:
start
clickhouse-client --query="SELECT 'Server version: ', version()"
# Install new package before running stress test because we should use new
# clickhouse-client and new clickhouse-test.
#
# But we should leave old binary in /usr/bin/ and debug symbols in
# /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so it
# will print sane stacktraces and also to avoid possible crashes.
#
# FIXME: those files can be extracted directly from debian package, but
# actually better solution will be to use different PATH instead of playing
# games with files from packages.
mv /usr/bin/clickhouse previous_release_package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
install_packages package_folder
mv /usr/bin/clickhouse package_folder/
mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
mv previous_release_package_folder/clickhouse /usr/bin/
mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
mkdir tmp_stress_output
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version
stop 300
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server
mv package_folder/clickhouse /usr/bin/
mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.dirty.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "[Queue = DB::DynamicRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
# Sanitizer asserts
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
echo "Check for Logical errors in server log:"
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
done
fi
fi
dmesg -T > /test_output/dmesg.log
# OOM in dmesg -- those are real
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
collect_query_and_trace_logs
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like 'Backward compatibility check%'), -- BC check goes last
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
@ -732,14 +227,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
done
collect_core_dumps

View File

@ -0,0 +1,31 @@
# rebuild in #33610
# docker build -t clickhouse/upgrade-check .
ARG FROM_TAG=latest
FROM clickhouse/stateful-test:$FROM_TAG
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
bash \
tzdata \
fakeroot \
debhelper \
parallel \
expect \
python3 \
python3-lxml \
python3-termcolor \
python3-requests \
curl \
sudo \
openssl \
netcat-openbsd \
telnet \
brotli \
&& apt-get clean
COPY run.sh /
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]

201
docker/test/upgrade/run.sh Normal file
View File

@ -0,0 +1,201 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024
# Avoid overlaps with previous runs
dmesg --clear
set -x
# we mount tests folder from repo to /usr/share
ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
echo "Get previous release tag"
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv
exit
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv
exit
fi
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Make upgrade check more funny by forcing Ordinary engine for system database
mkdir /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
# Start server from previous release
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
# Previous version may not be ready for fault injections
export ZOOKEEPER_FAULT_INJECTION=0
configure
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"
mkdir tmp_stress_output
stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" ||
(
echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
clickhouse stop --force
)
# Use bigger timeout for previous version and disable additional hang check
stop 300 false
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
# Install and start new server
install_packages package_folder
# Disable fault injections on start (we don't test them here, and it can lead to tons of requests in case of huge number of tables).
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
# Remove file application_errors.txt if it's empty
[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt
clickhouse-client --query="SELECT 'Server version: ', version()"
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log
# Error messages (we should ignore some errors)
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_ALREADY_EXISTS" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
-e "NETWORK_ERROR" \
-e "UNKNOWN_TABLE" \
-e "ZooKeeperClient" \
-e "KEEPER_EXCEPTION" \
-e "DirectoryMonitor" \
-e "TABLE_IS_READ_ONLY" \
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "NETLINK_ERROR" \
-e "Renaming unexpected part" \
-e "PART_IS_TEMPORARILY_LOCKED" \
-e "and a merge is impossible: we didn't find" \
-e "found in queue and some source parts for it was lost" \
-e "is lost forever." \
-e "Unknown index: idx." \
-e "Cannot parse string 'Hello' as UInt64" \
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
-e "Code: 269. DB::Exception: Destination table is myself" \
-e "Coordination::Exception: Connection loss" \
-e "MutateFromLogEntryTask" \
-e "No connection to ZooKeeper, cannot get shared table ID" \
-e "Session expired" \
-e "TOO_MANY_PARTS" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv
# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
collect_query_and_trace_logs
check_oom_in_dmesg
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
(test ilike '%possible deadlock%') DESC,
(test like '%start%') DESC,
(test like '%dmesg%') DESC,
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
collect_core_dumps

View File

@ -60,12 +60,21 @@ fi
clickhouse_download_filename_prefix="clickhouse"
clickhouse="$clickhouse_download_filename_prefix"
i=0
while [ -f "$clickhouse" ]
do
clickhouse="${clickhouse_download_filename_prefix}.${i}"
i=$(($i+1))
done
if [ -f "$clickhouse" ]
then
read -p "ClickHouse binary ${clickhouse} already exists. Overwrite? [y/N] " answer
if [ "$answer" = "y" -o "$answer" = "Y" ]
then
rm -f "$clickhouse"
else
i=0
while [ -f "$clickhouse" ]
do
clickhouse="${clickhouse_download_filename_prefix}.${i}"
i=$(($i+1))
done
fi
fi
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
@ -76,9 +85,9 @@ echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
./${clickhouse}"
if [ "${OS}" = "Linux" ]
then
echo
echo "You can also install it:
sudo ./${clickhouse} install"
fi
#if [ "${OS}" = "Linux" ]
#then
#echo
#echo "You can also install it:
#sudo ./${clickhouse} install"
#fi

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.3.19.6-lts (467e0a7bd77) FIXME as compared to v22.3.18.37-lts (fe512717551)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#46440](https://github.com/ClickHouse/ClickHouse/issues/46440): Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Get rid of legacy DocsReleaseChecks [#46665](https://github.com/ClickHouse/ClickHouse/pull/46665) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -39,12 +39,59 @@ To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` director
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
### Restricting test runs
A test can have zero or more _test tags_ specifying restrictions for test runs.
For `.sh` tests tags are written as a comment on the second line:
```bash
#!/usr/bin/env bash
# Tags: no-fasttest
```
For `.sql` tests tags are placed in the first line as a SQL comment:
```sql
-- Tags: no-fasttest
SELECT 1
```
|Tag name | What it does | Usage example |
|---|---|---|
| `disabled`| Test is not run ||
| `long` | Test's execution time is extended from 1 to 10 minutes ||
| `deadlock` | Test is run in a loop for a long time ||
| `race` | Same as `deadlock`. Prefer `deadlock` ||
| `shard` | Server is required to listen to `127.0.0.*` ||
| `distributed` | Same as `shard`. Prefer `shard` ||
| `global` | Same as `shard`. Prefer `shard` ||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
| `no-replicated-database` |||
| `no-ordinary-database` |||
| `no-parallel` | Disables running other tests in parallel with this one | Test reads from `system` tables and invariants may be broken|
| `no-parallel-replicas` |||
| `no-debug` |||
| `no-stress` |||
| `no-polymorphic-parts` |||
| `no-random-settings` |||
| `no-random-merge-tree-settings` |||
| `no-backward-compatibility-check` |||
| `no-cpu-x86_64` |||
| `no-cpu-aarch64` |||
| `no-cpu-ppc64le` |||
| `no-s3-storage` |||
In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
### Choosing the Test Name
The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that requires server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer that one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively. Make sure to add a proper prefix to your test name if it needs ZooKeeper or distributed queries.
### Checking for an Error that Must Occur
Sometimes you want to test that a server error occurs for an incorrect query. We support special annotations for this in SQL tests, in the following form:

View File

@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling
## Configuration {#configuration}
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below `<kafka>`) and topic-level (below `<kafka><kafka_topic>`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
<!-- Configuration specific to topics "logs" and "stats" -->
<kafka_topic>
<name>logs</name>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_topic>
<kafka_topic>
<name>stats</name>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_topic>
</kafka>
```
<details markdown="1">
<summary>Example in deprecated syntax</summary>
``` xml
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
</kafka>
<!-- Configuration specific for topic "logs" -->
<!-- Configuration specific to topics "logs" and "stats" -->
<!-- Does NOT support periods in topic names, e.g. "logs.security"> -->
<kafka_logs>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>
<kafka_stats>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_stats>
```
</details>
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}

View File

@ -1,6 +1,6 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).
The main task that indexes achieve is to quickly find nearest neighbors for multidimensional data. An example of such a problem can be finding similar pictures (texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). They can be created from data using [UDF](/docs/en/sql-reference/functions/index.md/#executable-user-defined-functions).
The next queries find the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
``` sql
@ -39,7 +39,7 @@ Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip i
LIMIT N
```
In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
In these queries, `DistanceFunction` is selected from [distance functions](/docs/en/sql-reference/functions/distance-functions.md). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](/docs/en//interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `Value` - a float value that will bound the neighbourhood.
:::note
ANN index can't speed up query that satisfies both types (`where + order by`, only one of them). All queries must have the limit, as algorithms are used to find nearest neighbors and need a specific number of them.
@ -85,13 +85,13 @@ As the indexes are built only during insertions into table, `INSERT` and `OPTIMI
You can create your table with index which uses certain algorithm. Now only indices based on the following algorithms are supported:
# Index list
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
# Annoy {#annoy}
Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).
Short description of the algorithm:
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D e.t.c.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest.
The algorithm recursively divides in half all space by random linear surfaces (lines in 2D, planes in 3D etc.). Thus it makes tree of polyhedrons and points that they contains. Repeating the operation several times for greater accuracy it creates a forest.
To find K Nearest Neighbours it goes down through the trees and fills the buffer of closest points using the priority queue of polyhedrons. Next, it sorts buffer and return the nearest K points.
__Examples__:
@ -118,7 +118,7 @@ ORDER BY id;
```
:::note
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
Table with array field will work faster, but all arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(data) = 256`.
:::
Parameter `NumTrees` is the number of trees which the algorithm will create. The bigger it is, the slower (approximately linear) it works (in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness). By default it is set to `100`. Parameter `DistanceName` is name of distance function. By default it is set to `L2Distance`. It can be set without changing first parameter, for example

View File

@ -5,6 +5,10 @@ description: Install ClickHouse
slug: /en/install
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
# Install ClickHouse
You have three options for getting up and running with ClickHouse:
@ -19,17 +23,27 @@ The quickest and easiest way to get up and running with ClickHouse is to create
## Self-Managed Install
:::tip
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
<Tabs>
<TabItem value="linux" label="Linux" default>
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
```bash
sudo ./clickhouse install
```
1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
```response
Creating log directory /var/log/clickhouse-server.
Creating data directory /var/lib/clickhouse.
@ -40,6 +54,7 @@ The quickest and easiest way to get up and running with ClickHouse is to create
Enter password for default user:
```
You should see the following output:
```response
ClickHouse has been successfully installed.
@ -51,10 +66,45 @@ The quickest and easiest way to get up and running with ClickHouse is to create
```
1. Run the following command to start the ClickHouse server:
```bash
sudo clickhouse start
```
</TabItem>
<TabItem value="macos" label="macOS">
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
sudo clickhouse start
curl https://clickhouse.com/ | sh
```
1. Run the ClickHouse server:
```bash
./clickhouse server
```
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
```bash
./clickhouse client
```
```response
./clickhouse client
ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 23.2.1 revision 54461.
local-host :)
```
You are ready to start sending DDL and SQL commands to ClickHouse!
</TabItem>
</Tabs>
:::tip
The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
:::

View File

@ -1971,7 +1971,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
## Arrow {#data-format-arrow}

View File

@ -229,7 +229,7 @@ To prevent inferring the same schema every time ClickHouse read the data from th
There are special settings that control this cache:
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `use_cache_for_{file,s3,hdfs,url}_schema_inference` - allows turning on/off using cache for schema inference. These settings can be used in queries.
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter
**Example**
```sql
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}'
SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐

View File

@ -26,6 +26,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnt don
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -50,7 +50,7 @@ If there are multiple profiles active for a user, then constraints are merged. M
Read-only mode is enabled by `readonly` setting (not to confuse with `readonly` constraint type):
- `readonly=0`: No read-only restrictions.
- `readonly=1`: Only read queries are allowed and settings cannot be changes unless `changeable_in_readonly` is set.
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.

View File

@ -142,6 +142,10 @@ y Nullable(String)
z IPv4
```
:::warning
If the `schema_inference_hints` is not formated properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored.
:::
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
@ -507,7 +511,7 @@ Enabled by default.
Ignore unknown keys in json object for named tuples.
Disabled by default.
Enabled by default.
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
@ -1102,6 +1106,12 @@ Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedStrin
Enabled by default.
### output_format_parquet_version {#output_format_parquet_version}
The version of Parquet format used in output format. Supported versions: `1.0`, `2.4`, `2.6` and `2.latest`.
Default value: `2.latest`.
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -3948,3 +3948,71 @@ Default value: `0`.
:::note
Use this setting only for backward compatibility if your use cases depend on old syntax.
:::
## final {#final}
Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and
distributed tables.
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
Example:
```sql
CREATE TABLE test
(
key Int64,
some String
)
ENGINE = ReplacingMergeTree
ORDER BY key;
INSERT INTO test FORMAT Values (1, 'first');
INSERT INTO test FORMAT Values (1, 'second');
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
┌─key─┬─some──┐
│ 1 │ first │
└─────┴───────┘
SELECT * FROM test SETTINGS final = 1;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
SET final = 1;
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
```
## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
## asterisk_include_alias_columns {#asterisk_include_alias_columns}
Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.

View File

@ -0,0 +1,52 @@
---
slug: /en/operations/system-tables/server_settings
---
# server_settings
Contains information about global settings for the server, which were specified in `config.xml`.
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value.
- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
**Example**
The following example shows how to get information about server settings which name contains `thread_pool`.
``` sql
SELECT *
FROM system.server_settings
WHERE name LIKE '%thread_pool%'
```
``` text
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
```
Using of `WHERE changed` can be useful, for example, when you want to check
whether settings in configuration files are loaded correctly and are in use.
<!-- -->
``` sql
SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_size'
```
**See also**
- [Settings](../../operations/system-tables/settings.md)
- [Configuration Files](../../operations/configuration-files.md)
- [Server Settings](../../operations/server-configuration-parameters/settings.md)

View File

@ -16,6 +16,7 @@ Columns:
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
- `0` — Current user can change the setting.
- `1` — Current user cant change the setting.
- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
**Example**

View File

@ -7,8 +7,8 @@ sidebar_position: 37
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
Returns Float64. When `n <= 1`, returns +∞.
Returns Float64. When `n <= 1`, returns `nan`.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
:::
:::

View File

@ -48,7 +48,35 @@ When dividing by zero you get inf, -inf, or nan.
## intDiv(a, b)
Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value).
An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one.
Returns an integer of the type of the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one.
**Example**
Query:
```sql
SELECT
intDiv(toFloat64(1), 0.001) AS res,
toTypeName(res)
```
```response
┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐
│ 1000 │ Int64 │
└──────┴─────────────────────────────────────────┘
```
```sql
SELECT
intDiv(1, 0.001) AS res,
toTypeName(res)
```
```response
Received exception from server (version 23.2.1):
Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION)
```
## intDivOrZero(a, b)

View File

@ -1231,8 +1231,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %e | day of the month, space-padded (1-31) | &nbsp; 2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %h | hour in 12h format (01-12) | 09 |
| %H | hour in 24h format (00-23) | 22 |
| %i | minute (00-59) | 33 |

View File

@ -579,3 +579,33 @@ Result:
│ 3628800 │
└───────────────┘
```
## width_bucket(operand, low, high, count)
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
`operand`, `low`, `high` can be any native number type. `count` can only be unsigned native integer and its value cannot be zero.
**Syntax**
```sql
widthBucket(operand, low, high, count)
```
There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
**Example**
Query:
``` sql
SELECT widthBucket(10.15, -8.6, 23, 18);
```
Result:
``` text
┌─widthBucket(10.15, -8.6, 23, 18)─┐
│ 11 │
└──────────────────────────────────┘
```

View File

@ -226,6 +226,17 @@ SELECT splitByNonAlpha(' 1! a, b. ');
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
**Example**
``` sql
SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString;
```
```text
┌─DateString──────────┐
│ 12/05/2021 12:50:00 │
└─────────────────────┘
```
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
Selects substrings of consecutive bytes from the ranges a-z and A-Z.Returns an array of substrings.
@ -364,4 +375,4 @@ Result:
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
```

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-replace-functions
sidebar_position: 42
sidebar_label: For Replacing in Strings
sidebar_label: Replacing in Strings
---
# Functions for Searching and Replacing in Strings

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-search-functions
sidebar_position: 41
sidebar_label: For Searching in Strings
sidebar_label: Searching in Strings
---
# Functions for Searching in Strings
@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p
Returns 1 in case of a match, and 0 otherwise.
Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
## multiMatchAny(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\])
@ -529,21 +529,25 @@ Result:
## like(haystack, pattern), haystack LIKE pattern operator
Checks whether a string matches a simple regular expression.
The regular expression can contain the metasymbols `%` and `_`.
Checks whether a string matches a LIKE expression.
A LIKE expression contains a mix of normal characters and the following metasymbols:
`%` indicates any quantity of any bytes (including zero characters).
- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
`_` indicates any one byte.
- `_` indicates a single arbitrary character.
Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the match function.
- `\` is for escaping literals `%`, `_` and `\`.
Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function.
For other regular expressions, the code is the same as for the match function.
To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
For patterns of the form `%needle%`, the function is as fast as the `position` function.
Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
## notLike(haystack, pattern), haystack NOT LIKE pattern operator

View File

@ -1,8 +1,8 @@
---
slug: /en/sql-reference/functions/tuple-map-functions
sidebar_position: 46
sidebar_label: Working with maps
title: "Functions for maps"
sidebar_label: Maps
title: "Functions for Maps"
---
## map
@ -440,7 +440,7 @@ mapApply(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**
@ -480,7 +480,7 @@ mapFilter(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**

View File

@ -6,22 +6,23 @@ sidebar_label: Type Conversion
# Type Conversion Functions
## Common Issues of Numeric Conversions
## Common Issues with Data Conversion
When you convert a value from one to another data type, you should remember that if you try to fit a value from a larger data type to a smaller one (for example Int64 to Int32), or convert from one data type to another (for example `String` to `Int`), you could have data loss. Test beforehand.
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected.
ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
## toInt(8\|16\|32\|64\|128\|256)
Converts an input value to the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toInt8(expr)`Results in the `Int8` data type.
- `toInt16(expr)`Results in the `Int16` data type.
- `toInt32(expr)`Results in the `Int32` data type.
- `toInt64(expr)`Results in the `Int64` data type.
- `toInt128(expr)`Results in the `Int128` data type.
- `toInt256(expr)`Results in the `Int256` data type.
- `toInt8(expr)`Converts to a value of data type `Int8`.
- `toInt16(expr)`Converts to a value of data type `Int16`.
- `toInt32(expr)`Converts to a value of data type `Int32`.
- `toInt64(expr)`Converts to a value of data type `Int64`.
- `toInt128(expr)`Converts to a value of data type `Int128`.
- `toInt256(expr)`Converts to a value of data type `Int256`.
**Arguments**
@ -53,7 +54,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrZero
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns 0.
Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`.
**Example**
@ -73,7 +74,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrNull
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns NULL.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `NULL`.
**Example**
@ -93,7 +94,7 @@ Result:
## toInt(8\|16\|32\|64\|128\|256)OrDefault
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value.
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns the default type value.
**Example**
@ -116,11 +117,11 @@ Result:
Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes:
- `toUInt8(expr)`Results in the `UInt8` data type.
- `toUInt16(expr)`Results in the `UInt16` data type.
- `toUInt32(expr)`Results in the `UInt32` data type.
- `toUInt64(expr)`Results in the `UInt64` data type.
- `toUInt256(expr)`Results in the `UInt256` data type.
- `toUInt8(expr)`Converts to a value of data type `UInt8`.
- `toUInt16(expr)`Converts to a value of data type `UInt16`.
- `toUInt32(expr)`Converts to a value of data type `UInt32`.
- `toUInt64(expr)`Converts to a value of data type `UInt64`.
- `toUInt256(expr)`Converts to a value of data type `UInt256`.
**Arguments**
@ -128,7 +129,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint
**Returned value**
Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type.
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
@ -166,26 +167,30 @@ Result:
## toDate
Converts the argument to `Date` data type.
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
If the argument is `DateTime` or `DateTime64`, it truncates it, leaving the date component of the DateTime:
```sql
SELECT
now() AS x,
toDate(x)
```
```response
┌───────────────────x─┬─toDate(now())─┐
│ 2022-12-30 13:44:17 │ 2022-12-30 │
└─────────────────────┴───────────────┘
```
If the argument is a string, it is parsed as Date or DateTime. If it was parsed as DateTime, the date component is being used:
If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used:
```sql
SELECT
toDate('2022-12-30') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30'))─┐
│ 2022-12-30 │ Date │
@ -193,18 +198,20 @@ SELECT
1 row in set. Elapsed: 0.001 sec.
```
```sql
SELECT
toDate('2022-12-30 01:02:03') AS x,
toTypeName(x)
```
```response
┌──────────x─┬─toTypeName(toDate('2022-12-30 01:02:03'))─┐
│ 2022-12-30 │ Date │
└────────────┴───────────────────────────────────────────┘
```
If the argument is a number and it looks like a UNIX timestamp (is greater than 65535), it is interpreted as a DateTime, then truncated to Date in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to Date depends on the timezone:
If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone:
```sql
SELECT
@ -217,6 +224,7 @@ SELECT
toDate(ts) AS date_Amsterdam_2,
toDate(ts, 'Pacific/Apia') AS date_Samoa_2
```
```response
Row 1:
──────
@ -232,7 +240,7 @@ date_Samoa_2: 2022-12-31
The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones.
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (a UNIX day) and converted to Date. It corresponds to the internal numeric representation of the `Date` data type. Example:
If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example:
```sql
SELECT toDate(12345)
@ -270,8 +278,6 @@ SELECT
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
```
Have a nice day working with dates and times.
## toDateOrZero
## toDateOrNull
@ -288,7 +294,7 @@ Have a nice day working with dates and times.
## toDate32
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account.
**Syntax**
@ -302,9 +308,7 @@ toDate32(expr)
**Returned value**
- A calendar date.
Type: [Date32](/docs/en/sql-reference/data-types/date32.md).
- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md).
**Example**
@ -332,7 +336,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value);
└────────────┴────────────────────────────────────┘
```
3. With `Date`-type argument:
3. With [Date](/docs/en/sql-reference/data-types/date.md) argument:
``` sql
SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value);
@ -386,7 +390,7 @@ Result:
## toDate32OrDefault
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by `Date32`. If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, borders of `Date` are taken into account. Returns default value if an invalid argument is received.
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received.
**Example**
@ -666,7 +670,7 @@ YYYY-MM-DD
YYYY-MM-DD hh:mm:ss
```
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing toDate(unix_timestamp), which otherwise would be an error and would require writing the more cumbersome toDate(toDateTime(unix_timestamp)).
As an exception, if converting from UInt32, Int32, UInt64, or Int64 numeric types to Date, and if the number is greater than or equal to 65536, the number is interpreted as a Unix timestamp (and not as the number of days) and is rounded to the date. This allows support for the common occurrence of writing `toDate(unix_timestamp)`, which otherwise would be an error and would require writing the more cumbersome `toDate(toDateTime(unix_timestamp))`.
Conversion between a date and a date with time is performed the natural way: by adding a null time or dropping the time.
@ -696,7 +700,7 @@ Also see the `toUnixTimestamp` function.
## toFixedString(s, N)
Converts a String type argument to a FixedString(N) type (a string with fixed length N). N must be a constant.
Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N).
If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown.
## toStringCutToZero(s)
@ -914,7 +918,7 @@ Result:
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
```
Conversion to FixedString(N) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported.
@ -1174,7 +1178,7 @@ For all of the formats with separator the function parses months names expressed
**Returned value**
- `time_string` converted to the `DateTime` data type.
- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type.
**Examples**
@ -1254,10 +1258,10 @@ Result:
**See Also**
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
## parseDateTimeBestEffortUS

View File

@ -19,8 +19,15 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_n
## Example
Create a user:
```sql
CREATE USER robin IDENTIFIED BY 'password';
```
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
``` sql
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
CREATE
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
TO robin
```

View File

@ -17,10 +17,11 @@ By default, tables are created only on the current server. Distributed DDL queri
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2],
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
...
) ENGINE = engine
COMMENT 'comment for table'
```
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
@ -32,6 +33,8 @@ Expressions can also be defined for default values (see below).
If necessary, primary key can be specified, with one or more key expressions.
Comments can be added for columns and for the table.
### With a Schema Similar to Other Table
``` sql
@ -127,6 +130,26 @@ Default expressions may be defined as an arbitrary expression from table constan
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime DEFAULT now(),
updated_at_date Date DEFAULT toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test (id) Values (1);
SELECT * FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:06:46 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### MATERIALIZED
`MATERIALIZED expr`
@ -135,6 +158,36 @@ Materialized expression. Such a column cant be specified for INSERT, because
For an INSERT without a list of columns, these columns are not considered.
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime MATERIALIZED now(),
updated_at_date Date MATERIALIZED toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1);
SELECT * FROM test;
┌─id─┐
│ 1 │
└────┘
SELECT id, updated_at, updated_at_date FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### EPHEMERAL
`EPHEMERAL [expr]`
@ -142,6 +195,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
unhexed String EPHEMERAL,
hexed FixedString(4) DEFAULT unhex(unhexed)
)
ENGINE = MergeTree
ORDER BY id
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
SELECT
id,
hexed,
hex(hexed)
FROM test
FORMAT Vertical;
Row 1:
──────
id: 1
hexed: Z<><5A>
hex(hexed): 5A90B714
```
### ALIAS
`ALIAS expr`
@ -156,6 +237,29 @@ If you add a new column to a table but later change its default expression, the
It is not possible to set default values for elements in nested data structures.
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
size_bytes Int64,
size String Alias formatReadableSize(size_bytes)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1, 4678899);
SELECT id, size_bytes, size FROM test;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
SELECT * FROM test SETTINGS asterisk_include_alias_columns=1;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
```
## Primary Key
You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:
@ -166,7 +270,7 @@ You can define a [primary key](../../../engines/table-engines/mergetree-family/m
CREATE TABLE db.table_name
(
name1 type1, name2 type2, ...,
PRIMARY KEY(expr1[, expr2,...])]
PRIMARY KEY(expr1[, expr2,...])
)
ENGINE = engine;
```

View File

@ -54,6 +54,10 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
:::tip
Here is a step by step guide on using [Materialized views](docs/en/guides/developer/cascading-materialized-views.md).
:::
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` the table engine for storing data.

View File

@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine havet happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.
## Implementation Details
If the `FROM` clause is omitted, data will be read from the `system.one` table.

View File

@ -83,9 +83,13 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`
### String
Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. It means that you can use the sequences `\'`and`\\`. The value will have the [String](../sql-reference/data-types/string.md) type.
String literals must be enclosed in single quotes, double quotes are not supported.
Escaping works either
In string literals, you need to escape at least `'` and `\`. Single quotes can be escaped with the single quote, literals `'It\'s'` and `'It''s'` are equal.
- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones.
In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.
### Compound

View File

@ -23,23 +23,3 @@ You can use table functions in:
:::warning
You cant use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.
:::
| Function | Description |
|------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. |
| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. |
| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. |
| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. |
| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. |
| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. |
| [postgresql](../../sql-reference/table-functions/postgresql.md) | Creates a [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)-engine table. |
| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. |
| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. |
| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. |
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
:::note
Only these table functions are enabled in readonly mode :
null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
:::

View File

@ -567,7 +567,7 @@ SELECT
ts,
value,
round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts)
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp
FROM sensors
ORDER BY
metric ASC,

View File

@ -24,6 +24,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -0,0 +1,53 @@
---
slug: /ru/operations/system-tables/server_settings
---
# system.server_settings
Содержит информацию о конфигурации сервера.
В настоящий момент таблица содержит только верхнеуровневые параметры из файла `config.xml` и не поддерживает вложенные конфигурации
(например [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger))
Столбцы:
- `name` ([String](../../sql-reference/data-types/string.md)) — имя настройки.
- `value` ([String](../../sql-reference/data-types/string.md)) — значение настройки.
- `default` ([String](../../sql-reference/data-types/string.md)) — значению настройки по умолчанию.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — показывает, была ли настройка указана в `config.xml` или является значением по-умолчанию.
- `description` ([String](../../sql-reference/data-types/string.md)) — краткое описание настройки.
- `type` ([String](../../sql-reference/data-types/string.md)) — тип настройки.
**Пример**
Пример показывает как получить информацию о настройках, имена которых содержат `thread_pool`.
``` sql
SELECT *
FROM system.server_settings
WHERE name LIKE '%thread_pool%'
```
``` text
┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │
└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
```
Использование `WHERE changed` может быть полезно, например, если необходимо проверить,
что настройки корректно загрузились из конфигурационного файла и используются.
<!-- -->
``` sql
SELECT * FROM system.settings WHERE changed AND name='max_thread_pool_size'
```
**Cм. также**
- [Настройки](../../operations/system-tables/settings.md)
- [Конфигурационные файлы](../../operations/configuration-files.md)
- [Настройки сервера](../../operations/server-configuration-parameters/settings.md)

View File

@ -16,6 +16,7 @@ slug: /ru/operations/system-tables/settings
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку:
- `0` — Текущий пользователь может изменять настройку.
- `1` — Текущий пользователь не может изменять настройку.
- `default` ([String](../../sql-reference/data-types/string.md)) — значению настройки по умолчанию.
**Пример**

View File

@ -301,7 +301,7 @@ ClickHouse поддерживает временные таблицы со сл
- Временные таблицы исчезают после завершения сессии, в том числе при обрыве соединения.
- Временная таблица использует только модуль памяти.
- Невозможно указать базу данных для временной таблицы. Она создается вне баз данных.
- Невозможно создать временную таблицу распределнным DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
- Невозможно создать временную таблицу распределённым DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
- Если временная таблица имеет то же имя, что и некоторая другая, то, при упоминании в запросе без указания БД, будет использована временная таблица.
- При распределённой обработке запроса, используемые в запросе временные таблицы, передаются на удалённые серверы.
@ -344,7 +344,9 @@ REPLACE TABLE myOldTable SELECT * FROM myOldTable WHERE CounterID <12345;
### Синтаксис
```sql
{CREATE [OR REPLACE]|REPLACE} TABLE [db.]table_name
```
Для данного запроса можно использовать любые варианты синтаксиса запроса `CREATE`. Запрос `REPLACE` для несуществующей таблицы вызовет ошибку.

View File

@ -108,7 +108,7 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
## Модификатор WITH CUBE {#with-cube-modifier}
Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.

View File

@ -16,7 +16,7 @@ FROM <left_table>
(ON <expr_list>)|(USING <column_list>) ...
```
Выражения из секции `ON` и столбцы из секции `USING` называется «ключами соединения». Если не указано иное, при присоединение создаётся [Декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.
Выражения из секции `ON` и столбцы из секции `USING` называются «ключами соединения». Если не указано иное, при присоединение создаётся [Декартово произведение](https://en.wikipedia.org/wiki/Cartesian_product) из строк с совпадающими значениями ключей соединения, что может привести к получению результатов с гораздо большим количеством строк, чем исходные таблицы.
## Поддерживаемые типы соединения {#select-join-types}
@ -28,7 +28,7 @@ FROM <left_table>
- `FULL OUTER JOIN`, не совпадающие строки из обеих таблиц возвращаются в дополнение к совпадающим строкам.
- `CROSS JOIN`, производит декартово произведение таблиц целиком, ключи соединения не указываются.
Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является ли указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).
Без указания типа `JOIN` подразумевается `INNER`. Ключевое слово `OUTER` можно опускать. Альтернативным синтаксисом для `CROSS JOIN` является указание нескольких таблиц, разделённых запятыми, в [секции FROM](from.md).
Дополнительные типы соединений, доступные в ClickHouse:
@ -62,7 +62,7 @@ FROM <left_table>
Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попаду в результат.
Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый агрумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.
Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`.
:::note "Примечание"
Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).
@ -280,7 +280,7 @@ SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b);
Каждый раз для выполнения запроса с одинаковым `JOIN`, подзапрос выполняется заново — результат не кэшируется. Это можно избежать, используя специальный движок таблиц [Join](../../../engines/table-engines/special/join.md), представляющий собой подготовленное множество для соединения, которое всегда находится в оперативке.
В некоторых случаях это более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
В некоторых случаях более эффективно использовать [IN](../../operators/in.md) вместо `JOIN`.
Если `JOIN` необходим для соединения с таблицами измерений (dimension tables - сравнительно небольшие таблицы, которые содержат свойства измерений - например, имена для рекламных кампаний), то использование `JOIN` может быть не очень удобным из-за громоздкости синтаксиса, а также из-за того, что правая таблица читается заново при каждом запросе. Специально для таких случаев существует функциональность «Внешние словари», которую следует использовать вместо `JOIN`. Дополнительные сведения смотрите в разделе «Внешние словари».

View File

@ -67,7 +67,7 @@ sidebar_label: ORDER BY
## Примеры с использованием сравнения {#collation-examples}
Пример с значениями типа [String](../../../sql-reference/data-types/string.md):
Пример со значениями типа [String](../../../sql-reference/data-types/string.md):
Входная таблица:
@ -241,13 +241,13 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
└───┴─────────┘
```
## Деталь реализации {#implementation-details}
## Детали реализации {#implementation-details}
Если кроме `ORDER BY` указан также не слишком большой [LIMIT](limit.md), то расходуется меньше оперативки. Иначе расходуется количество памяти, пропорциональное количеству данных для сортировки. При распределённой обработке запроса, если отсутствует [GROUP BY](group-by.md), сортировка частично делается на удалённых серверах, а на сервере-инициаторе запроса производится слияние результатов. Таким образом, при распределённой сортировке, может сортироваться объём данных, превышающий размер памяти на одном сервере.
Существует возможность выполнять сортировку во внешней памяти (с созданием временных файлов на диске), если оперативной памяти не хватает. Для этого предназначена настройка `max_bytes_before_external_sort`. Если она выставлена в 0 (по умолчанию), то внешняя сортировка выключена. Если она включена, то при достижении объёмом данных для сортировки указанного количества байт, накопленные данные будут отсортированы и сброшены во временный файл. После того, как все данные будут прочитаны, будет произведено слияние всех сортированных файлов и выдача результата. Файлы записываются в директорию `/var/lib/clickhouse/tmp/` (по умолчанию, может быть изменено с помощью параметра `tmp_path`) в конфиге.
На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому, значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставите `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB.
На выполнение запроса может расходоваться больше памяти, чем `max_bytes_before_external_sort`. Поэтому значение этой настройки должно быть существенно меньше, чем `max_memory_usage`. Для примера, если на вашем сервере 128 GB оперативки, и вам нужно выполнить один запрос, то выставьте `max_memory_usage` в 100 GB, а `max_bytes_before_external_sort` в 80 GB.
Внешняя сортировка работает существенно менее эффективно, чем сортировка в оперативке.
@ -366,9 +366,9 @@ ORDER BY
└────────────┴────────────┴──────────┘
```
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последователность заполнения для `d1`.
Поле `d1` не заполняется и использует значение по умолчанию. Поскольку у нас нет повторяющихся значений для `d2`, мы не можем правильно рассчитать последовательность заполнения для `d1`.
едующий запрос (с измененым порядком в ORDER BY):
едующий запрос (с измененным порядком в ORDER BY):
```sql
SELECT
toDate((number * 10) * 86400) AS d1,

View File

@ -13,7 +13,7 @@ Prewhere — это оптимизация для более эффективн
`PREWHERE` имеет смысл использовать, если есть условия фильтрации, которые использует меньшинство столбцов из тех, что есть в запросе, но достаточно сильно фильтрует данные. Таким образом, сокращается количество читаемых данных.
В запросе может быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.
В запросе могут быть одновременно указаны и `PREWHERE`, и `WHERE`. В этом случае `PREWHERE` предшествует `WHERE`.
Если значение параметра [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) равно 0, эвристика по автоматическому перемещению части выражений из `WHERE` к `PREWHERE` отключается.

View File

@ -10,7 +10,7 @@ sidebar_label: SAMPLE
Сэмплирование имеет смысл, когда:
1. Точность результата не важна, например, для оценочных расчетов.
2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть \&lt;100 мс. При этом точность расчета имеет более низкий приоритет.
2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть &lt;100 мс. При этом точность расчета имеет более низкий приоритет.
3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум подпиской.
:::note "Внимание"

View File

@ -26,7 +26,7 @@ SELECT CounterID, 2 AS table, sum(Sign) AS c
Результирующие столбцы сопоставляются по их индексу (порядку внутри `SELECT`). Если имена столбцов не совпадают, то имена для конечного результата берутся из первого запроса.
При объединении выполняет приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип.
При объединении выполняется приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION` данные будут иметь `Nullable` тип.
Запросы, которые являются частью `UNION`, могут быть заключены в круглые скобки. [ORDER BY](order-by.md) и [LIMIT](limit.md) применяются к отдельным запросам, а не к конечному результату. Если вам нужно применить преобразование к конечному результату, вы можете разместить все объединенные с помощью `UNION` запросы в подзапрос в секции [FROM](from.md).

View File

@ -5,7 +5,7 @@ sidebar_label: WITH
# Секция WITH {#with-clause}
Clickhouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивныеапросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.
ClickHouse поддерживает [Общие табличные выражения](https://ru.wikipedia.org/wiki/Иерархические_и_рекурсивныеапросы_в_SQL), то есть позволяет использовать результаты выражений из секции `WITH` в остальной части `SELECT` запроса. Именованные подзапросы могут быть включены в текущий и дочерний контекст запроса в тех местах, где разрешены табличные объекты. Рекурсия предотвращается путем скрытия общего табличного выражения текущего уровня из выражения `WITH`.
## Синтаксис

View File

@ -24,6 +24,7 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [one-ck](https://github.com/lizhichao/one-ck)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -18,12 +18,14 @@ Group=clickhouse
Restart=always
RestartSec=30
# Since ClickHouse is systemd aware default 1m30sec may not be enough
TimeoutStartSec=inifinity
TimeoutStartSec=infinity
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
# Bring back /etc/default/clickhouse for backward compatibility
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE

View File

@ -1,5 +1,8 @@
#include "LocalServer.h"
#include <sys/resource.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
@ -179,9 +182,9 @@ void LocalServer::tryInitPath()
parent_folder = std::filesystem::temp_directory_path();
}
catch (const fs::filesystem_error& e)
catch (const fs::filesystem_error & e)
{
// tmp folder don't exists? misconfiguration? chroot?
// The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
parent_folder = std::filesystem::current_path();
@ -390,6 +393,21 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Try to increase limit on number of open files.
{
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim))
throw Poco::Exception("Cannot getrlimit");
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
}
}
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)

View File

@ -6,7 +6,6 @@
#include <sys/types.h>
#include <pwd.h>
#include <unistd.h>
#include <Poco/Version.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
@ -92,6 +91,7 @@
#include <Server/ProtocolServerAdapter.h>
#include <Server/HTTP/HTTPServer.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <Core/ServerSettings.h>
#include <filesystem>
#include <unordered_set>
@ -663,7 +663,10 @@ try
MainThreadStatus::getInstance();
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
ServerSettings server_settings;
server_settings.loadSettingsFromConfig(config());
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
#if USE_HDFS
/// This will point libhdfs3 to the right location for its config.
@ -697,7 +700,7 @@ try
{
const String config_path = config().getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf");
setenv("OPENSSL_CONF", config_dir.string(), true);
setenv("OPENSSL_CONF", config_dir.c_str(), true);
}
#endif
@ -748,9 +751,9 @@ try
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
// ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well.
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000));
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size);
#if USE_AZURE_BLOB_STORAGE
/// It makes sense to deinitialize libxml after joining of all threads
@ -766,9 +769,9 @@ try
#endif
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size,
server_settings.io_thread_pool_queue_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
@ -784,15 +787,15 @@ try
}
}
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));
Poco::ThreadPool server_pool(3, server_settings.max_connections);
std::mutex servers_lock;
std::vector<ProtocolServerAdapter> servers;
std::vector<ProtocolServerAdapter> servers_to_start_before_tables;
/// This object will periodically calculate some metrics.
ServerAsynchronousMetrics async_metrics(
global_context,
config().getUInt("asynchronous_metrics_update_period_s", 1),
config().getUInt("asynchronous_heavy_metrics_update_period_s", 120),
server_settings.asynchronous_metrics_update_period_s,
server_settings.asynchronous_heavy_metrics_update_period_s,
[&]() -> std::vector<ProtocolServerMetrics>
{
std::vector<ProtocolServerMetrics> metrics;
@ -807,7 +810,7 @@ try
}
);
ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10));
ConnectionCollector::init(global_context, server_settings.max_threads_for_connection_collector);
bool has_zookeeper = config().has("zookeeper");
@ -826,6 +829,9 @@ try
Settings::checkNoSettingNamesAtTopLevel(config(), config_path);
/// We need to reload server settings because config could be updated via zookeeper.
server_settings.loadSettingsFromConfig(config());
#if defined(OS_LINUX)
std::string executable_path = getExecutablePath();
@ -945,7 +951,7 @@ try
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = config().getString("default_database", "default");
std::string default_database = server_settings.default_database.toString();
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();
@ -1036,21 +1042,18 @@ try
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
/// Storage with temporary data for processing of heavy queries.
if (auto temporary_policy = config().getString("tmp_policy", ""); !temporary_policy.empty())
if (!server_settings.tmp_policy.value.empty())
{
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStoragePolicy(temporary_policy, max_size);
global_context->setTemporaryStoragePolicy(server_settings.tmp_policy, server_settings.max_temporary_data_on_disk_size);
}
else if (auto temporary_cache = config().getString("temporary_data_in_cache", ""); !temporary_cache.empty())
else if (!server_settings.temporary_data_in_cache.value.empty())
{
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStorageInCache(temporary_cache, max_size);
global_context->setTemporaryStorageInCache(server_settings.temporary_data_in_cache, server_settings.max_temporary_data_on_disk_size);
}
else
{
std::string temporary_path = config().getString("tmp_path", path / "tmp/");
size_t max_size = config().getUInt64("max_temporary_data_on_disk_size", 0);
global_context->setTemporaryStoragePath(temporary_path, max_size);
global_context->setTemporaryStoragePath(temporary_path, server_settings.max_temporary_data_on_disk_size);
}
/** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
@ -1185,10 +1188,12 @@ try
{
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
/// Limit on total memory usage
size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0);
ServerSettings server_settings;
server_settings.loadSettingsFromConfig(*config);
double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
size_t max_server_memory_usage = server_settings.max_server_memory_usage;
double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
if (max_server_memory_usage == 0)
@ -1216,8 +1221,7 @@ try
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true);
total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory);
total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
@ -1235,36 +1239,23 @@ try
global_context->setRemoteHostFilter(*config);
/// Setup protection to avoid accidental DROP for big tables (that are greater than 50 GB by default)
if (config->has("max_table_size_to_drop"))
global_context->setMaxTableSizeToDrop(config->getUInt64("max_table_size_to_drop"));
if (config->has("max_partition_size_to_drop"))
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
if (config->has("concurrent_threads_soft_limit_num"))
if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num;
if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
{
auto value = config->getUInt64("concurrent_threads_soft_limit_num", 0);
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}
if (config->has("concurrent_threads_soft_limit_ratio_to_cores"))
{
auto value = config->getUInt64("concurrent_threads_soft_limit_ratio_to_cores", 0) * std::thread::hardware_concurrency();
auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}
ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (config->has("max_concurrent_insert_queries"))
global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0));
if (config->has("max_concurrent_select_queries"))
global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0));
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries);
global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries);
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);
@ -1273,56 +1264,36 @@ try
/// Note: If you specified it in the top level config (not it config of default profile)
/// then ClickHouse will use it exactly.
/// This is done for backward compatibility.
if (global_context->areBackgroundExecutorsInitialized() && (config->has("background_pool_size") || config->has("background_merges_mutations_concurrency_ratio")))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_pool_size", 16);
auto new_ratio = config->getUInt64("background_merges_mutations_concurrency_ratio", 2);
auto new_pool_size = server_settings.background_pool_size;
auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio;
global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size * new_ratio);
auto new_scheduling_policy = config->getString("background_merges_mutations_scheduling_policy", "round_robin");
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_scheduling_policy);
global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString());
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_move_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_move_pool_size");
auto new_pool_size = server_settings.background_move_pool_size;
global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_fetches_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_fetches_pool_size");
auto new_pool_size = server_settings.background_fetches_pool_size;
global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (global_context->areBackgroundExecutorsInitialized() && config->has("background_common_pool_size"))
if (global_context->areBackgroundExecutorsInitialized())
{
auto new_pool_size = config->getUInt64("background_common_pool_size");
auto new_pool_size = server_settings.background_common_pool_size;
global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
}
if (config->has("background_buffer_flush_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_buffer_flush_schedule_pool_size");
global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_schedule_pool_size");
global_context->getSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_message_broker_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_message_broker_schedule_pool_size");
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_pool_size);
}
if (config->has("background_distributed_schedule_pool_size"))
{
auto new_pool_size = config->getUInt64("background_distributed_schedule_pool_size");
global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size);
}
global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size);
global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size);
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size);
global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size);
if (config->has("resources"))
{
@ -1467,18 +1438,15 @@ try
});
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(config().getInt("max_concurrent_queries", 0));
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
/// Set up caches.
/// Lower cache size on low-memory systems.
double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
size_t max_cache_size = static_cast<size_t>(memory_amount * server_settings.cache_size_to_ram_max_ratio);
/// Size of cache for uncompressed blocks. Zero means disabled.
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU");
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -1500,9 +1468,8 @@ try
global_context,
settings.async_insert_threads));
/// Size of cache for marks (index of MergeTree family of tables).
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
String mark_cache_policy = config().getString("mark_cache_policy", "SLRU");
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -1513,20 +1480,14 @@ try
}
global_context->setMarkCache(mark_cache_size, mark_cache_policy);
/// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0);
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
if (server_settings.index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
/// Size of cache for index marks (index of MergeTree skip indices).
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0);
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
if (server_settings.index_mark_cache_size)
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
/// A cache for mmapped files.
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary.
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
if (server_settings.mmap_cache_size)
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
/// A cache for query results.
global_context->setQueryCache(config());
@ -1612,7 +1573,7 @@ try
/// context is destroyed.
/// In addition this object has to be created before the loading of the tables.
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
if (server_settings.disable_internal_dns_cache)
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
@ -1622,7 +1583,7 @@ try
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures);
}
if (dns_cache_updater)
@ -1887,7 +1848,7 @@ try
LOG_INFO(log, "Closed all listening sockets.");
/// Killing remaining queries.
if (!config().getBool("shutdown_wait_unfinished_queries", false))
if (server_settings.shutdown_wait_unfinished_queries)
global_context->getProcessList().killAllQueries();
if (current_connections)

View File

@ -1,8 +1,8 @@
#include <Access/Common/AllowedClientHosts.h>
#include <Common/Exception.h>
#include <Common/likePatternToRegexp.h>
#include <Common/logger_useful.h>
#include <base/scope_guard.h>
#include <Functions/likePatternToRegexp.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/RegularExpression.h>
#include <boost/algorithm/string/predicate.hpp>

View File

@ -13,7 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -129,7 +129,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
for (size_t i = begin; i < end; ++i)

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
@ -197,7 +197,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena);

View File

@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait()
template <typename T>
struct GroupArraySamplerData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
@ -97,6 +100,9 @@ struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData<T, false>
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;

View File

@ -32,6 +32,9 @@ namespace ErrorCodes
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena

View File

@ -117,7 +117,21 @@ public:
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
/// In this version, pairs were serialized with padding.
/// We must ensure that padding bytes are zero-filled.
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, first) == 0);
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, second) > 0);
char zero_padding[offsetof(typename MaxIntersectionsData<PointType>::Value, second) - sizeof(value[0].first)]{};
for (size_t i = 0; i < size; ++i)
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override

View File

@ -15,6 +15,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_,
QueryTreeNodePtr right_table_expression_,
QueryTreeNodePtr join_expression_,
@ -113,4 +118,18 @@ ASTPtr JoinNode::toASTImpl() const
return tables_in_select_query_ast;
}
void JoinNode::crossToInner(const QueryTreeNodePtr & join_expression_)
{
if (kind != JoinKind::Cross && kind != JoinKind::Comma)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot rewrite {} to INNER JOIN, expected CROSS", toString(kind));
if (children[join_expression_child_index])
throw Exception(ErrorCodes::LOGICAL_ERROR, "Join expression is expected to be empty for CROSS JOIN, got '{}'",
children[join_expression_child_index]->formatConvertedASTForErrorMessage());
kind = JoinKind::Inner;
strictness = JoinStrictness::All;
children[join_expression_child_index] = join_expression_;
}
}

View File

@ -126,6 +126,13 @@ public:
return QueryTreeNodeType::JOIN;
}
/*
* Convert CROSS to INNER JOIN - changes JOIN kind and sets a new join expression
* (that was moved from WHERE clause).
* Expects the current kind to be CROSS (and join expression to be null because of that).
*/
void crossToInner(const QueryTreeNodePtr & join_expression_);
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
protected:

View File

@ -102,19 +102,21 @@ public:
if (!left_argument_constant_node && !right_argument_constant_node)
return;
/** If we extract negative constant, aggregate function name must be updated.
/** Need reverse max <-> min for:
*
* Example: SELECT min(-1 * id);
* Result: SELECT -1 * max(id);
* max(-1*value) -> -1*min(value)
* max(value/-2) -> min(value)/-2
* max(1-value) -> 1-min(value)
*/
std::string aggregate_function_name_if_constant_is_negative;
if (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide")
auto get_reverse_aggregate_function_name = [](const std::string & aggregate_function_name) -> std::string
{
if (lower_aggregate_function_name == "min")
aggregate_function_name_if_constant_is_negative = "max";
else if (lower_aggregate_function_name == "max")
aggregate_function_name_if_constant_is_negative = "min";
}
if (aggregate_function_name == "min")
return "max";
else if (aggregate_function_name == "max")
return "min";
else
return aggregate_function_name;
};
size_t arithmetic_function_argument_index = 0;
@ -126,11 +128,11 @@ public:
/// Rewrite `aggregate_function(inner_function(constant, argument))` into `inner_function(constant, aggregate_function(argument))`
const auto & left_argument_constant_value_literal = left_argument_constant_node->getValue();
if (!aggregate_function_name_if_constant_is_negative.empty() &&
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
{
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
}
bool need_reverse = (arithmetic_function_name == "multiply" && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
|| (arithmetic_function_name == "minus");
if (need_reverse)
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
arithmetic_function_argument_index = 1;
}
@ -138,11 +140,10 @@ public:
{
/// Rewrite `aggregate_function(inner_function(argument, constant))` into `inner_function(aggregate_function(argument), constant)`
const auto & right_argument_constant_value_literal = right_argument_constant_node->getValue();
if (!aggregate_function_name_if_constant_is_negative.empty() &&
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
{
lower_aggregate_function_name = aggregate_function_name_if_constant_is_negative;
}
bool need_reverse = (arithmetic_function_name == "multiply" || arithmetic_function_name == "divide") && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal);
if (need_reverse)
lower_aggregate_function_name = get_reverse_aggregate_function_name(lower_aggregate_function_name);
arithmetic_function_argument_index = 0;
}

View File

@ -4,12 +4,13 @@
#include <unordered_map>
#include <vector>
#include <Common/likePatternToRegexp.h>
#include <Core/Field.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/likePatternToRegexp.h>
#include <Interpreters/Context.h>

View File

@ -0,0 +1,262 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/ColumnNode.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
}
namespace
{
void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
{
auto * func = node->as<FunctionNode>();
if (!func)
{
other.push_back(node);
return;
}
const auto & args = func->getArguments().getNodes();
if (args.size() == 2 && func->getFunctionName() == "equals")
{
equi_conditions.push_back(node);
}
else if (func->getFunctionName() == "and")
{
for (const auto & arg : args)
exctractJoinConditions(arg, equi_conditions, other);
}
else
{
other.push_back(node);
}
}
const QueryTreeNodePtr & getEquiArgument(const QueryTreeNodePtr & cond, size_t index)
{
const auto * func = cond->as<FunctionNode>();
chassert(func && func->getFunctionName() == "equals" && func->getArguments().getNodes().size() == 2);
return func->getArguments().getNodes()[index];
}
/// Check that node has only one source and return it.
/// {_, false} - multiple sources
/// {nullptr, true} - no sources
/// {source, true} - single source
std::pair<const IQueryTreeNode *, bool> getExpressionSource(const QueryTreeNodePtr & node)
{
if (const auto * column = node->as<ColumnNode>())
{
auto source = column->getColumnSourceOrNull();
if (!source)
return {nullptr, false};
return {source.get(), true};
}
if (const auto * func = node->as<FunctionNode>())
{
const IQueryTreeNode * source = nullptr;
const auto & args = func->getArguments().getNodes();
for (const auto & arg : args)
{
auto [arg_source, is_ok] = getExpressionSource(arg);
if (!is_ok)
return {nullptr, false};
if (!source)
source = arg_source;
else if (arg_source && !source->isEqual(*arg_source))
return {nullptr, false};
}
return {source, true};
}
if (node->as<ConstantNode>())
return {nullptr, true};
return {nullptr, false};
}
bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr & table_expression)
{
if (!source)
return true;
if (source->isEqual(*table_expression))
return true;
if (const auto * join_node = table_expression->as<JoinNode>())
{
return findInTableExpression(source, join_node->getLeftTableExpression())
|| findInTableExpression(source, join_node->getRightTableExpression());
}
return false;
}
void getJoinNodes(QueryTreeNodePtr & join_tree_node, std::vector<JoinNode *> & join_nodes)
{
auto * join_node = join_tree_node->as<JoinNode>();
if (!join_node)
return;
if (!isCrossOrComma(join_node->getKind()))
return;
join_nodes.push_back(join_node);
getJoinNodes(join_node->getLeftTableExpression(), join_nodes);
getJoinNodes(join_node->getRightTableExpression(), join_nodes);
}
class CrossToInnerJoinVisitor : public InDepthQueryTreeVisitorWithContext<CrossToInnerJoinVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<CrossToInnerJoinVisitor>;
using Base::Base;
/// Returns false if can't rewrite cross to inner join
bool tryRewrite(JoinNode & join_node, QueryTreeNodePtr & where_condition)
{
if (!isCrossOrComma(join_node.getKind()))
return false;
if (!where_condition)
return false;
const auto & left_table = join_node.getLeftTableExpression();
const auto & right_table = join_node.getRightTableExpression();
QueryTreeNodes equi_conditions;
QueryTreeNodes other_conditions;
exctractJoinConditions(where_condition, equi_conditions, other_conditions);
bool can_convert_cross_to_inner = false;
for (auto & cond : equi_conditions)
{
auto left_src = getExpressionSource(getEquiArgument(cond, 0));
auto right_src = getExpressionSource(getEquiArgument(cond, 1));
if (left_src.second && right_src.second && left_src.first && right_src.first)
{
bool can_join_on = (findInTableExpression(left_src.first, left_table) && findInTableExpression(right_src.first, right_table))
|| (findInTableExpression(left_src.first, right_table) && findInTableExpression(right_src.first, left_table));
if (can_join_on)
{
can_convert_cross_to_inner = true;
continue;
}
}
/// Can't join on this condition, move it to other conditions
other_conditions.push_back(cond);
cond = nullptr;
}
if (!can_convert_cross_to_inner)
return false;
equi_conditions.erase(std::remove(equi_conditions.begin(), equi_conditions.end(), nullptr), equi_conditions.end());
join_node.crossToInner(makeConjunction(equi_conditions));
where_condition = makeConjunction(other_conditions);
return true;
}
void visitImpl(QueryTreeNodePtr & node)
{
if (!isEnabled())
return;
auto * query_node = node->as<QueryNode>();
if (!query_node)
return;
auto & where_node = query_node->getWhere();
if (!where_node)
return;
auto & join_tree_node = query_node->getJoinTree();
if (!join_tree_node || join_tree_node->getNodeType() != QueryTreeNodeType::JOIN)
return;
/// In case of multiple joins, we can try to rewrite all of them
/// Example: SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.a = t3.a
std::vector<JoinNode *> join_nodes;
getJoinNodes(join_tree_node, join_nodes);
for (auto * join_node : join_nodes)
{
bool is_rewritten = tryRewrite(*join_node, where_node);
if (!is_rewritten && forceRewrite(join_node->getKind()))
{
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Failed to rewrite '{}' to INNER JOIN: "
"no equi-join conditions found in WHERE clause. "
"You may set setting `cross_to_inner_join_rewrite` to `1` to allow slow CROSS JOIN for this case",
join_node->formatASTForErrorMessage());
}
}
}
private:
bool isEnabled() const
{
return getSettings().cross_to_inner_join_rewrite;
}
bool forceRewrite(JoinKind kind) const
{
if (kind == JoinKind::Cross)
return false;
/// Comma join can be forced to rewrite
return getSettings().cross_to_inner_join_rewrite >= 2;
}
QueryTreeNodePtr makeConjunction(const QueryTreeNodes & nodes)
{
if (nodes.empty())
return nullptr;
if (nodes.size() == 1)
return nodes.front();
auto function_node = std::make_shared<FunctionNode>("and");
for (const auto & node : nodes)
function_node->getArguments().getNodes().push_back(node);
const auto & function = FunctionFactory::instance().get("and", getContext());
function_node->resolveAsFunction(function->build(function_node->getArgumentColumns()));
return function_node;
}
};
}
void CrossToInnerJoinPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
CrossToInnerJoinVisitor visitor(std::move(context));
visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,28 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Replace CROSS JOIN with INNER JOIN.
* Example:
* SELECT * FROM t1 CROSS JOIN t2 WHERE t1.a = t2.a AND t1.b > 10 AND t2.b = t2.c
* We can move equality condition to ON section of INNER JOIN:
* SELECT * FROM t1 INNER JOIN t2 ON t1.a = t2.a WHERE t1.b > 10 AND t2.b = t2.c
*/
class CrossToInnerJoinPass final : public IQueryTreePass
{
public:
String getName() override { return "CrossToInnerJoin"; }
String getDescription() override
{
return "Replace CROSS JOIN with INNER JOIN";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -233,11 +233,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
auto select_settings = select_query_typed.settings();
SettingsChanges settings_changes;
/// We are going to remove settings LIMIT and OFFSET and
/// further replace them with corresponding expression nodes
UInt64 limit = 0;
UInt64 offset = 0;
/// Remove global settings limit and offset
if (const auto & settings_ref = updated_context->getSettingsRef(); settings_ref.limit || settings_ref.offset)
{
Settings settings = updated_context->getSettings();
limit = settings.limit;
offset = settings.offset;
settings.limit = 0;
settings.offset = 0;
updated_context->setSettings(settings);
}
if (select_settings)
{
auto & set_query = select_settings->as<ASTSetQuery &>();
updated_context->applySettingsChanges(set_query.changes);
settings_changes = set_query.changes;
/// Remove expression settings limit and offset
if (auto * limit_field = set_query.changes.tryGet("limit"))
{
limit = limit_field->safeGet<UInt64>();
set_query.changes.removeSetting("limit");
}
if (auto * offset_field = set_query.changes.tryGet("offset"))
{
offset = offset_field->safeGet<UInt64>();
set_query.changes.removeSetting("offset");
}
if (!set_query.changes.empty())
{
updated_context->applySettingsChanges(set_query.changes);
settings_changes = set_query.changes;
}
}
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
@ -323,12 +355,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
/// Combine limit expression with limit setting
auto select_limit = select_query_typed.limitLength();
if (select_limit)
if (select_limit && limit)
{
auto function_node = std::make_shared<FunctionNode>("least");
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
current_query_tree->getLimit() = std::move(function_node);
}
else if (limit)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
else if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
/// Combine offset expression with offset setting
auto select_offset = select_query_typed.limitOffset();
if (select_offset)
if (select_offset && offset)
{
auto function_node = std::make_shared<FunctionNode>("plus");
function_node->getArguments().getNodes().push_back(buildExpression(select_offset, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
current_query_tree->getOffset() = std::move(function_node);
}
else if (offset)
current_query_tree->getOffset() = std::make_shared<ConstantNode>(offset);
else if (select_offset)
current_query_tree->getOffset() = buildExpression(select_offset, current_context);
return current_query_tree;

View File

@ -39,6 +39,7 @@
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
namespace DB
@ -268,6 +269,7 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
}
}

View File

@ -577,8 +577,8 @@ if (TARGET ch_contrib::annoy)
endif()
if (TARGET ch_rust::skim)
# Add only -I, library is needed only for clickhouse-client/clickhouse-local
dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
dbms_target_link_libraries(PUBLIC ch_rust::skim)
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")

View File

@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader(
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
#endif
/// Rebind regular incremental search to C-T.
///
@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader(
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
#endif
}
ReplxxLineReader::~ReplxxLineReader()

View File

@ -747,7 +747,7 @@ namespace
*/
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
{
const IntType * data_copy_begin_ptr = nullptr;
size_t offsets_size = offsets.size();
@ -842,7 +842,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
#ifdef __SSE2__
if constexpr (std::is_same_v<T, UInt32>)
{
replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
replicateSSE2Int32(getData().data(), res->getData().data(), offsets);
return res;
}
#endif

View File

@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
AsynchronousMetricValues new_values;
auto current_time = std::chrono::system_clock::now();
auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time;
auto time_after_previous_update = current_time - previous_update_time;
previous_update_time = update_time;
double update_interval = 0.;
if (first_run)
update_interval = update_period.count();
else
update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
/// This is also a good indicator of system responsiveness.
new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
"The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up."

View File

@ -35,26 +35,26 @@ namespace
void CancelToken::Registry::insert(CancelToken * token)
{
std::lock_guard<std::mutex> lock(mutex);
std::lock_guard lock(mutex);
threads[token->thread_id] = token;
}
void CancelToken::Registry::remove(CancelToken * token)
{
std::lock_guard<std::mutex> lock(mutex);
std::lock_guard lock(mutex);
threads.erase(token->thread_id);
}
void CancelToken::Registry::signal(UInt64 tid)
{
std::lock_guard<std::mutex> lock(mutex);
std::lock_guard lock(mutex);
if (auto it = threads.find(tid); it != threads.end())
it->second->signalImpl();
}
void CancelToken::Registry::signal(UInt64 tid, int code, const String & message)
{
std::lock_guard<std::mutex> lock(mutex);
std::lock_guard lock(mutex);
if (auto it = threads.find(tid); it != threads.end())
it->second->signalImpl(code, message);
}

View File

@ -163,14 +163,14 @@ public:
/// Returns size of queue
size_t size() const
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
return queue.size();
}
/// Returns if queue is empty
bool empty() const
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
return queue.empty();
}
@ -184,7 +184,7 @@ public:
bool was_finished_before = false;
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
if (is_finished)
return true;
@ -202,14 +202,14 @@ public:
/// Returns if queue is finished
bool isFinished() const
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
return is_finished;
}
/// Returns if queue is finished and empty
bool isFinishedAndEmpty() const
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
return is_finished && queue.empty();
}
@ -217,7 +217,7 @@ public:
void clear()
{
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
if (is_finished)
return;
@ -233,7 +233,7 @@ public:
void clearAndFinish()
{
{
std::lock_guard<std::mutex> lock(queue_mutex);
std::lock_guard lock(queue_mutex);
std::queue<T> empty_queue;
queue.swap(empty_queue);

View File

@ -149,7 +149,7 @@ DateLUT::DateLUT()
const DateLUTImpl & DateLUT::getImplementation(const std::string & time_zone) const
{
std::lock_guard<std::mutex> lock(mutex);
std::lock_guard lock(mutex);
auto it = impls.emplace(time_zone, nullptr).first;
if (!it->second)

View File

@ -197,7 +197,7 @@
M(187, COLLATION_COMPARISON_FAILED) \
M(188, UNKNOWN_ACTION) \
M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \
M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \
M(190, SIZES_OF_ARRAYS_DONT_MATCH) \
M(191, SET_SIZE_LIMIT_EXCEEDED) \
M(192, UNKNOWN_USER) \
M(193, WRONG_PASSWORD) \

View File

@ -117,14 +117,14 @@ String FieldVisitorToString::operator() (const Map & x) const
{
WriteBufferFromOwnString wb;
wb << '(';
wb << '[';
for (auto it = x.begin(); it != x.end(); ++it)
{
if (it != x.begin())
wb << ", ";
wb << applyVisitor(*this, *it);
}
wb << ')';
wb << ']';
return wb.str();
}

View File

@ -100,8 +100,8 @@ private:
bool required_substring_is_prefix;
bool is_case_insensitive;
std::string required_substring;
std::optional<DB::StringSearcher<true, true>> case_sensitive_substring_searcher;
std::optional<DB::StringSearcher<false, true>> case_insensitive_substring_searcher;
std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
std::unique_ptr<RegexType> re2;
unsigned number_of_subpatterns;

View File

@ -46,4 +46,30 @@ Field * SettingsChanges::tryGet(std::string_view name)
return &change->value;
}
bool SettingsChanges::insertSetting(std::string_view name, const Field & value)
{
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
if (it != end())
return false;
emplace_back(name, value);
return true;
}
void SettingsChanges::setSetting(std::string_view name, const Field & value)
{
if (auto * setting_value = tryGet(name))
*setting_value = value;
else
insertSetting(name, value);
}
bool SettingsChanges::removeSetting(std::string_view name)
{
auto it = std::find_if(begin(), end(), [&name](const SettingChange & change) { return change.name == name; });
if (it == end())
return false;
erase(it);
return true;
}
}

View File

@ -28,6 +28,13 @@ public:
bool tryGet(std::string_view name, Field & out_value) const;
const Field * tryGet(std::string_view name) const;
Field * tryGet(std::string_view name);
/// Inserts element if doesn't exists and returns true, otherwise just returns false
bool insertSetting(std::string_view name, const Field & value);
/// Sets element to value, inserts if doesn't exist
void setSetting(std::string_view name, const Field & value);
/// If element exists - removes it and returns true, otherwise returns false
bool removeSetting(std::string_view name);
};
}

View File

@ -26,13 +26,13 @@ namespace CurrentStatusInfo
inline void set(Status status, Key key, Int8 value)
{
std::lock_guard<std::mutex> lock(locks[status]);
std::lock_guard lock(locks[status]);
values[status][key] = value;
}
inline void unset(Status status, Key key)
{
std::lock_guard<std::mutex> lock(locks[status]);
std::lock_guard lock(locks[status]);
values[status].erase(key);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -109,14 +109,14 @@ size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::s
}
size_t tld_list_size = tld_list.size();
std::lock_guard<std::mutex> lock(tld_lists_map_mutex);
std::lock_guard lock(tld_lists_map_mutex);
tld_lists_map.insert(std::make_pair(name, std::move(tld_list)));
return tld_list_size;
}
const TLDList & TLDListsHolder::getTldList(const std::string & name)
{
std::lock_guard<std::mutex> lock(tld_lists_map_mutex);
std::lock_guard lock(tld_lists_map_mutex);
auto it = tld_lists_map.find(name);
if (it == tld_lists_map.end())
throw Exception(ErrorCodes::TLD_LIST_NOT_FOUND, "TLD list {} does not exist", name);

View File

@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
void ThreadStatus::onFatalError()
{
std::lock_guard lock(thread_group->mutex);
if (fatal_error_callback)
fatal_error_callback();
}

View File

@ -1,6 +1,6 @@
#include <Common/VersionNumber.h>
#include <cstdlib>
#include <iostream>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
namespace DB
{
@ -10,29 +10,21 @@ VersionNumber::VersionNumber(std::string version_string)
if (version_string.empty())
return;
char * start = &version_string.front();
char * end = start;
const char * eos = &version_string.back() + 1;
do
ReadBufferFromString rb(version_string);
while (!rb.eof())
{
Int64 value = strtol(start, &end, 10);
Int64 value;
if (!tryReadIntText(value, rb))
break;
components.push_back(value);
start = end + 1;
if (!checkChar('.', rb))
break;
}
while (start < eos && (end < eos && *end == '.'));
}
std::string VersionNumber::toString() const
{
std::string str;
for (Int64 v : components)
{
if (!str.empty())
str += '.';
str += std::to_string(v);
}
return str;
return fmt::format("{}", fmt::join(components, "."));
}
int VersionNumber::compare(const VersionNumber & rhs) const

Some files were not shown because too many files have changed in this diff Show More