Merge branch 'master' of github.com:ClickHouse/ClickHouse into zvonand-issue-50570

zvonand 2023-08-25 16:51:51 +05:00
commit d5f26cece5
855 changed files with 20060 additions and 5883 deletions

.gitmodules vendored

@ -347,3 +347,15 @@
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git
[submodule "contrib/usearch"]
path = contrib/usearch
url = https://github.com/unum-cloud/usearch.git
[submodule "contrib/SimSIMD"]
path = contrib/SimSIMD
url = https://github.com/ashvardanian/SimSIMD.git
[submodule "contrib/FP16"]
path = contrib/FP16
url = https://github.com/Maratyszcza/FP16.git
[submodule "contrib/robin-map"]
path = contrib/robin-map
url = https://github.com/Tessil/robin-map.git


@ -11,6 +11,7 @@
#include <base/defines.h>
#include <base/types.h>
#include <base/unaligned.h>
#include <base/simd.h>
#include <city.h>
@ -29,6 +30,11 @@
#define CRC_INT __crc32cd
#endif
#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/**
* The std::string_view-like container to avoid creating strings to find substrings in the hash table.
@ -74,14 +80,14 @@ using StringRefs = std::vector<StringRef>;
* For more information, see hash_map_string_2.cpp
*/
inline bool compareSSE2(const char * p1, const char * p2)
inline bool compare8(const char * p1, const char * p2)
{
return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)),
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2))));
}
inline bool compareSSE2x4(const char * p1, const char * p2)
inline bool compare64(const char * p1, const char * p2)
{
return 0xFFFF == _mm_movemask_epi8(
_mm_and_si128(
@ -101,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2)
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 3)))));
}
inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
#elif defined(__aarch64__) && defined(__ARM_NEON)
inline bool compare8(const char * p1, const char * p2)
{
uint64_t mask = getNibbleMask(vceqq_u8(
vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))));
return 0xFFFFFFFFFFFFFFFF == mask;
}
inline bool compare64(const char * p1, const char * p2)
{
uint64_t mask = getNibbleMask(vandq_u8(
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))),
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 16)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 16)))),
vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 32)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 32))),
vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 48)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 48))))));
return 0xFFFFFFFFFFFFFFFF == mask;
}
#endif
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
inline bool memequalWide(const char * p1, const char * p2, size_t size)
{
/** The order of branches and the trick with overlapping comparisons
* are the same as in memcpy implementation.
@ -138,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
while (size >= 64)
{
if (compareSSE2x4(p1, p2))
if (compare64(p1, p2))
{
p1 += 64;
p2 += 64;
@ -150,17 +179,16 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size)
switch (size / 16)
{
case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
case 1: if (!compareSSE2(p1, p2)) return false;
case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
case 1: if (!compare8(p1, p2)) return false;
}
return compareSSE2(p1 + size - 16, p2 + size - 16);
return compare8(p1 + size - 16, p2 + size - 16);
}
#endif
inline bool operator== (StringRef lhs, StringRef rhs)
{
if (lhs.size != rhs.size)
@ -169,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs)
if (lhs.size == 0)
return true;
#if defined(__SSE2__)
return memequalSSE2Wide(lhs.data, rhs.data, lhs.size);
#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON))
return memequalWide(lhs.data, rhs.data, lhs.size);
#else
return 0 == memcmp(lhs.data, rhs.data, lhs.size);
#endif
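A note on the tail handling in `memequalWide` above: the "trick with overlapping comparisons" means that, after the whole 16-byte chunks are checked, the last chunk is compared at `p + size - 16`, re-reading a few bytes that were already compared instead of falling back to a byte-by-byte loop. Re-checking bytes is harmless for an equality test, so the hot path stays branch-light. A minimal scalar sketch of the same idea (plain C++, no SIMD; `equal16` is a hypothetical stand-in for `compare8`):

```cpp
#include <cassert>
#include <cstddef>
#include <cstring>

/// Hypothetical scalar stand-in for compare8(): equality of one 16-byte block.
static bool equal16(const char * p1, const char * p2)
{
    return 0 == std::memcmp(p1, p2, 16);
}

/// Equality for sizes in (16, 32]: two 16-byte compares whose ranges may overlap.
/// For size == 20 the blocks are [0, 16) and [4, 20); bytes 4..15 are checked twice,
/// which is cheaper than a per-byte tail loop and gives the same answer.
static bool equalUpTo32(const char * p1, const char * p2, std::size_t size)
{
    assert(size > 16 && size <= 32);
    return equal16(p1, p2) && equal16(p1 + size - 16, p2 + size - 16);
}

int main()
{
    const char a[] = "abcdefghijklmnopqrst"; /// 20 bytes of payload
    const char b[] = "abcdefghijklmnopqrsX"; /// differs only in the last byte
    assert(equalUpTo32(a, a, 20));
    assert(!equalUpTo32(a, b, 20));
    return 0;
}
```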

base/base/simd.h Normal file

@ -0,0 +1,14 @@
#pragma once
#if defined(__aarch64__) && defined(__ARM_NEON)
# include <arm_neon.h>
# pragma clang diagnostic ignored "-Wreserved-identifier"
/// Returns a 64 bit mask of nibbles (4 bits for each byte).
inline uint64_t getNibbleMask(uint8x16_t res)
{
return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0);
}
#endif
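The new `base/simd.h` helper is the NEON counterpart of the `_mm_movemask_epi8` step in the SSE2 path: `vshrn_n_u16(..., 4)` keeps 4 bits of every input byte, so a `vceqq_u8` result (0x00 or 0xFF per byte) collapses into a 64-bit mask that is all ones exactly when all 16 bytes matched. A minimal usage sketch, mirroring how `compare8` in `StringRef.h` consumes it (aarch64/NEON only; `allBytesEqual16` is just an illustrative name):

```cpp
#if defined(__aarch64__) && defined(__ARM_NEON)
#include <base/simd.h> /// provides getNibbleMask()
#include <arm_neon.h>

/// 16-byte equality: vceqq_u8 yields 0xFF for each equal byte and getNibbleMask()
/// packs those results into 64 bits (one nibble per byte), so "all bytes equal"
/// is simply "all 64 bits set".
inline bool allBytesEqual16(const char * p1, const char * p2)
{
    const uint8x16_t eq = vceqq_u8(
        vld1q_u8(reinterpret_cast<const unsigned char *>(p1)),
        vld1q_u8(reinterpret_cast<const unsigned char *>(p2)));
    return getNibbleMask(eq) == 0xFFFFFFFFFFFFFFFFULL;
}
#endif
```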


@ -4,10 +4,19 @@ macro(add_glob cur_list)
endmacro()
macro(add_headers_and_sources prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
add_glob(${prefix}_headers ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c)
endmacro()
macro(add_headers_only prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_headers ${common_path}/*.h)
endmacro()
macro(extract_into_parent_list src_list dest_list)
list(REMOVE_ITEM ${src_list} ${ARGN})
get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME)
foreach(file IN ITEMS ${ARGN})
list(APPEND ${dest_list} ${__dir_name}/${file})
endforeach()
set(${dest_list} "${${dest_list}}" PARENT_SCOPE)
endmacro()


@ -19,6 +19,19 @@ else ()
message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported")
endif ()
# Since we always use toolchain files to generate hermetic builds, cmake will
# always think it's a cross-compilation, See
# https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html
#
# This will slow down cmake configuration and compilation. For instance, LLVM
# will try to configure NATIVE LLVM targets with all tests enabled (You'll see
# Building native llvm-tblgen...).
#
# Here, we set it manually by checking the system name and processor.
if (${CMAKE_SYSTEM_NAME} STREQUAL ${CMAKE_HOST_SYSTEM_NAME} AND ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR})
set (CMAKE_CROSSCOMPILING 0)
endif ()
if (CMAKE_CROSSCOMPILING)
if (OS_DARWIN)
# FIXME: broken dependencies


@ -196,6 +196,17 @@ if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)
option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH)
add_contrib (FP16-cmake FP16)
add_contrib (robin-map-cmake robin-map)
add_contrib (SimSIMD-cmake SimSIMD)
add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD
else ()
message(STATUS "Not using USearch")
endif ()
add_contrib (xxHash-cmake xxHash)
add_contrib (libbcrypt-cmake libbcrypt)

contrib/FP16 vendored Submodule

@ -0,0 +1 @@
Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91


@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

contrib/SimSIMD vendored Submodule

@ -0,0 +1 @@
Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf


@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

contrib/arrow vendored

@ -1 +1 @@
Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d

contrib/base64 vendored

@ -1 +1 @@
Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46
Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11

contrib/boost vendored

@ -1 +1 @@
Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17
Subproject commit a01ddc144c130777d7c6727a3fc5d5cdbae016d6


@ -19,6 +19,12 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM})
add_library (boost::filesystem ALIAS _boost_filesystem)
target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
if (OS_LINUX)
target_compile_definitions (_boost_filesystem PRIVATE
BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1
)
endif ()
# headers-only
add_library (_boost_headers_only INTERFACE)
@ -172,9 +178,9 @@ endif()
# coroutine
set (SRCS_COROUTINE
"${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp"
"${LIBRARY_DIR}/libs/coroutine/exceptions.cpp"
"${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp"
"${LIBRARY_DIR}/libs/coroutine/src/detail/coroutine_context.cpp"
"${LIBRARY_DIR}/libs/coroutine/src/exceptions.cpp"
"${LIBRARY_DIR}/libs/coroutine/src/posix/stack_traits.cpp"
)
add_library (_boost_coroutine ${SRCS_COROUTINE})
add_library (boost::coroutine ALIAS _boost_coroutine)


@ -73,8 +73,8 @@ struct uint128
uint128() = default;
uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {}
friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); }
friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); }
friend auto operator<=>(const uint128 &, const uint128 &) = default;
};
inline uint64 Uint128Low64(const uint128 & x) { return x.low64; }
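Replacing the handwritten `==`/`!=` pair with a defaulted three-way comparison works because, in C++20, a defaulted `operator<=>` declared in the class also implicitly declares a defaulted `operator==`; `!=` (and the orderings) are then synthesized by the compiler, so existing comparisons of `uint128` keep compiling. The defaulted ordering compares members in declaration order (`low64`, then `high64`), which is fine here since only equality was used before. A small self-contained illustration with a hypothetical struct (not the cityhash type itself):

```cpp
#include <cassert>
#include <compare>
#include <cstdint>

struct Pair128
{
    std::uint64_t low64 = 0;
    std::uint64_t high64 = 0;

    /// The defaulted <=> compares members lexicographically in declaration order
    /// and implicitly provides a defaulted operator== as well.
    friend auto operator<=>(const Pair128 &, const Pair128 &) = default;
};

int main()
{
    const Pair128 a{1, 2};
    const Pair128 b{1, 3};
    assert(a == a);  /// == comes from the implicitly declared defaulted operator==
    assert(a != b);  /// != is synthesized from ==
    assert(a < b);   /// low64 ties, high64 decides
    return 0;
}
```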

contrib/curl vendored

@ -1 +1 @@
Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d
Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c


@ -8,125 +8,122 @@ endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl")
set (SRCS
"${LIBRARY_DIR}/lib/fopen.c"
"${LIBRARY_DIR}/lib/noproxy.c"
"${LIBRARY_DIR}/lib/idn.c"
"${LIBRARY_DIR}/lib/cfilters.c"
"${LIBRARY_DIR}/lib/cf-socket.c"
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/amigaos.c"
"${LIBRARY_DIR}/lib/asyn-thread.c"
"${LIBRARY_DIR}/lib/base64.c"
"${LIBRARY_DIR}/lib/bufq.c"
"${LIBRARY_DIR}/lib/bufref.c"
"${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/cf-haproxy.c"
"${LIBRARY_DIR}/lib/cf-https-connect.c"
"${LIBRARY_DIR}/lib/file.c"
"${LIBRARY_DIR}/lib/timeval.c"
"${LIBRARY_DIR}/lib/base64.c"
"${LIBRARY_DIR}/lib/hostip.c"
"${LIBRARY_DIR}/lib/progress.c"
"${LIBRARY_DIR}/lib/formdata.c"
"${LIBRARY_DIR}/lib/cookie.c"
"${LIBRARY_DIR}/lib/http.c"
"${LIBRARY_DIR}/lib/sendf.c"
"${LIBRARY_DIR}/lib/url.c"
"${LIBRARY_DIR}/lib/dict.c"
"${LIBRARY_DIR}/lib/if2ip.c"
"${LIBRARY_DIR}/lib/speedcheck.c"
"${LIBRARY_DIR}/lib/ldap.c"
"${LIBRARY_DIR}/lib/version.c"
"${LIBRARY_DIR}/lib/getenv.c"
"${LIBRARY_DIR}/lib/escape.c"
"${LIBRARY_DIR}/lib/mprintf.c"
"${LIBRARY_DIR}/lib/telnet.c"
"${LIBRARY_DIR}/lib/netrc.c"
"${LIBRARY_DIR}/lib/getinfo.c"
"${LIBRARY_DIR}/lib/transfer.c"
"${LIBRARY_DIR}/lib/strcase.c"
"${LIBRARY_DIR}/lib/easy.c"
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
"${LIBRARY_DIR}/lib/curl_log.c"
"${LIBRARY_DIR}/lib/fileinfo.c"
"${LIBRARY_DIR}/lib/krb5.c"
"${LIBRARY_DIR}/lib/memdebug.c"
"${LIBRARY_DIR}/lib/http_chunks.c"
"${LIBRARY_DIR}/lib/strtok.c"
"${LIBRARY_DIR}/lib/cf-socket.c"
"${LIBRARY_DIR}/lib/cfilters.c"
"${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/connect.c"
"${LIBRARY_DIR}/lib/llist.c"
"${LIBRARY_DIR}/lib/hash.c"
"${LIBRARY_DIR}/lib/multi.c"
"${LIBRARY_DIR}/lib/content_encoding.c"
"${LIBRARY_DIR}/lib/share.c"
"${LIBRARY_DIR}/lib/http_digest.c"
"${LIBRARY_DIR}/lib/md4.c"
"${LIBRARY_DIR}/lib/md5.c"
"${LIBRARY_DIR}/lib/http_negotiate.c"
"${LIBRARY_DIR}/lib/inet_pton.c"
"${LIBRARY_DIR}/lib/strtoofft.c"
"${LIBRARY_DIR}/lib/strerror.c"
"${LIBRARY_DIR}/lib/amigaos.c"
"${LIBRARY_DIR}/lib/cookie.c"
"${LIBRARY_DIR}/lib/curl_addrinfo.c"
"${LIBRARY_DIR}/lib/curl_des.c"
"${LIBRARY_DIR}/lib/curl_endian.c"
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
"${LIBRARY_DIR}/lib/curl_get_line.c"
"${LIBRARY_DIR}/lib/curl_gethostname.c"
"${LIBRARY_DIR}/lib/curl_gssapi.c"
"${LIBRARY_DIR}/lib/curl_memrchr.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/curl_ntlm_core.c"
"${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
"${LIBRARY_DIR}/lib/curl_path.c"
"${LIBRARY_DIR}/lib/curl_range.c"
"${LIBRARY_DIR}/lib/curl_rtmp.c"
"${LIBRARY_DIR}/lib/curl_sasl.c"
"${LIBRARY_DIR}/lib/curl_sspi.c"
"${LIBRARY_DIR}/lib/curl_threads.c"
"${LIBRARY_DIR}/lib/curl_trc.c"
"${LIBRARY_DIR}/lib/dict.c"
"${LIBRARY_DIR}/lib/doh.c"
"${LIBRARY_DIR}/lib/dynbuf.c"
"${LIBRARY_DIR}/lib/dynhds.c"
"${LIBRARY_DIR}/lib/easy.c"
"${LIBRARY_DIR}/lib/escape.c"
"${LIBRARY_DIR}/lib/file.c"
"${LIBRARY_DIR}/lib/fileinfo.c"
"${LIBRARY_DIR}/lib/fopen.c"
"${LIBRARY_DIR}/lib/formdata.c"
"${LIBRARY_DIR}/lib/getenv.c"
"${LIBRARY_DIR}/lib/getinfo.c"
"${LIBRARY_DIR}/lib/gopher.c"
"${LIBRARY_DIR}/lib/hash.c"
"${LIBRARY_DIR}/lib/headers.c"
"${LIBRARY_DIR}/lib/hmac.c"
"${LIBRARY_DIR}/lib/hostasyn.c"
"${LIBRARY_DIR}/lib/hostip.c"
"${LIBRARY_DIR}/lib/hostip4.c"
"${LIBRARY_DIR}/lib/hostip6.c"
"${LIBRARY_DIR}/lib/hostsyn.c"
"${LIBRARY_DIR}/lib/hsts.c"
"${LIBRARY_DIR}/lib/http.c"
"${LIBRARY_DIR}/lib/http2.c"
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
"${LIBRARY_DIR}/lib/http_chunks.c"
"${LIBRARY_DIR}/lib/http_digest.c"
"${LIBRARY_DIR}/lib/http_negotiate.c"
"${LIBRARY_DIR}/lib/http_ntlm.c"
"${LIBRARY_DIR}/lib/http_proxy.c"
"${LIBRARY_DIR}/lib/idn.c"
"${LIBRARY_DIR}/lib/if2ip.c"
"${LIBRARY_DIR}/lib/imap.c"
"${LIBRARY_DIR}/lib/inet_ntop.c"
"${LIBRARY_DIR}/lib/inet_pton.c"
"${LIBRARY_DIR}/lib/krb5.c"
"${LIBRARY_DIR}/lib/ldap.c"
"${LIBRARY_DIR}/lib/llist.c"
"${LIBRARY_DIR}/lib/md4.c"
"${LIBRARY_DIR}/lib/md5.c"
"${LIBRARY_DIR}/lib/memdebug.c"
"${LIBRARY_DIR}/lib/mime.c"
"${LIBRARY_DIR}/lib/mprintf.c"
"${LIBRARY_DIR}/lib/mqtt.c"
"${LIBRARY_DIR}/lib/multi.c"
"${LIBRARY_DIR}/lib/netrc.c"
"${LIBRARY_DIR}/lib/nonblock.c"
"${LIBRARY_DIR}/lib/noproxy.c"
"${LIBRARY_DIR}/lib/openldap.c"
"${LIBRARY_DIR}/lib/parsedate.c"
"${LIBRARY_DIR}/lib/pingpong.c"
"${LIBRARY_DIR}/lib/pop3.c"
"${LIBRARY_DIR}/lib/progress.c"
"${LIBRARY_DIR}/lib/psl.c"
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/rename.c"
"${LIBRARY_DIR}/lib/rtsp.c"
"${LIBRARY_DIR}/lib/select.c"
"${LIBRARY_DIR}/lib/splay.c"
"${LIBRARY_DIR}/lib/strdup.c"
"${LIBRARY_DIR}/lib/sendf.c"
"${LIBRARY_DIR}/lib/setopt.c"
"${LIBRARY_DIR}/lib/sha256.c"
"${LIBRARY_DIR}/lib/share.c"
"${LIBRARY_DIR}/lib/slist.c"
"${LIBRARY_DIR}/lib/smb.c"
"${LIBRARY_DIR}/lib/smtp.c"
"${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/socks.c"
"${LIBRARY_DIR}/lib/curl_addrinfo.c"
"${LIBRARY_DIR}/lib/socks_gssapi.c"
"${LIBRARY_DIR}/lib/socks_sspi.c"
"${LIBRARY_DIR}/lib/curl_sspi.c"
"${LIBRARY_DIR}/lib/slist.c"
"${LIBRARY_DIR}/lib/nonblock.c"
"${LIBRARY_DIR}/lib/curl_memrchr.c"
"${LIBRARY_DIR}/lib/imap.c"
"${LIBRARY_DIR}/lib/pop3.c"
"${LIBRARY_DIR}/lib/smtp.c"
"${LIBRARY_DIR}/lib/pingpong.c"
"${LIBRARY_DIR}/lib/rtsp.c"
"${LIBRARY_DIR}/lib/curl_threads.c"
"${LIBRARY_DIR}/lib/warnless.c"
"${LIBRARY_DIR}/lib/hmac.c"
"${LIBRARY_DIR}/lib/curl_rtmp.c"
"${LIBRARY_DIR}/lib/openldap.c"
"${LIBRARY_DIR}/lib/curl_gethostname.c"
"${LIBRARY_DIR}/lib/gopher.c"
"${LIBRARY_DIR}/lib/http_proxy.c"
"${LIBRARY_DIR}/lib/asyn-thread.c"
"${LIBRARY_DIR}/lib/curl_gssapi.c"
"${LIBRARY_DIR}/lib/http_ntlm.c"
"${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
"${LIBRARY_DIR}/lib/curl_ntlm_core.c"
"${LIBRARY_DIR}/lib/curl_sasl.c"
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/http2.c"
"${LIBRARY_DIR}/lib/smb.c"
"${LIBRARY_DIR}/lib/curl_endian.c"
"${LIBRARY_DIR}/lib/curl_des.c"
"${LIBRARY_DIR}/lib/speedcheck.c"
"${LIBRARY_DIR}/lib/splay.c"
"${LIBRARY_DIR}/lib/strcase.c"
"${LIBRARY_DIR}/lib/strdup.c"
"${LIBRARY_DIR}/lib/strerror.c"
"${LIBRARY_DIR}/lib/strtok.c"
"${LIBRARY_DIR}/lib/strtoofft.c"
"${LIBRARY_DIR}/lib/system_win32.c"
"${LIBRARY_DIR}/lib/mime.c"
"${LIBRARY_DIR}/lib/sha256.c"
"${LIBRARY_DIR}/lib/setopt.c"
"${LIBRARY_DIR}/lib/curl_path.c"
"${LIBRARY_DIR}/lib/curl_range.c"
"${LIBRARY_DIR}/lib/psl.c"
"${LIBRARY_DIR}/lib/doh.c"
"${LIBRARY_DIR}/lib/urlapi.c"
"${LIBRARY_DIR}/lib/curl_get_line.c"
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/bufref.c"
"${LIBRARY_DIR}/lib/bufq.c"
"${LIBRARY_DIR}/lib/dynbuf.c"
"${LIBRARY_DIR}/lib/dynhds.c"
"${LIBRARY_DIR}/lib/hsts.c"
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
"${LIBRARY_DIR}/lib/mqtt.c"
"${LIBRARY_DIR}/lib/rename.c"
"${LIBRARY_DIR}/lib/headers.c"
"${LIBRARY_DIR}/lib/telnet.c"
"${LIBRARY_DIR}/lib/timediff.c"
"${LIBRARY_DIR}/lib/vauth/vauth.c"
"${LIBRARY_DIR}/lib/timeval.c"
"${LIBRARY_DIR}/lib/transfer.c"
"${LIBRARY_DIR}/lib/url.c"
"${LIBRARY_DIR}/lib/urlapi.c"
"${LIBRARY_DIR}/lib/vauth/cleartext.c"
"${LIBRARY_DIR}/lib/vauth/cram.c"
"${LIBRARY_DIR}/lib/vauth/digest.c"
@ -138,23 +135,24 @@ set (SRCS
"${LIBRARY_DIR}/lib/vauth/oauth2.c"
"${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c"
"${LIBRARY_DIR}/lib/vauth/spnego_sspi.c"
"${LIBRARY_DIR}/lib/vauth/vauth.c"
"${LIBRARY_DIR}/lib/version.c"
"${LIBRARY_DIR}/lib/vquic/vquic.c"
"${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vssh/libssh.c"
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
"${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
"${LIBRARY_DIR}/lib/vtls/vtls.c"
"${LIBRARY_DIR}/lib/vtls/nss.c"
"${LIBRARY_DIR}/lib/vtls/wolfssl.c"
"${LIBRARY_DIR}/lib/vtls/hostcheck.c"
"${LIBRARY_DIR}/lib/vtls/keylog.c"
"${LIBRARY_DIR}/lib/vtls/mbedtls.c"
"${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/schannel.c"
"${LIBRARY_DIR}/lib/vtls/schannel_verify.c"
"${LIBRARY_DIR}/lib/vtls/sectransp.c"
"${LIBRARY_DIR}/lib/vtls/gskit.c"
"${LIBRARY_DIR}/lib/vtls/mbedtls.c"
"${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/keylog.c"
"${LIBRARY_DIR}/lib/vtls/vtls.c"
"${LIBRARY_DIR}/lib/vtls/wolfssl.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
"${LIBRARY_DIR}/lib/vtls/hostcheck.c"
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
"${LIBRARY_DIR}/lib/vssh/libssh.c"
"${LIBRARY_DIR}/lib/warnless.c"
)
add_library (_curl ${SRCS})


@ -1,6 +1,7 @@
option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES})
if (ARCH_AARCH64)
# Disable ISA-L libray on aarch64.
# ISA-L is only available for x86-64, so it shall be disabled for other platforms
if (NOT ARCH_AMD64)
set (ENABLE_ISAL_LIBRARY OFF)
endif ()

contrib/krb5 vendored

@ -1 +1 @@
Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617
Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d


@ -147,7 +147,7 @@ target_compile_definitions(_libarchive PUBLIC
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
if (TARGET ch_contrib::xz)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1)
target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::xz)
endif()
@ -156,6 +156,16 @@ if (TARGET ch_contrib::zlib)
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib)
endif()
if (TARGET ch_contrib::zstd)
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
endif()
if (TARGET ch_contrib::bzip2)
target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1)
target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2)
endif()
if (OS_LINUX)
target_compile_definitions(
_libarchive PUBLIC

@ -1 +1 @@
Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9
Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0


@ -1,18 +1,16 @@
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
if (APPLE OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)
message(STATUS "Not using LLVM")
return()
endif()
# TODO: Enable compilation on AArch64
set (LLVM_VERSION "15.0.0bundled")
set (LLVM_INCLUDE_DIRS
"${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include"
@ -58,18 +56,30 @@ set (REQUIRED_LLVM_LIBRARIES
LLVMDemangle
)
# if (ARCH_AMD64)
if (ARCH_AMD64)
set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
# elseif (ARCH_AARCH64)
# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
# endif ()
elseif (ARCH_AARCH64)
set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
elseif (ARCH_PPC64LE)
set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen)
elseif (ARCH_S390X)
set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen)
elseif (ARCH_RISCV64)
set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "")
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen)
endif ()
message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}")
set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind
set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection
set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64"
# Omit unnecessary stuff (just the options which are ON by default)
set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "")
@ -99,15 +109,12 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# Since we always use toolchain files to generate hermatic builds, cmake will
# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM
# targets with all tests enabled, which will slow down cmake configuration and
# compilation (You'll see Building native llvm-tblgen...). Let's disable the
# cross compiling indicator for now.
#
# TODO We should let cmake know whether it's indeed a cross compilation in the
# first place.
set (CMAKE_CROSSCOMPILING 0)
message (STATUS "LLVM CMAKE CROSS COMPILING ${CMAKE_CROSSCOMPILING}")
if (CMAKE_CROSSCOMPILING)
set (LLVM_HOST_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE INTERNAL "")
message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}")
endif()
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES

contrib/openldap vendored

@ -1 +1 @@
Subproject commit 8688afe6bc95ebcd20edf4578c536362218cb70a
Subproject commit 5671b80e369df2caf5f34e02924316205a43c895


@ -96,71 +96,82 @@ target_compile_definitions(_lber
)
set(_ldap_srcs
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/abandon.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/avl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/cancel.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/filter.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/free.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/getdn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/getentry.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/getvalues.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/init.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/lbase64.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/msctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/psearchctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/rdwr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/rq.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tavl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_debug.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_nt.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_posix.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_pth.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_thr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/threads.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/tpool.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c"
)
mkversion(ldap)
@ -185,43 +196,5 @@ target_compile_definitions(_ldap
PRIVATE LDAP_LIBRARY
)
set(_ldap_r_specific_srcs
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/threads.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rdwr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/tpool.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rq.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_posix.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_thr.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_nt.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_pth.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_stub.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_debug.c"
)
mkversion(ldap_r)
add_library(_ldap_r
${_ldap_r_specific_srcs}
${_ldap_srcs}
"${CMAKE_CURRENT_BINARY_DIR}/ldap_r-version.c"
)
target_link_libraries(_ldap_r
PRIVATE _lber
PRIVATE OpenSSL::Crypto OpenSSL::SSL
)
target_include_directories(_ldap_r SYSTEM
PUBLIC ${_extra_build_dir}/include
PUBLIC "${OPENLDAP_SOURCE_DIR}/include"
PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r"
PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap"
)
target_compile_definitions(_ldap_r
PRIVATE LDAP_R_COMPILE
PRIVATE LDAP_LIBRARY
)
add_library(ch_contrib::ldap ALIAS _ldap_r)
add_library(ch_contrib::ldap ALIAS _ldap)
add_library(ch_contrib::lber ALIAS _lber)

contrib/orc vendored

@ -1 +1 @@
Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1
Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6

contrib/robin-map vendored Submodule

@ -0,0 +1 @@
Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d


@ -0,0 +1 @@
# See contrib/usearch-cmake/CMakeLists.txt

contrib/snappy vendored

@ -1 +1 @@
Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf
Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a


@ -20,6 +20,7 @@ echo '/boost/context/*' >> $FILES_TO_CHECKOUT
echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
echo '/boost/core/*' >> $FILES_TO_CHECKOUT
echo '/boost/describe/*' >> $FILES_TO_CHECKOUT
echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD
git read-tree -mu HEAD

contrib/usearch vendored Submodule

@ -0,0 +1 @@
Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30


@ -0,0 +1,17 @@
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")
set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
add_library(_usearch INTERFACE)
target_include_directories(_usearch SYSTEM INTERFACE
${FP16_PROJECT_DIR}/include
${ROBIN_MAP_PROJECT_DIR}/include
${SIMSIMD_PROJECT_DIR}/include
${USEARCH_SOURCE_DIR})
add_library(ch_contrib::usearch ALIAS _usearch)
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)


@ -17,6 +17,9 @@ CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""}
# Create all configured system logs:
clickhouse-client --query "SYSTEM FLUSH LOGS"
# It doesn't make sense to try creating tables if SYNC fails
echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 $CONNECTION_PARAMETERS || exit 0
# For each system log table:
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
@ -38,7 +41,7 @@ do
echo "Creating destination table ${table}_${hash}" >&2
echo "$statement" | clickhouse-client $CONNECTION_PARAMETERS
echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue
echo "Creating table system.${table}_sender" >&2
@ -46,6 +49,7 @@ do
clickhouse-client --query "
CREATE TABLE system.${table}_sender
ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash})
SETTINGS flush_on_detach=0
EMPTY AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}


@ -20,6 +20,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE
@ -53,6 +56,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir2:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE
@ -86,6 +92,9 @@ services:
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir3:-}
target: /var/lib/clickhouse-keeper
- type: ${keeper_fs:-tmpfs}
source: ${keeper_db_dir1:-}
target: /var/lib/clickhouse
entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log"
cap_add:
- SYS_PTRACE


@ -644,7 +644,7 @@ function report
rm -r report ||:
mkdir report report/tmp ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||:
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
@ -810,12 +810,6 @@ create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv')
as select client, server, round(client/server, 3) p,
test, query_display_name
from total_client_time_per_query left join query_display_names using (test, query_index)
where p > round(1.02, 3) order by p desc;
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');


@ -21,6 +21,7 @@
<!-- disable JIT for perf tests -->
<compile_expressions>0</compile_expressions>
<compile_aggregate_expressions>0</compile_aggregate_expressions>
<compile_sort_description>0</compile_sort_description>
<!-- Don't fail some prewarm queries too early -->
<timeout_before_checking_execution_speed>60</timeout_before_checking_execution_speed>


@ -364,20 +364,6 @@ if args.report == "main":
]
)
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
error_tests += len(slow_on_client_rows)
addSimpleTable(
"Slow on Client",
["Client time,&nbsp;s", "Server time,&nbsp;s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows:
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
def add_backward_incompatible():
rows = tsvRows("report/partial-queries-report.tsv")
if not rows:


@ -211,6 +211,11 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
| sed "s|<level>trace</level>|<level>test</level>|" \
> /etc/clickhouse-server/config.d/logger_trace.xml.tmp
mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml
start
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \


@ -1,5 +1,5 @@
# docker build -t clickhouse/style-test .
FROM ubuntu:20.04
FROM ubuntu:22.04
ARG ACT_VERSION=0.2.33
ARG ACTIONLINT_VERSION=1.6.22


@ -129,6 +129,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
| sed "s|>1<|>0<|g" \
> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \


@ -0,0 +1,14 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.10.5-lts (d8737007f9e) FIXME as compared to v23.3.9.55-lts (b9c5c8622d3)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).


@ -0,0 +1,45 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3)
#### Performance Improvement
* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)).
* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).


@ -90,34 +90,117 @@ Process 1 stopped
## Visual Studio Code integration
- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging, the [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Launcher:
- [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
- [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
- Make sure to set the backend to your LLVM installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
### Example configurations
#### cmake-variants.yaml
```yaml
buildType:
default: relwithdebinfo
choices:
debug:
short: Debug
long: Emit debug information
buildType: Debug
release:
short: Release
long: Optimize generated code
buildType: Release
relwithdebinfo:
short: RelWithDebInfo
long: Release with Debug Info
buildType: RelWithDebInfo
tsan:
short: MinSizeRel
long: Minimum Size Release
buildType: MinSizeRel
toolchain:
default: default
description: Select toolchain
choices:
default:
short: x86_64
long: x86_64
s390x:
short: s390x
long: s390x
settings:
CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
```
#### launch.json
```json
{
"version": "0.2.0",
"configurations": [
{
"name": "Debug",
"type": "lldb",
"request": "custom",
"targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"],
"processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"],
"sourceMap": { "${input:targetdir}": "${workspaceFolder}" },
}
],
"inputs": [
{
"id": "targetdir",
"type": "command",
"command": "extension.commandvariable.transform",
"args": {
"text": "${command:cmake.launchTargetDirectory}",
"find": ".*/([^/]+)/[^/]+$",
"replace": "$1"
}
"name": "(lldb) Launch s390x with qemu",
"targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
"processCreateCommands": ["gdb-remote 2159"],
"preLaunchTask": "Run ClickHouse"
}
]
}
```
#### settings.json
This would also put different builds under different subfolders of the `build` folder.
```json
{
"cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
}
```
#### run-debug.sh
```sh
#! /bin/sh
echo 'Starting debugger session'
cd $1
qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu $2 $3 $4
```
#### tasks.json
Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with the configuration taken from `programs/server/config.xml`.
```json
{
"version": "2.0.0",
"tasks": [
{
"label": "Run ClickHouse",
"type": "shell",
"isBackground": true,
"command": "${workspaceFolder}/.vscode/run-debug.sh",
"args": [
"${command:cmake.launchTargetDirectory}/tmp",
"${command:cmake.launchTargetPath}",
"server",
"--config-file=${workspaceFolder}/programs/server/config.xml"
],
"problemMatcher": [
{
"pattern": [
{
"regexp": ".",
"file": 1,
"location": 2,
"message": 3
}
],
"background": {
"activeOnStart": true,
"beginsPattern": "^Starting debugger session",
"endsPattern": ".*"
}
}
]
}
]
}
```
- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)


@ -173,6 +173,7 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
<statistics_interval_ms>600</statistics_interval_ms>
<!-- Configuration specific to topics "logs" and "stats" -->
@ -260,3 +261,4 @@ The number of rows in one Kafka message depends on whether the format is row-bas
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size)
- [system.kafka_consumers](../../../operations/system-tables/kafka_consumers.md)


@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia
``` sql
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```


@ -142,13 +142,15 @@ was specified for ANN indexes, the default value is 100 million.
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
## Annoy {#annoy}
Annoy indexes are currently experimental; to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
disabled on ARM due to memory safety problems with the algorithm.
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the
space in random linear surfaces (lines in 2D, planes in 3D etc.).
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
linear surfaces (lines in 2D, planes in 3D etc.).
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
@ -216,3 +218,64 @@ ORDER BY L2Distance(vectors, Point)
LIMIT N
SETTINGS annoy_index_search_k_nodes=100;
```
:::note
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
:::
## USearch {#usearch}
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections: they often fetch 0.05% or less of the overall dataset
while still providing 99% recall. This is especially useful when working with high-dimensional vectors, which are expensive to load and
compare. The library also has several hardware-specific SIMD optimizations to further accelerate distance computations on modern Arm (NEON
and SVE) and x86 (AVX2 and AVX-512) CPUs, and OS-specific optimizations that allow efficient navigation over immutable persistent files
without loading them into RAM.
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
Syntax to create a USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
```sql
CREATE TABLE table_with_usearch_index
(
id Int64,
vectors Tuple(Float32[, Float32[, ...]]),
INDEX [ann_index_name] vectors TYPE usearch([Distance]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
USearch currently supports two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance` is one minus the cosine of the angle between two (non-zero) vectors, i.e., one minus the cosine similarity
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
For normalized data, `L2Distance` is usually the better choice; otherwise `cosineDistance` is recommended to compensate for scale. If no
distance function is specified during index creation, `L2Distance` is used as the default.
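For quick reference, a compact recap of the two metrics in standard notation. This is a sketch of the usual textbook definitions (with cosine distance taken as one minus the cosine similarity), not an excerpt from the function reference:

```latex
\mathrm{L2Distance}(x, y) = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}
\qquad
\mathrm{cosineDistance}(x, y) = 1 - \frac{\sum_{i=1}^{n} x_i y_i}{\sqrt{\sum_{i=1}^{n} x_i^2}\;\sqrt{\sum_{i=1}^{n} y_i^2}}
```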


@ -196,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV
- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
- [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
## TabSeparatedRaw {#tabseparatedraw}
@ -473,7 +474,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
## CSVWithNames {#csvwithnames}
@ -502,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) is enabled, trailing empty lines at the end of file will be skipped.
Additional settings:
- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`.
- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file. Default value - `false`.
- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
@ -1262,6 +1264,7 @@ SELECT * FROM json_each_row_nested
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
@ -2136,6 +2139,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - minimum number of bytes required for a local file read to perform a seek instead of reading with ignore in Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.

Binary file not shown.


Binary file not shown.


Binary file not shown.


Binary file not shown.


Binary file not shown.


View File

@ -6,7 +6,34 @@ sidebar_label: MySQL Interface
# MySQL Interface
ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
ClickHouse supports the MySQL wire protocol. This allows MySQL-compatible tools to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)).
## Enabling the MySQL Interface On ClickHouse Cloud
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
![Credentials screen - Prompt](./images/mysql1.png)
2. Toggle the switch to enable the MySQL interface for this specific service. This will expose port `3306` for this service and present the MySQL connection screen, which includes your unique MySQL username. The password will be the same as the service's default user password.
![Credentials screen - Enabled MySQL](./images/mysql2.png)
Alternatively, to enable the MySQL interface for an existing service:
1. Ensure your service is in the `Running` state, then click the "View connection string" button for the service you want to enable the MySQL interface for
![Connection screen - Prompt MySQL](./images/mysql3.png)
2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password.
![Connection screen - Prompt MySQL](./images/mysql4.png)
3. After entering the password, you will be shown the MySQL connection string for this service
![Connection screen - MySQL Enabled](./images/mysql5.png)
## Enabling the MySQL Interface On Self-managed ClickHouse
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):
``` xml
<clickhouse>
@ -20,7 +47,7 @@ Startup your ClickHouse server and look for a log message similar to the followi
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
```
## Connect mysql to ClickHouse
## Connect MySQL to ClickHouse
The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:

View File

@ -21,6 +21,11 @@ In most cases it is recommended to use an appropriate tool or library instead of
- [ODBC driver](../interfaces/odbc.md)
- [C++ client library](../interfaces/cpp.md)
ClickHouse server provides embedded visual interfaces for power users:
- Play UI: open `/play` in the browser;
- Advanced Dashboard: open `/dashboard` in the browser;
There is also a wide range of third-party libraries for working with ClickHouse:
- [Client libraries](../interfaces/third-party/client-libraries.md)

View File

@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structu
To prevent inferring the same schema every time ClickHouse reads data from the same file, the inferred schema is cached, and when accessing the same file again, ClickHouse will use the schema from the cache.
There are special settings that control this cache:
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url/azure}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
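For example, a minimal sketch of working with this cache (assuming the `system.schema_inference_cache` table and a hypothetical `data.csv` file):
``` sql
-- Inspect what has been cached so far.
SELECT format, source, schema FROM system.schema_inference_cache;
-- Bypass the cache for one query and re-infer the schema from the file.
DESCRIBE file('data.csv')
SETTINGS schema_inference_use_cache_for_file = 0;
```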

View File

@ -217,23 +217,61 @@ Type: UInt32
Default: 1024
## index_mark_cache_policy
Index mark cache policy name.
Type: String
Default: SLRU
## index_mark_cache_size
Size of cache for index marks. Zero means disabled.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0
## index_mark_cache_size_ratio
The size of the protected queue in the index mark cache relative to the cache's total size.
Type: Double
Default: 0.5
## index_uncompressed_cache_policy
Index uncompressed cache policy name.
Type: String
Default: SLRU
## index_uncompressed_cache_size
Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0
## index_uncompressed_cache_size_ratio
The size of the protected queue in the index uncompressed cache relative to the cache's total size.
Type: Double
Default: 0.5
## io_thread_pool_queue_size
@ -255,10 +293,22 @@ Default: SLRU
Size of cache for marks (index of MergeTree family of tables).
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 5368709120
## mark_cache_size_ratio
The size of the protected queue in the mark cache relative to the cache's total size.
Type: Double
Default: 0.5
## max_backup_bandwidth_for_server
The maximum read speed in bytes per second for all backups on server. Zero means unlimited.
@ -288,7 +338,7 @@ Default: 1000
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -300,7 +350,7 @@ Default: 0
Limit on total number of concurrent insert queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -312,7 +362,7 @@ Default: 0
Limit on total number of concurrently executed select queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
:::
Type: UInt64
@ -456,6 +506,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr
Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 1000
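A minimal sketch of the manual drop mentioned above:
``` sql
-- Close the cached mapped files explicitly; they are re-opened on demand.
SYSTEM DROP MMAP CACHE;
```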
@ -605,10 +659,22 @@ There is one shared cache for the server. Memory is allocated on demand. The cac
The uncompressed cache is advantageous for very short queries in individual cases.
:::note
This setting can be modified at runtime and will take effect immediately.
:::
Type: UInt64
Default: 0
## uncompressed_cache_size_ratio
The size of the protected queue in the uncompressed cache relative to the cache's total size.
Type: Double
Default: 0.5
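To check the effective values of the cache settings above, a hedged sketch (assuming the `system.server_settings` table available in recent versions):
``` sql
SELECT name, value, changed
FROM system.server_settings
WHERE name LIKE '%cache_size%' OR name LIKE '%cache_policy%';
```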
## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval}
The interval in seconds before reloading built-in dictionaries.

View File

@ -56,11 +56,11 @@ Possible values:
- Any positive integer.
Default value: 300.
Default value: 3000.
To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree).
You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300.
Prior to 23.6 this setting was set to 300. You can set a different, higher value; it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also, in case of a merge issue (for example, due to insufficient disk space) you will notice it later than you would with the original 300.
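As a hedged sketch (assuming the setting described above is the MergeTree-level `parts_to_throw_insert` and a hypothetical table `my_table`), the threshold can also be raised per table:
``` sql
-- Raise the threshold for one table instead of relying on the default.
ALTER TABLE my_table MODIFY SETTING parts_to_throw_insert = 6000;
```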
## parts_to_delay_insert {#parts-to-delay-insert}
@ -623,6 +623,19 @@ Possible values:
Default value: false
## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition}
When there are fewer than the specified number of free entries in the pool, do not execute the optimization of an entire partition in the background (this task is generated when `min_age_to_force_merge_seconds` is set and `min_age_to_force_merge_on_partition_only` is enabled). This is to leave free threads for regular merges and avoid "Too many parts".
Possible values:
- Positive integer.
Default value: 25
The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
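As a worked example under assumed defaults (a `background_pool_size` of 16 and a `background_merges_mutations_concurrency_ratio` of 2, neither of which is stated here), the product is 32, so the default of 25 satisfies the constraint. The current values can be checked with a sketch like:
``` sql
SELECT name, value
FROM system.server_settings
WHERE name IN ('background_pool_size', 'background_merges_mutations_concurrency_ratio');
```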
## allow_floating_point_partition_key {#allow_floating_point_partition_key}
Enables the use of floating-point numbers as a partition key.

View File

@ -627,6 +627,13 @@ Column type should be String. If value is empty, default names `row_{i}`will be
Default value: ''.
### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
@ -764,6 +771,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped.
Disabled by default.
### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns}
Allow variable number of columns in rows in TSV input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## CSV format settings {#csv-format-settings}
### format_csv_delimiter {#format_csv_delimiter}
@ -955,9 +969,11 @@ Result
```text
" string "
```
### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.
Allow variable number of columns in rows in CSV input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
@ -1223,6 +1239,12 @@ Allow skipping columns with unsupported types while schema inference for format
Disabled by default.
### input_format_parquet_local_file_min_bytes_for_seek {#input_format_parquet_local_file_min_bytes_for_seek}
Minimum number of bytes required for a local file read to perform a seek instead of reading with ignore in the Parquet input format.
Default value - `8192`.
### output_format_parquet_string_as_string {#output_format_parquet_string_as_string}
Use Parquet String type instead of Binary for String columns.
@ -1565,6 +1587,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format
Disabled by default.
### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns}
Allow variable number of columns in rows in CustomSeparated input format.
Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
Disabled by default.
## Regexp format settings {#regexp-format-settings}
### format_regexp_escaping_rule {#format_regexp_escaping_rule}

View File

@ -98,6 +98,18 @@ Default value: 0.
</profiles>
```
## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator}
If true, constant nondeterministic functions (e.g. the function `now()`) are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. This helps keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`.
## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator}
If true, scalar subqueries are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. Default value: `false`.
## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace}
The maximum size of a serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one of the two settings above is enabled. Default value: 16384 (16 KiB).
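A hedged sketch of the effect (the table and column names are hypothetical): with the first setting enabled, `now()` is evaluated once on the initiator and the resulting literal is what the replicas execute.
``` sql
SET mutations_execute_nondeterministic_on_initiator = 1;
-- now() is replaced by a constant timestamp before the mutation reaches the replicas,
-- so every replica applies exactly the same value.
ALTER TABLE events UPDATE updated_at = now() WHERE id = 42;
```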
## distributed_product_mode {#distributed-product-mode}
Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md).
@ -2371,6 +2383,23 @@ See also:
- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns)
## optimize_count_from_files {#optimize_count_from_files}
Enables or disables the optimization of counting the number of rows from files in different input formats. It applies to the table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
Possible values:
- 0 — Optimization disabled.
- 1 — Optimization enabled.
Default value: `1`.
## use_cache_for_count_from_files {#use_cache_for_count_from_files}
Enables caching of the number of rows when counting from files in the table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
Enabled by default.
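A hedged sketch combining the two settings (the glob path is hypothetical):
``` sql
-- The optimized count path is used and the computed row count is cached for later queries.
SELECT count()
FROM file('logs/*.parquet')
SETTINGS optimize_count_from_files = 1, use_cache_for_count_from_files = 1;
```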
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- Type: seconds
@ -4298,7 +4327,7 @@ Use this setting only for backward compatibility if your use cases depend on old
## session_timezone {#session_timezone}
Sets the implicit time zone of the current session or query.
The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
The setting takes precedence over the globally configured (server-level) implicit time zone.
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
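A hedged sketch of the precedence described above (assuming the `timeZone()` function reflects the implicit time zone):
``` sql
-- Without the setting, timeZone() returns the server-level implicit time zone.
SELECT timeZone();
-- With the setting, the session-level value takes precedence.
SELECT timeZone() SETTINGS session_timezone = 'Asia/Novosibirsk';
```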
@ -4333,7 +4362,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric
```
:::warning
Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
See the following example and explanation.
:::

View File

@ -114,7 +114,11 @@ Example of disk configuration:
## Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
Cache uses `LRU` cache policy.
Example of configuration for versions later or equal to 22.8:

View File

@ -23,6 +23,7 @@ Columns:
- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database).
- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database).
- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown".
- `name` ([String](../../sql-reference/data-types/string.md)) - An alias for `cluster`.
**Example**

View File

@ -0,0 +1,58 @@
---
slug: /en/operations/system-tables/kafka_consumers
---
# kafka_consumers
Contains information about Kafka consumers.
Applicable for [Kafka table engine](../../engines/table-engines/integrations/kafka) (native ClickHouse integration)
Columns:
- `database` (String) - database of the table with Kafka Engine.
- `table` (String) - name of the table with Kafka Engine.
- `consumer_id` (String) - Kafka consumer identifier. Note that a table can have many consumers, as specified by the `kafka_num_consumers` parameter.
- `assignments.topic` (Array(String)) - Kafka topic.
- `assignments.partition_id` (Array(Int32)) - Kafka partition id. Note that only one consumer can be assigned to a partition.
- `assignments.current_offset` (Array(Int64)) - current offset.
- `exceptions.time` (Array(DateTime)) - timestamps when the 10 most recent exceptions were generated.
- `exceptions.text` (Array(String)) - text of the 10 most recent exceptions.
- `last_poll_time` (DateTime) - timestamp of the most recent poll.
- `num_messages_read` (UInt64) - number of messages read by the consumer.
- `last_commit_time` (DateTime) - timestamp of the most recent commit.
- `num_commits` (UInt64) - total number of commits for the consumer.
- `last_rebalance_time` (DateTime) - timestamp of the most recent Kafka rebalance.
- `num_rebalance_revocations` (UInt64) - number of times the consumer's partitions were revoked.
- `num_rebalance_assignments` (UInt64) - number of times the consumer was assigned to the Kafka cluster.
- `is_currently_used` (UInt8) - whether the consumer is currently in use.
- `rdkafka_stat` (String) - library internal statistics. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 to disable it; the default is 3000 (once every three seconds).
Example:
``` sql
SELECT *
FROM system.kafka_consumers
FORMAT Vertical
```
``` text
Row 1:
──────
database: test
table: kafka
consumer_id: ClickHouse-instance-test-kafka-1caddc7f-f917-4bb1-ac55-e28bd103a4a0
assignments.topic: ['system_kafka_cons']
assignments.partition_id: [0]
assignments.current_offset: [18446744073709550615]
exceptions.time: []
exceptions.text: []
last_poll_time: 2006-11-09 18:47:47
num_messages_read: 4
last_commit_time: 2006-11-10 04:39:40
num_commits: 1
last_rebalance_time: 1970-01-01 00:00:00
num_rebalance_revocations: 0
num_rebalance_assignments: 1
is_currently_used: 1
rdkafka_stat: {...}
```

View File

@ -26,9 +26,9 @@ SELECT p, toTypeName(p) FROM geo_point;
Result:
``` text
┌─p─────┬─toTypeName(p)─┐
┌─p───────┬─toTypeName(p)─┐
│ (10,10) │ Point │
└───────┴───────────────┘
└─────────┴───────────────┘
```
## Ring

View File

@ -1092,7 +1092,7 @@ Types of sources (`source_type`):
- [Local file](#local_file)
- [Executable File](#executable)
- [Executable Pool](#executable_pool)
- [HTTP(s)](#http)
- [HTTP(S)](#http)
- DBMS
- [ODBC](#odbc)
- [MySQL](#mysql)
@ -1102,7 +1102,7 @@ Types of sources (`source_type`):
- [Cassandra](#cassandra)
- [PostgreSQL](#postgresql)
## Local File {#local_file}
### Local File {#local_file}
Example of settings:
@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION
- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
## Executable File {#executable}
### Executable File {#executable}
Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
@ -1161,7 +1161,7 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
## Executable Pool {#executable_pool}
### Executable Pool {#executable_pool}
Executable pool allows loading data from a pool of processes. This source does not work with dictionary layouts that need to load all data from the source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
@ -1196,9 +1196,9 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
## Http(s) {#https}
### HTTP(S) {#https}
Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
Example of settings:
@ -1248,7 +1248,55 @@ Setting fields:
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section of the config to prevent database users from accessing arbitrary HTTP servers.
### Known Vulnerability of the ODBC Dictionary Functionality
### DBMS
#### ODBC
You can use this method to connect any database that has an ODBC driver.
Example of settings:
``` xml
<source>
<odbc>
<db>DatabaseName</db>
<table>SchemaName.TableName</table>
<connection_string>DSN=some_parameters</connection_string>
<invalidate_query>SQL_QUERY</invalidate_query>
<query>SELECT id, value_1, value_2 FROM SchemaName.TableName</query>
</odbc>
</source>
```
or
``` sql
SOURCE(ODBC(
db 'DatabaseName'
table 'SchemaName.TableName'
connection_string 'DSN=some_parameters'
invalidate_query 'SQL_QUERY'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
Setting fields:
- `db` Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
- `table` Name of the table and schema if exists.
- `connection_string` Connection string.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
- `query` The custom query. Optional parameter.
:::note
The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared.
:::
ClickHouse receives quoting symbols from the ODBC driver and quotes all settings in queries to the driver, so it's necessary to set the table name according to the table name case in the database.
If you have problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item.
##### Known Vulnerability of the ODBC Dictionary Functionality
:::note
When connecting to the database through the ODBC driver, the connection parameter `Servername` can be substituted. In this case the values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised.
@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
### Example of Connecting Postgresql
##### Example of Connecting Postgresql
Ubuntu OS.
@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360)
You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`.
### Example of Connecting MS SQL Server
##### Example of Connecting MS SQL Server
Ubuntu OS.
@ -1462,55 +1510,7 @@ LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360)
```
## DBMS
### ODBC
You can use this method to connect any database that has an ODBC driver.
Example of settings:
``` xml
<source>
<odbc>
<db>DatabaseName</db>
<table>ShemaName.TableName</table>
<connection_string>DSN=some_parameters</connection_string>
<invalidate_query>SQL_QUERY</invalidate_query>
<query>SELECT id, value_1, value_2 FROM ShemaName.TableName</query>
</odbc>
</source>
```
or
``` sql
SOURCE(ODBC(
db 'DatabaseName'
table 'SchemaName.TableName'
connection_string 'DSN=some_parameters'
invalidate_query 'SQL_QUERY'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
Setting fields:
- `db` Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
- `table` Name of the table and schema if exists.
- `connection_string` Connection string.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
- `query` The custom query. Optional parameter.
:::note
The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared.
:::
ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so its necessary to set table name accordingly to table name case in database.
If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item.
### Mysql
#### Mysql
Example of settings:
@ -1627,7 +1627,7 @@ SOURCE(MYSQL(
))
```
### ClickHouse
#### ClickHouse
Example of settings:
@ -1680,7 +1680,7 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
### Mongodb
#### Mongodb
Example of settings:
@ -1723,7 +1723,7 @@ Setting fields:
- `options` - MongoDB connection string options (optional parameter).
### Redis
#### Redis
Example of settings:
@ -1756,7 +1756,7 @@ Setting fields:
- `storage_type` The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`.
- `db_index` The specific numeric index of Redis logical database. May be omitted, default value is 0.
### Cassandra
#### Cassandra
Example of settings:
@ -1798,7 +1798,7 @@ Setting fields:
The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared.
:::
### PostgreSQL
#### PostgreSQL
Example of settings:
@ -1855,7 +1855,7 @@ Setting fields:
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
## Null
### Null
A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests or in setups with separated data and query nodes on nodes with Distributed tables.
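A minimal sketch of such a dummy dictionary (the dictionary name and attributes are hypothetical):
``` sql
CREATE DICTIONARY null_dict (
    id  UInt64,
    val UInt8 DEFAULT 0
)
PRIMARY KEY id
SOURCE(NULL())
LAYOUT(FLAT())
LIFETIME(0);
```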

View File

@ -1794,6 +1794,330 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md).
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.
**Syntax**
``` sql
arrayRotateLeft(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.
**Syntax**
``` sql
arrayRotateRight(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.
**Syntax**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.
**Syntax**
``` sql
arrayShiftRight(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -1819,6 +1819,72 @@ Result:
└────────────────────────────────────┘
```
## toUTCTimestamp
Converts a DateTime/DateTime64 value from another time zone to a UTC timestamp.
**Syntax**
``` sql
toUTCTimestamp(time_val, time_zone)
```
**Arguments**
- `time_val` — A DateTime/DateTime64 const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
- `time_zone` — A String const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md)
**Returned value**
- DateTime/DateTime64 in text form
**Example**
``` sql
SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai');
```
Result:
``` text
┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')┐
│ 2023-03-15 16:00:00 │
└─────────────────────────────────────────────────────────┘
```
## fromUTCTimestamp
Converts a DateTime/DateTime64 value from the UTC time zone to a timestamp in another time zone.
**Syntax**
``` sql
fromUTCTimestamp(time_val, time_zone)
```
**Arguments**
- `time_val` — A DateTime/DateTime64 const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
- `time_zone` — A String const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md)
**Returned value**
- DateTime/DateTime64 in text form
**Example**
``` sql
SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai');
```
Result:
``` text
┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐
│ 2023-03-16 18:00:00.000 │
└─────────────────────────────────────────────────────────────────────────┘
```
## Related content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)

View File

@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
## sipHash64 (#hash_functions-siphash64)
## sipHash64 {#hash_functions-siphash64}
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
**Arguments**

View File

@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
## DROP DNS CACHE
Resets ClickHouses internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
Clears ClickHouses internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters.
## DROP MARK CACHE
Resets the mark cache.
Clears the mark cache.
## DROP REPLICA
@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica
## DROP UNCOMPRESSED CACHE
Reset the uncompressed data cache.
Clears the uncompressed data cache.
The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).
Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size).
## DROP COMPILED EXPRESSION CACHE
Reset the compiled expression cache.
Clears the compiled expression cache.
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
## DROP QUERY CACHE
Resets the [query cache](../../operations/query-cache.md).
```sql
SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name]
```
Clears the [query cache](../../operations/query-cache.md).
## FLUSH LOGS
@ -443,9 +439,9 @@ SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QU
```
- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped.
- If `QUERIES ALL` modifier is specified, all protocols are stopped.
- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped.
- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped.
- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause (see the sketch after this list).
- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause.
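A hedged sketch of the `EXCEPT` form listed above (the protocol names `TCP` and `HTTP` are assumptions, not confirmed here):
``` sql
-- Stop accepting new connections on every protocol except the assumed TCP and HTTP endpoints.
SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP;
-- Later, resume listening on everything.
SYSTEM START LISTEN QUERIES ALL;
```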
### SYSTEM START LISTEN

View File

@ -4,8 +4,9 @@ sidebar_position: 52
sidebar_label: TRUNCATE
---
# TRUNCATE Statement
# TRUNCATE Statements
## TRUNCATE TABLE
``` sql
TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute `
:::note
If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::
## TRUNCATE DATABASE
``` sql
TRUNCATE DATABASE [IF EXISTS] db [ON CLUSTER cluster]
```
Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist.

View File

@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}
@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r
2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md).
3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed.
Yandex.Metrica is a web analytics service, and sample dataset doesnt cover its full functionality, so there are only two tables to create:
There are only two tables to create:
- `hits` is a table with each action done by all users on all websites covered by the service.
- `visits` is a table that contains pre-built sessions instead of individual actions.
@ -523,7 +523,7 @@ SELECT
sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
(100. * goal_visits) / visits AS goal_percent
FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403)
```
## Cluster Deployment {#cluster-deployment}
@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each:
<perftest_3shards_1replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -602,15 +602,15 @@ Example config for a cluster of one shard containing three replicas:
<perftest_1shards_3replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -628,15 +628,15 @@ ZooKeeper locations are specified in the configuration file:
``` xml
<zookeeper>
<node>
<host>zoo01.yandex.ru</host>
<host>zoo01.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo02.yandex.ru</host>
<host>zoo02.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo03.yandex.ru</host>
<host>zoo03.clickhouse.com</host>
<port>2181</port>
</node>
</zookeeper>

View File

@ -1703,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r
│ 6 │ Float64 │
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.
**Syntax**
``` sql
arrayRotateLeft(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.
**Syntax**
``` sql
arrayRotateRight(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.
**Syntax**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.
**Syntax**
``` sql
arrayShiftRight(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```

View File

@ -1171,10 +1171,267 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引
The greater the difference in cardinality between the key columns of the primary key, the more important the order of those columns is. We will demonstrate this in the next section.
# Ordering key columns efficiently
## Ordering key columns efficiently
TODO
<a name="test"></a>
# Identifying single rows efficiently
TODO
In a compound primary key, the order of the key columns has a significant impact on both:
- the efficiency of filtering on secondary key columns in queries, and
- the compression ratio of the table's data files.
To demonstrate this, we will use a version of our [web traffic sample data set](#数据集)
where each row contains three columns, indicating whether the visit of an internet user (`UserID` column) to a URL (`URL` column) was marked as bot traffic (`IsRobot` column).
We will use a compound primary key containing all three of the above columns, which can be used to speed up typical web analytics queries that calculate:
- how much (what percentage of) traffic to a specific URL comes from bots, or
- how confident we are that a specific user is (or is not) a bot (what percentage of the traffic from that user is (or is not) considered to be bot traffic).
We use this query to calculate the cardinalities of the three columns that we want to use as key columns in the compound primary key (note that we use the [URL table function](/docs/en/sql-reference/table-functions/url.md) to query the TSV data ad hoc without having to create a local table). Run this query in `clickhouse client`:
```sql
SELECT
formatReadableQuantity(uniq(URL)) AS cardinality_URL,
formatReadableQuantity(uniq(UserID)) AS cardinality_UserID,
formatReadableQuantity(uniq(IsRobot)) AS cardinality_IsRobot
FROM
(
SELECT
c11::UInt64 AS UserID,
c15::String AS URL,
c20::UInt8 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != ''
)
```
The response is:
```response
┌─cardinality_URL─┬─cardinality_UserID─┬─cardinality_IsRobot─┐
│ 2.39 million │ 119.08 thousand │ 4.00 │
└─────────────────┴────────────────────┴─────────────────────┘
1 row in set. Elapsed: 118.334 sec. Processed 8.87 million rows, 15.88 GB (74.99 thousand rows/s., 134.21 MB/s.)
```
We can see that there is a big difference in cardinality between the columns, especially between the `URL` and `IsRobot` columns. Therefore, the order of these columns in a compound primary key matters both for efficiently speeding up queries that filter on these columns and for achieving optimal compression ratios for the table's column data files.
To demonstrate this, we create two versions of the table for our bot traffic analysis data:
- a table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`, where the key columns are ordered by cardinality in descending order
- a table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`, where the key columns are ordered by cardinality in ascending order
Create the table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`:
```sql
CREATE TABLE hits_URL_UserID_IsRobot
(
`UserID` UInt32,
`URL` String,
`IsRobot` UInt8
)
ENGINE = MergeTree
// highlight-next-line
PRIMARY KEY (URL, UserID, IsRobot);
```
Then populate it with 8.87 million rows of data:
```sql
INSERT INTO hits_URL_UserID_IsRobot SELECT
intHash32(c11::UInt64) AS UserID,
c15 AS URL,
c20 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != '';
```
The response is:
```response
0 rows in set. Elapsed: 104.729 sec. Processed 8.87 million rows, 15.88 GB (84.73 thousand rows/s., 151.64 MB/s.)
```
Next, create the table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`:
```sql
CREATE TABLE hits_IsRobot_UserID_URL
(
`UserID` UInt32,
`URL` String,
`IsRobot` UInt8
)
ENGINE = MergeTree
// highlight-next-line
PRIMARY KEY (IsRobot, UserID, URL);
```
And populate it with the same 8.87 million rows as for the previous table:
```sql
INSERT INTO hits_IsRobot_UserID_URL SELECT
intHash32(c11::UInt64) AS UserID,
c15 AS URL,
c20 AS IsRobot
FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz')
WHERE URL != '';
```
The response is:
```response
0 rows in set. Elapsed: 95.959 sec. Processed 8.87 million rows, 15.88 GB (92.48 thousand rows/s., 165.50 MB/s.)
```
### Efficient filtering on secondary key columns
When a query filters on at least one column that is part of a compound key and is the first key column, [then ClickHouse runs the binary search algorithm over the key column's index marks](#主索引被用来选择颗粒).
When a query filters (only) on a column that is part of a compound key, but is not the first key column, [then ClickHouse uses the generic exclusion search algorithm over the key column's index marks](#查询使用第二位主键的性能问题).
For the second case, the ordering of the key columns in the compound primary key is significant for the effectiveness of the [generic exclusion search algorithm](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444).
This is a query that filters on the `UserID` column of the table whose key columns `(URL, UserID, IsRobot)` we ordered by cardinality in descending order:
```sql
SELECT count(*)
FROM hits_URL_UserID_IsRobot
WHERE UserID = 112304
```
The response is:
```response
┌─count()─┐
│ 73 │
└─────────┘
1 row in set. Elapsed: 0.026 sec.
// highlight-next-line
Processed 7.92 million rows,
31.67 MB (306.90 million rows/s., 1.23 GB/s.)
```
The same query on the table whose key columns `(IsRobot, UserID, URL)` we ordered by cardinality in ascending order:
```sql
SELECT count(*)
FROM hits_IsRobot_UserID_URL
WHERE UserID = 112304
```
The response is:
```response
┌─count()─┐
│ 73 │
└─────────┘
1 row in set. Elapsed: 0.003 sec.
// highlight-next-line
Processed 20.32 thousand rows,
81.28 KB (6.61 million rows/s., 26.44 MB/s.)
```
We can see that query execution is significantly more effective and faster on the table where the key columns are ordered by cardinality in ascending order.
The reason is that the [generic exclusion search algorithm](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444) works most effectively when [granules](#主索引被用来选择颗粒) are selected via a secondary key column whose predecessor key column has a lower cardinality. We illustrated that in detail in the [previous section](#generic-exclusion-search-algorithm) of this guide.
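You can verify the granule selection yourself with `EXPLAIN`. This is a hedged sketch; the exact output layout depends on the ClickHouse version:
```sql
EXPLAIN indexes = 1
SELECT count(*)
FROM hits_IsRobot_UserID_URL
WHERE UserID = 112304;
```
The `PrimaryKey` block in the output should report how many parts and granules remain after the primary index analysis.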
### Optimal compression ratio of data files
This query compares the compression ratio of the `UserID` column between the two tables that we created above:
```sql
SELECT
table AS Table,
name AS Column,
formatReadableSize(data_uncompressed_bytes) AS Uncompressed,
formatReadableSize(data_compressed_bytes) AS Compressed,
round(data_uncompressed_bytes / data_compressed_bytes, 0) AS Ratio
FROM system.columns
WHERE (table = 'hits_URL_UserID_IsRobot' OR table = 'hits_IsRobot_UserID_URL') AND (name = 'UserID')
ORDER BY Ratio ASC
```
This is the response:
```response
┌─Table───────────────────┬─Column─┬─Uncompressed─┬─Compressed─┬─Ratio─┐
│ hits_URL_UserID_IsRobot │ UserID │ 33.83 MiB │ 11.24 MiB │ 3 │
│ hits_IsRobot_UserID_URL │ UserID │ 33.83 MiB │ 877.47 KiB │ 39 │
└─────────────────────────┴────────┴──────────────┴────────────┴───────┘
2 rows in set. Elapsed: 0.006 sec.
```
We can see that the compression ratio for the `UserID` column is significantly higher for the table whose key columns `(IsRobot, UserID, URL)` we ordered by cardinality in ascending order.
Although exactly the same data is stored in both tables (we inserted the same 8.87 million rows into both), the order of the key columns in the compound primary key has a significant influence on how much disk space the <a href="https://clickhouse.com/docs/en/introduction/distinctive-features/#data-compression" target="_blank">compressed</a> data in the table's [column data files](#数据按照主键排序存储在磁盘上) requires:
- in the table `hits_URL_UserID_IsRobot` with the compound primary key `(URL, UserID, IsRobot)`, where we ordered the key columns by cardinality in descending order, the `UserID.bin` data file takes **11.24 MiB** of disk space
- in the table `hits_IsRobot_UserID_URL` with the compound primary key `(IsRobot, UserID, URL)`, where we ordered the key columns by cardinality in ascending order, the `UserID.bin` data file takes only **877.47 KiB** of disk space
A good compression ratio for a table column's data on disk not only saves space, it also makes queries (especially analytical ones) that need to read data from that column faster, because less I/O is required to move the column's data from disk to main memory (the operating system's file cache).
In the following we illustrate why it is beneficial for the compression ratio of a table's columns to order the primary key columns by cardinality in ascending order.
The diagram below sketches the on-disk order of rows for a primary key whose key columns are ordered by cardinality in ascending order:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14a.png').default} class="image"/>
We discussed that [the table's row data is stored on disk ordered by the primary key columns](#数据按照主键排序存储在磁盘上).
In the diagram above, the table's rows (their column values on disk) are first ordered by their `cl` value, and rows that have the same `cl` value are ordered by their `ch` value. Because the first key column `cl` has low cardinality, it is likely that there are rows with the same `cl` value, and because of that it is also likely that `ch` values are ordered (locally, for rows with the same `cl` value).
If, within a column, similar data is placed close to each other, for example via sorting, that data will be compressed better.
In general, a compression algorithm benefits from the run length of data (the more data it sees, the better it compresses) and from locality (the more similar the data is, the better the compression ratio).
In contrast to the diagram above, the diagram below sketches the on-disk order of rows for a primary key whose key columns are ordered by cardinality in descending order:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-14b.png').default} class="image"/>
Now the table's rows are first ordered by their `ch` value, and rows that have the same `ch` value are ordered by their `cl` value.
But because the first key column `ch` has high cardinality, it is unlikely that there are rows with the same `ch` value, and because of that it is also unlikely that `cl` values are ordered (locally, for rows with the same `ch` value).
Therefore the `cl` values are most likely in random order, resulting in bad locality and, with that, a bad compression ratio.
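The effect can be reproduced with purely synthetic data. This is a minimal sketch (the table names `demo_asc` and `demo_desc` are made up for this example); absolute sizes depend on version and codecs, but the relative difference for the middle column `cm` should be clearly visible:
```sql
-- cl has 4 distinct values, cm roughly 100 thousand, ch is effectively unique.
CREATE TABLE demo_asc  (cl UInt8, cm UInt32, ch UInt64) ENGINE = MergeTree ORDER BY (cl, cm, ch);
CREATE TABLE demo_desc (cl UInt8, cm UInt32, ch UInt64) ENGINE = MergeTree ORDER BY (ch, cm, cl);

INSERT INTO demo_asc  SELECT number % 4, intHash32(number) % 100000, cityHash64(number) FROM numbers(10000000);
INSERT INTO demo_desc SELECT number % 4, intHash32(number) % 100000, cityHash64(number) FROM numbers(10000000);

-- In demo_asc, cm is locally sorted inside each cl run; in demo_desc it follows the (random) ch order.
SELECT table, formatReadableSize(data_compressed_bytes) AS compressed_cm
FROM system.columns
WHERE (table IN ('demo_asc', 'demo_desc')) AND (name = 'cm')
ORDER BY table;
```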
### Summary
For both efficient filtering on secondary key columns in queries and a high compression ratio of a table's column data files, it is beneficial to order the columns in a primary key by their cardinality in ascending order.
### Related content
- Blog: [Super charging your ClickHouse queries](https://clickhouse.com/blog/clickhouse-faster-queries-with-projections-and-primary-indexes)
## Identifying single rows efficiently
Although in general it is [not](/knowledgebase/key-value) the best use case for ClickHouse,
sometimes applications built on top of ClickHouse need to identify single rows of a ClickHouse table.
An intuitive solution for that might be to use a [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) column with a unique value per row, and to use that column as a primary key column for fast retrieval of rows.
For the fastest retrieval, the UUID column [would need to be a primary key column](#主索引被用来选择颗粒).
We discussed that because [a ClickHouse table's row data is stored on disk ordered by the primary key column(s)](#数据按照主键排序存储在磁盘上), having a column with very high cardinality (like a UUID column) in a primary key, or in a compound primary key before columns with lower cardinality, [is detrimental to the compression ratio of other table columns](#数据文件的最佳压缩率).
A compromise between fastest retrieval and optimal data compression is to use a compound primary key where the UUID is the last key column, placed after low(er)-cardinality key columns that are used to ensure a good compression ratio for some of the table's columns.
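As a hedged sketch of that compromise (the table and column names here are made up for illustration, not taken from the guide), an events table can keep a couple of low-cardinality key columns in front of the UUID:
```sql
CREATE TABLE events
(
    `tenant_id`  UInt32,
    `event_date` Date,
    `event_id`   UUID,
    `payload`    String
)
ENGINE = MergeTree
-- Low-cardinality columns first keep payload and the other columns compressible;
-- the UUID as the last key column still allows point lookups via the primary index.
PRIMARY KEY (tenant_id, event_date, event_id);
```
Point lookups should then pass the leading key columns together with the UUID (for example `WHERE tenant_id = ... AND event_date = ... AND event_id = ...`) so that the full primary index can be used.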
### A concrete example
One concrete example is the plain-text paste service https://pastila.nl that Alexey Milovidov developed and [blogged about](https://clickhouse.com/blog/building-a-paste-service-with-clickhouse/).
On every change to the text area, the data is saved automatically into a ClickHouse table row (one row per change).
One way to identify and retrieve (a specific version of) the pasted content is to use a hash of the content as the UUID for the table row that contains that content.
The following diagram shows
- the insertion order of the rows when the content changes (for example because of keystrokes typing text into the text area), and
- the on-disk order of the data from the inserted rows when `PRIMARY KEY (hash)` is used:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15a.png').default} class="image"/>
Because the `hash` column is used as the primary key column,
- specific rows can be retrieved [very quickly](#主索引被用来选择颗粒), but
- the table's rows (their column data) are stored on disk ordered ascending by the (unique and random) hash values. Therefore the values of the content column are also stored in random order with no data locality, which results in a **suboptimal compression ratio for the content column's data file**.
In order to significantly improve the compression ratio for the content column while still achieving fast retrieval of specific rows, pastila.nl uses two hashes (and a compound primary key) for identifying a specific row:
- a hash of the content that, as discussed above, is distinct for distinct data, and
- a [locality-sensitive hash (fingerprint)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) that does **not** change on small changes of the data.
The following diagram shows
- the insertion order of the rows when the content changes (for example because of keystrokes typing text into the text area), and
- the on-disk order of the data from the inserted rows when the compound primary key `(fingerprint, hash)` is used:
<img src={require('../../../en/guides/best-practices/images/sparse-primary-indexes-15b.png').default} class="image"/>
Now the rows on disk are first ordered by `fingerprint`, and for rows with the same fingerprint value, their `hash` value determines the final order.
Because data that differs only slightly gets the same fingerprint value, similar data is now stored close to each other in the content column on disk. That is very good for the compression ratio of the content column, as a compression algorithm in general benefits from data locality (the more similar the data is, the better the compression ratio).
The trade-off is that two fields (`fingerprint` and `hash`) are required for retrieving a specific row in order to make optimal use of the primary index that results from the compound primary key `(fingerprint, hash)`.
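A hedged sketch of such a table and lookup (deliberately simplified; the table name `paste`, the column types, and the specific hash functions are assumptions for illustration, not the exact schema of pastila.nl). ClickHouse's `ngramSimHash` serves as a stand-in locality-sensitive hash and `cityHash64` as the exact content hash:
```sql
CREATE TABLE paste
(
    `fingerprint` UInt64,  -- locality-sensitive hash: stays stable under small edits
    `hash`        UInt64,  -- exact content hash: changes with every edit
    `content`     String
)
ENGINE = MergeTree
PRIMARY KEY (fingerprint, hash);

INSERT INTO paste SELECT ngramSimHash(c), cityHash64(c), c
FROM (SELECT 'some pasted text' AS c);

-- Retrieving one specific version needs both key values so the full primary index can be used:
SELECT content
FROM paste
WHERE fingerprint = ngramSimHash('some pasted text') AND hash = cityHash64('some pasted text');
```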

@ -32,6 +32,12 @@ contents:
dst: /usr/bin/clickhouse-keeper
- src: clickhouse-keeper.service
dst: /lib/systemd/system/clickhouse-keeper.service
- src: clickhouse
dst: /usr/bin/clickhouse-keeper-client
type: symlink
- src: clickhouse
dst: /usr/bin/clickhouse-keeper-converter
type: symlink
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-keeper/AUTHORS

@ -5,6 +5,7 @@
#include <fstream>
#include <iomanip>
#include <random>
#include <string_view>
#include <pcg_random.hpp>
#include <Poco/Util/Application.h>
#include <Common/Stopwatch.h>
@ -48,6 +49,7 @@ namespace DB
{
using Ports = std::vector<UInt16>;
static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark";
namespace ErrorCodes
{
@ -122,7 +124,7 @@ public:
default_database_, user_, password_, quota_key_,
/* cluster_= */ "",
/* cluster_secret_= */ "",
/* client_name_= */ "benchmark",
/* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
Protocol::Compression::Enable,
secure));
@ -135,6 +137,8 @@ public:
global_context->makeGlobalContext();
global_context->setSettings(settings);
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
std::cerr << std::fixed << std::setprecision(3);

@ -1243,6 +1243,7 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);

@ -133,8 +133,6 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp

@ -110,19 +110,18 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam
}
catch (const Poco::Exception &)
{
std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false);
if (listen_try)
{
LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, "
"then consider to "
"specify not disabled IPv4 or IPv6 address to listen in <listen_host> element of configuration "
"file. Example for disabled IPv6: <listen_host>0.0.0.0</listen_host> ."
" Example for disabled IPv4: <listen_host>::</listen_host>",
message);
listen_host, port, getCurrentExceptionMessage(false));
}
else
{
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false));
}
}
}
@ -291,12 +290,6 @@ try
{
path = config().getString("keeper_server.storage_path");
}
else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly",
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
}
else if (config().has("keeper_server.log_storage_path"))
{
path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path();
@ -305,6 +298,12 @@ try
{
path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path();
}
else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
"By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly",
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
}
else
{
path = KEEPER_DEFAULT_PATH;

@ -2,6 +2,8 @@
#include <sys/resource.h>
#include <Common/logger_useful.h>
#include <Common/formatReadable.h>
#include <base/getMemoryAmount.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
@ -655,43 +657,67 @@ void LocalServer::processConfig()
/// There is no need for concurrent queries, override max_concurrent_queries.
global_context->getProcessList().setMaxSize(0);
/// Size of cache for uncompressed blocks. Zero means disabled.
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "");
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
if (uncompressed_cache_size)
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
const size_t physical_server_memory = getMemoryAmount();
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
/// Size of cache for marks (index of MergeTree family of tables).
String mark_cache_policy = config().getString("mark_cache_policy", "");
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
if (mark_cache_size)
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
/// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0);
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
/// Size of cache for index marks (index of MergeTree skip indices).
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0);
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
/// A cache for mmapped files.
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary.
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setMMappedFileCache(mmap_cache_size);
/// Initialize a dummy query cache.
global_context->setQueryCache(0, 0, 0, 0);
#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size
= config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// NOTE: it is important to apply any overrides before

@ -365,17 +365,14 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
hash.update(seed);
hash.update(i);
const auto checksum = getSipHash128AsArray(hash);
if (size >= 16)
{
char * hash_dst = reinterpret_cast<char *>(std::min(pos, end - 16));
hash.get128(hash_dst);
auto * hash_dst = std::min(pos, end - 16);
memcpy(hash_dst, checksum.data(), checksum.size());
}
else
{
char value[16];
hash.get128(value);
memcpy(dst, value, end - dst);
}
memcpy(dst, checksum.data(), end - dst);
pos += 16;
++i;
@ -393,7 +390,10 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed)
{
const UInt128 & src = src_uuid.toUnderType();
auto src_copy = src_uuid;
transformEndianness<std::endian::little, std::endian::native>(src_copy);
const UInt128 & src = src_copy.toUnderType();
UInt128 & dst = dst_uuid.toUnderType();
SipHash hash;
@ -401,10 +401,11 @@ static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed)
hash.update(reinterpret_cast<const char *>(&src), sizeof(UUID));
/// Saving version and variant from an old UUID
hash.get128(reinterpret_cast<char *>(&dst));
dst = hash.get128();
dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull);
dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull);
const UInt64 trace[2] = {0x000000000000f000ull, 0xe000000000000000ull};
UUIDHelpers::getLowBytes(dst_uuid) = (UUIDHelpers::getLowBytes(dst_uuid) & (0xffffffffffffffffull - trace[1])) | (UUIDHelpers::getLowBytes(src_uuid) & trace[1]);
UUIDHelpers::getHighBytes(dst_uuid) = (UUIDHelpers::getHighBytes(dst_uuid) & (0xffffffffffffffffull - trace[0])) | (UUIDHelpers::getHighBytes(src_uuid) & trace[0]);
}
class FixedStringModel : public IModel
@ -1301,18 +1302,14 @@ try
if (structure.empty())
{
ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &)
{
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
return file;
};
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
SingleReadBufferIterator read_buffer_iterator(std::move(file));
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const);
}
else

@ -29,6 +29,7 @@
#include <Common/ShellCommand.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
#include <Common/formatReadable.h>
#include <Common/getMultipleKeysFromConfig.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getExecutablePath.h>
@ -325,19 +326,18 @@ void Server::createServer(
}
catch (const Poco::Exception &)
{
std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false);
if (listen_try)
{
LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, "
"then consider to "
"specify not disabled IPv4 or IPv6 address to listen in <listen_host> element of configuration "
"file. Example for disabled IPv6: <listen_host>0.0.0.0</listen_host> ."
" Example for disabled IPv4: <listen_host>::</listen_host>",
message);
listen_host, port, getCurrentExceptionMessage(false));
}
else
{
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false));
}
}
}
@ -658,10 +658,10 @@ try
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif
const auto memory_amount = getMemoryAmount();
const size_t physical_server_memory = getMemoryAmount();
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
formatReadableSizeWithBinarySuffix(memory_amount),
formatReadableSizeWithBinarySuffix(physical_server_memory),
getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores
std::thread::hardware_concurrency());
@ -1105,6 +1105,75 @@ try
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
/// Set up caches.
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = server_settings.mark_cache_policy;
size_t mark_cache_size = server_settings.mark_cache_size;
double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = server_settings.index_mark_cache_policy;
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = server_settings.mmap_cache_size;
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setMMappedFileCache(mmap_cache_size);
size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
if (query_cache_max_size_in_bytes > max_cache_size)
{
query_cache_max_size_in_bytes = max_cache_size;
LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
@ -1136,9 +1205,10 @@ try
server_settings_.loadSettingsFromConfig(*config);
size_t max_server_memory_usage = server_settings_.max_server_memory_usage;
double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio;
size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically.
size_t default_max_server_memory_usage = static_cast<size_t>(current_physical_server_memory * max_server_memory_usage_to_ram_ratio);
if (max_server_memory_usage == 0)
{
@ -1146,7 +1216,7 @@ try
LOG_INFO(log, "Setting max_server_memory_usage was set to {}"
" ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
max_server_memory_usage_to_ram_ratio);
}
else if (max_server_memory_usage > default_max_server_memory_usage)
@ -1157,7 +1227,7 @@ try
" calculated as {} available"
" * {:.2f} max_server_memory_usage_to_ram_ratio",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
max_server_memory_usage_to_ram_ratio);
}
@ -1167,14 +1237,14 @@ try
size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit;
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
if (merges_mutations_memory_usage_soft_limit == 0)
{
merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}"
" ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
formatReadableSizeWithBinarySuffix(memory_amount),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
server_settings_.merges_mutations_memory_usage_to_ram_ratio);
}
else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
@ -1183,7 +1253,7 @@ try
LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}"
" ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
formatReadableSizeWithBinarySuffix(memory_amount),
formatReadableSizeWithBinarySuffix(current_physical_server_memory),
server_settings_.merges_mutations_memory_usage_to_ram_ratio);
}
@ -1323,7 +1393,14 @@ try
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
global_context->updateUncompressedCacheConfiguration(*config);
global_context->updateMarkCacheConfiguration(*config);
global_context->updateIndexUncompressedCacheConfiguration(*config);
global_context->updateIndexMarkCacheConfiguration(*config);
global_context->updateMMappedFileCacheConfiguration(*config);
global_context->updateQueryCacheConfiguration(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
@ -1483,21 +1560,6 @@ try
/// Limit on total number of concurrently executed queries.
global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
/// Set up caches.
size_t max_cache_size = static_cast<size_t>(memory_amount * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy);
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory",
formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
@ -1513,46 +1575,16 @@ try
server_settings.async_insert_queue_flush_on_shutdown));
}
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory",
formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
if (server_settings.index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
if (server_settings.index_mark_cache_size)
global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
if (server_settings.mmap_cache_size)
global_context->setMMappedFileCache(server_settings.mmap_cache_size);
/// A cache for query results.
global_context->setQueryCache(config());
#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
#endif
/// Set path for format schema files
fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/"));
global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path);
/// Set path for filesystem caches
fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
if (!filesystem_caches_path.empty())
global_context->setFilesystemCachesPath(filesystem_caches_path);
/// Check sanity of MergeTreeSettings on server startup
{
size_t background_pool_tasks = global_context->getMergeMutateExecutor()->getMaxTasksCount();
@ -2048,6 +2080,9 @@ void Server::createServers(
for (const auto & protocol : protocols)
{
if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol))
continue;
std::string prefix = "protocols." + protocol + ".";
std::string port_name = prefix + "port";
std::string description {"<undefined> protocol"};
@ -2057,9 +2092,6 @@ void Server::createServers(
if (!config.has(prefix + "port"))
continue;
if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name))
continue;
std::vector<std::string> hosts;
if (config.has(prefix + "host"))
hosts.push_back(config.getString(prefix + "host"));

@ -11,6 +11,7 @@
--background: linear-gradient(to bottom, #00CCFF, #00D0D0);
--chart-background: white;
--shadow-color: rgba(0, 0, 0, 0.25);
--moving-shadow-color: rgba(0, 0, 0, 0.5);
--input-shadow-color: rgba(0, 255, 0, 1);
--error-color: red;
--auth-error-color: white;
@ -34,6 +35,7 @@
--background: #151C2C;
--chart-background: #1b2834;
--shadow-color: rgba(0, 0, 0, 0);
--moving-shadow-color: rgba(255, 255, 255, 0.25);
--input-shadow-color: rgba(255, 128, 0, 0.25);
--error-color: #F66;
--legend-background: rgba(255, 255, 255, 0.25);
@ -91,6 +93,21 @@
position: relative;
}
.chart-maximized {
flex: 1 100%;
height: 75vh
}
.chart-moving {
z-index: 11;
box-shadow: 0 0 2rem var(--moving-shadow-color);
}
.chart-displaced {
opacity: 75%;
filter: blur(1px);
}
.chart div { position: absolute; }
.inputs {
@ -230,8 +247,8 @@
filter: contrast(125%);
}
#add, #reload {
padding: .25rem 0.5rem;
#add, #reload, #edit {
padding: 0.25rem 0.5rem;
text-align: center;
font-weight: bold;
user-select: none;
@ -244,13 +261,10 @@
margin-right: 1rem !important;
margin-left: 0rem;
margin-bottom: 1rem;
height: 3ex;
}
/* .unconnected #reload {
margin-left: 3px;
} */
#add:hover, #reload:hover {
#add:hover, #reload:hover, #edit:hover {
background: var(--button-background-color);
}
@ -306,6 +320,7 @@
}
.chart-buttons a {
margin-right: 0.25rem;
user-select: none;
}
.chart-buttons a:hover {
color: var(--chart-button-hover-color);
@ -333,18 +348,21 @@
padding: 2rem;
}
.query-editor textarea {
grid-row: 1;
grid-column: 1 / span 2;
z-index: 11;
textarea {
padding: 0.5rem;
outline: none;
border: none;
font-size: 12pt;
border-bottom: 1px solid var(--edit-title-border);
background: var(--chart-background);
color: var(--text-color);
resize: none;
}
.query-editor textarea {
grid-row: 1;
grid-column: 1 / span 2;
z-index: 11;
border-bottom: 1px solid var(--edit-title-border);
margin: 0;
}
@ -367,10 +385,41 @@
filter: contrast(125%);
}
.edit-cancel {
cursor: pointer;
background: var(--new-chart-background-color);
}
.edit-cancel:hover {
filter: contrast(125%);
}
.nowrap {
white-space: pre;
}
#mass-editor {
display: none;
grid-template-columns: auto fit-content(10%) fit-content(10%);
grid-template-rows: auto fit-content(10%);
row-gap: 1rem;
column-gap: 1rem;
}
#mass-editor-textarea {
width: 100%;
height: 100%;
grid-row: 1;
grid-column: 1 / span 3;
}
#mass-editor input {
padding: 0.5rem;
}
#mass-editor-message {
color: var(--auth-error-color);
}
/* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
* It is copy-pasted to lower the number of requests.
*/
@ -389,6 +438,7 @@
</div>
<div id="button-options">
<span class="nowrap themes"><span id="toggle-dark">🌚</span><span id="toggle-light">🌞</span></span>
<input id="edit" type="button" value="✎" style="display: none;">
<input id="add" type="button" value="Add chart" style="display: none;">
<input id="reload" type="button" value="Reload">
<div id="chart-params"></div>
@ -397,6 +447,12 @@
<div id="auth-error"></div>
</div>
<div id="charts"></div>
<div id="mass-editor">
<textarea id="mass-editor-textarea" spellcheck="false" data-gramm="false"></textarea>
<span id="mass-editor-message">&nbsp;</span>
<input type="submit" id="mass-editor-cancel" class="edit-cancel" value="Cancel">
<input type="submit" id="mass-editor-confirm" class="edit-confirm" value="Apply">
</div>
<script>
/** Implementation note: it might be more natural to use some reactive framework.
@ -405,9 +461,7 @@
*
* TODO:
* - zoom on the graphs should work on touch devices;
* - add mass edit capability (edit the JSON config as a whole);
* - compress the state for URL's #hash;
* - save/load JSON configuration;
* - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards;
* - multiple lines on chart;
@ -418,11 +472,13 @@
let host = 'https://play.clickhouse.com/';
let user = 'explorer';
let password = '';
let add_http_cors_header = true;
/// If it is hosted on server, assume that it is the address of ClickHouse.
if (location.protocol != 'file:') {
host = location.origin;
user = 'default';
add_http_cors_header = false;
}
const errorCodeMessageMap = {
@ -702,6 +758,7 @@ function insertChart(i) {
query_editor_textarea.spellcheck = false;
query_editor_textarea.value = q.query;
query_editor_textarea.placeholder = 'Query';
query_editor_textarea.setAttribute('data-gramm', false);
query_editor.appendChild(query_editor_textarea);
let query_editor_title = document.createElement('input');
@ -756,6 +813,92 @@ function insertChart(i) {
let edit_buttons = document.createElement('div');
edit_buttons.className = 'chart-buttons';
let move = document.createElement('a');
let move_text = document.createTextNode('✥');
move.appendChild(move_text);
let is_dragging = false;
move.addEventListener('mousedown', e => {
const idx = getCurrentIndex();
is_dragging = true;
chart.className = 'chart chart-moving';
let offset_x = e.clientX;
let offset_y = e.clientY;
let displace_idx = null;
let displace_chart = null;
function mouseup(e) {
is_dragging = false;
chart.className = 'chart';
chart.style.left = null;
chart.style.top = null;
if (displace_idx !== null) {
const elem = queries[idx];
queries.splice(idx, 1);
queries.splice(displace_idx, 0, elem);
displace_chart.className = 'chart';
drawAll();
}
}
function mousemove(e) {
if (!is_dragging) {
document.body.removeEventListener('mousemove', mousemove);
document.body.removeEventListener('mouseup', mouseup);
return;
}
let x = e.clientX - offset_x;
let y = e.clientY - offset_y;
chart.style.left = `${x}px`;
chart.style.top = `${y}px`;
displace_idx = null;
displace_chart = null;
let current_idx = -1;
for (const elem of charts.querySelectorAll('.chart')) {
++current_idx;
if (current_idx == idx) {
continue;
}
const this_rect = chart.getBoundingClientRect();
const this_center_x = this_rect.left + this_rect.width / 2;
const this_center_y = this_rect.top + this_rect.height / 2;
const elem_rect = elem.getBoundingClientRect();
if (this_center_x >= elem_rect.left && this_center_x <= elem_rect.right
&& this_center_y >= elem_rect.top && this_center_y <= elem_rect.bottom) {
elem.className = 'chart chart-displaced';
displace_idx = current_idx;
displace_chart = elem;
} else {
elem.className = 'chart';
}
}
}
document.body.addEventListener('mouseup', mouseup);
document.body.addEventListener('mousemove', mousemove);
});
let maximize = document.createElement('a');
let maximize_text = document.createTextNode('🗖');
maximize.appendChild(maximize_text);
maximize.addEventListener('click', e => {
const idx = getCurrentIndex();
chart.className = (chart.className == 'chart' ? 'chart chart-maximized' : 'chart');
resize();
});
let edit = document.createElement('a');
let edit_text = document.createTextNode('✎');
edit.appendChild(edit_text);
@ -788,6 +931,8 @@ function insertChart(i) {
saveState();
});
edit_buttons.appendChild(move);
edit_buttons.appendChild(maximize);
edit_buttons.appendChild(edit);
edit_buttons.appendChild(trash);
@ -815,6 +960,66 @@ document.getElementById('reload').addEventListener('click', e => {
reloadAll();
});
let mass_editor_active = false;
function showMassEditor() {
document.getElementById('charts').style.display = 'none';
let editor_div = document.getElementById('mass-editor');
editor_div.style.display = 'grid';
let editor = document.getElementById('mass-editor-textarea');
editor.value = JSON.stringify({params: params, queries: queries}, null, 2);
mass_editor_active = true;
}
function hideMassEditor() {
document.getElementById('mass-editor').style.display = 'none';
document.getElementById('charts').style.display = 'flex';
mass_editor_active = false;
}
function massEditorApplyChanges() {
let editor = document.getElementById('mass-editor-textarea');
({params, queries} = JSON.parse(editor.value));
hideMassEditor();
regenerate();
drawAll();
saveState();
}
document.getElementById('edit').addEventListener('click', e => {
if (mass_editor_active) {
massEditorApplyChanges();
hideMassEditor();
} else {
showMassEditor();
}
});
document.getElementById('mass-editor-confirm').addEventListener('click', e => {
massEditorApplyChanges();
hideMassEditor();
});
document.getElementById('mass-editor-cancel').addEventListener('click', e => {
hideMassEditor();
});
document.getElementById('mass-editor-textarea').addEventListener('input', e => {
let message = document.getElementById('mass-editor-message').firstChild;
message.data = '';
if (e.target.value != '') {
try { JSON.parse(e.target.value) } catch (e) {
message.data = e.toString();
}
}
});
function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) {
let legendEl;
@ -865,8 +1070,6 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
};
}
let add_http_cors_header = false;
async function draw(idx, chart, url_params, query) {
if (plots[idx]) {
plots[idx].destroy();
@ -877,7 +1080,7 @@ async function draw(idx, chart, url_params, query) {
user = document.getElementById('user').value;
password = document.getElementById('password').value;
let url = `${host}?default_format=JSONCompactColumns`
let url = `${host}?default_format=JSONCompactColumns&enable_http_compression=1`
if (add_http_cors_header) {
// For debug purposes, you may set add_http_cors_header from a browser console
@ -906,12 +1109,14 @@ async function draw(idx, chart, url_params, query) {
}
if (error) {
const errorMatch = errorMessages.find(({ regex }) => error.match(regex))
const match = error.match(errorMatch.regex)
const message = errorMatch.messageFunc(match)
const errorMatch = errorMessages.find(({ regex }) => error.match(regex));
if (!errorMatch) {
throw new Error(error);
}
const match = error.match(errorMatch.regex);
const message = errorMatch.messageFunc(match);
if (message) {
const authError = new Error(message)
throw authError
throw new Error(message);
}
}
@ -978,23 +1183,23 @@ async function draw(idx, chart, url_params, query) {
}
function showAuthError(message) {
const charts = document.querySelector('#charts');
const charts = document.getElementById('charts');
charts.style.height = '0px';
charts.style.opacity = '0';
const add = document.querySelector('#add');
add.style.display = 'none';
document.getElementById('add').style.display = 'none';
document.getElementById('edit').style.display = 'none';
const authError = document.querySelector('#auth-error');
const authError = document.getElementById('auth-error');
authError.textContent = message;
authError.style.display = 'flex';
}
function hideAuthError() {
const charts = document.querySelector('#charts');
const charts = document.getElementById('charts');
charts.style.height = 'auto';
charts.style.opacity = '1';
const authError = document.querySelector('#auth-error');
const authError = document.getElementById('auth-error');
authError.textContent = '';
authError.style.display = 'none';
}
@ -1025,11 +1230,11 @@ async function drawAll() {
if (results.includes(true)) {
const element = document.querySelector('.inputs');
element.classList.remove('unconnected');
const add = document.querySelector('#add');
add.style.display = 'block';
document.getElementById('add').style.display = 'inline-block';
document.getElementById('edit').style.display = 'inline-block';
}
else {
const charts = document.querySelector('#charts')
const charts = document.getElementById('charts')
charts.style.height = '0px';
}
})
@ -1048,14 +1253,14 @@ new ResizeObserver(resize).observe(document.body);
function disableReloadButton() {
const reloadButton = document.getElementById('reload')
reloadButton.value = 'Reloading...'
reloadButton.value = 'Reloading'
reloadButton.disabled = true
reloadButton.classList.add('disabled')
}
function disableRunButton() {
const runButton = document.getElementById('run')
runButton.value = 'Reloading...'
runButton.value = 'Reloading'
runButton.disabled = true
runButton.classList.add('disabled')
}

@ -465,7 +465,7 @@
<input class="monospace shadow" id="url" type="text" value="http://localhost:8123/" placeholder="url" /><input class="monospace shadow" id="user" type="text" value="default" placeholder="user" /><input class="monospace shadow" id="password" type="password" placeholder="password" />
</div>
<div id="query_div">
<textarea autofocus spellcheck="false" class="monospace shadow" id="query"></textarea>
<textarea autofocus spellcheck="false" data-gramm="false" class="monospace shadow" id="query"></textarea>
</div>
<div id="run_div">
<button class="shadow" id="run">Run</button>

@ -95,7 +95,7 @@ enum class AccessType
M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH|TRUNCATE} DATABASE */\
M(DROP_TABLE, "", TABLE, DROP) /* allows to execute {DROP|DETACH} TABLE */\
M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views;
implicitly enabled by the grant DROP_TABLE */\
@ -153,6 +153,7 @@ enum class AccessType
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \

@ -549,7 +549,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
if (rc != LDAP_SUCCESS)
{
String message = "LDAP search failed";
String message;
const char * raw_err_str = ldap_err2string(rc);
if (raw_err_str && *raw_err_str != '\0')
@ -570,7 +570,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
message += matched_msg;
}
throw Exception::createDeprecated(message, ErrorCodes::LDAP_ERROR);
throw Exception(ErrorCodes::LDAP_ERROR, "LDAP search failed{}", message);
}
break;

@ -46,7 +46,7 @@ void MultipleAccessStorage::setStorages(const std::vector<StoragePtr> & storages
{
std::lock_guard lock{mutex};
nested_storages = std::make_shared<const Storages>(storages);
ids_cache.reset();
ids_cache.clear();
}
void MultipleAccessStorage::addStorage(const StoragePtr & new_storage)
@ -69,7 +69,7 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove)
auto new_storages = std::make_shared<Storages>(*nested_storages);
new_storages->erase(new_storages->begin() + index);
nested_storages = new_storages;
ids_cache.reset();
ids_cache.clear();
}
std::vector<StoragePtr> MultipleAccessStorage::getStorages()
@ -502,4 +502,15 @@ void MultipleAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
throwBackupNotAllowed();
}
bool MultipleAccessStorage::containsStorage(std::string_view storage_type) const
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
{
if (storage->getStorageType() == storage_type)
return true;
}
return false;
}
}

@ -57,6 +57,7 @@ public:
bool isRestoreAllowed() const override;
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override;
void restoreFromBackup(RestorerFromBackup & restorer) override;
bool containsStorage(std::string_view storage_type) const;
protected:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;

@ -11,6 +11,7 @@
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <Common/TransformEndianness.hpp>
#include <Core/Settings.h>
#include <Interpreters/executeQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
@ -49,6 +50,7 @@ namespace
md5.update(type_storage_chars, strlen(type_storage_chars));
UUID result;
memcpy(&result, md5.digest().data(), md5.digestLength());
transformEndianness<std::endian::native, std::endian::little>(result);
return result;
}

@ -109,7 +109,7 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
writeBinary(this->data(place).numerator, buf);
writeBinaryLittleEndian(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<Denominator>)
writeVarUInt(this->data(place).denominator, buf);
@ -119,7 +119,7 @@ public:
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
readBinary(this->data(place).numerator, buf);
readBinaryLittleEndian(this->data(place).numerator, buf);
if constexpr (std::is_unsigned_v<Denominator>)
readVarUInt(this->data(place).denominator, buf);

@ -100,6 +100,17 @@ void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
}
}
inline void writeBinary(const AggregateFunctionBoundingRatioData::Point & p, WriteBuffer & buf)
{
writePODBinary(p, buf);
}
inline void readBinary(AggregateFunctionBoundingRatioData::Point & p, ReadBuffer & buf)
{
readPODBinary(p, buf);
}
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
{
private:

@ -4,6 +4,8 @@
#include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Interpreters/Context.h>
#include <Core/ServerSettings.h>
namespace DB
@ -43,6 +45,13 @@ inline AggregateFunctionPtr createAggregateFunctionGroupArrayImpl(const DataType
return std::make_shared<GroupArrayGeneralImpl<GroupArrayNodeGeneral, Trait>>(argument_type, parameters, std::forward<TArgs>(args)...);
}
size_t getMaxArraySize()
{
if (auto context = Context::getGlobalContextInstance())
return context->getServerSettings().aggregate_function_group_array_max_element_size;
return 0xFFFFFF;
}
template <bool Tlast>
AggregateFunctionPtr createAggregateFunctionGroupArray(
@ -51,7 +60,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
assertUnary(name, argument_types);
bool limit_size = false;
UInt64 max_elems = std::numeric_limits<UInt64>::max();
UInt64 max_elems = getMaxArraySize();
if (parameters.empty())
{
@ -78,7 +87,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
{
if (Tlast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())");
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters);
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ false, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);
}
else
return createAggregateFunctionGroupArrayImpl<GroupArrayTrait</* Thas_limit= */ true, Tlast, /* Tsampler= */ Sampler::NONE>>(argument_types[0], parameters, max_elems);

@ -21,7 +21,7 @@
#include <type_traits>
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE 0xFFFFFF
#define AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE 0xFFFFFF
namespace DB
@ -128,7 +128,7 @@ class GroupArrayNumericImpl final
public:
explicit GroupArrayNumericImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, max_elems(max_elems_)
@ -263,10 +263,18 @@ public:
}
}
static void checkArraySize(size_t elems, size_t max_elems)
{
if (unlikely(elems > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size {} (maximum: {})", elems, max_elems);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
const size_t size = value.size();
const UInt64 size = value.size();
checkArraySize(size, max_elems);
writeVarUInt(size, buf);
for (const auto & element : value)
writeBinaryLittleEndian(element, buf);
@ -287,13 +295,7 @@ public:
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE);
if (limit_num_elems && unlikely(size > max_elems))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);
checkArraySize(size, max_elems);
auto & value = this->data(place).value;
@ -357,9 +359,17 @@ struct GroupArrayNodeBase
const_cast<char *>(arena->alignedInsert(reinterpret_cast<const char *>(this), sizeof(Node) + size, alignof(Node))));
}
static void checkElementSize(size_t size, size_t max_size)
{
if (unlikely(size > max_size))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array element size {} (maximum: {})", size, max_size);
}
/// Write node to buffer
void write(WriteBuffer & buf) const
{
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
writeVarUInt(size, buf);
buf.write(data(), size);
}
@ -369,9 +379,7 @@ struct GroupArrayNodeBase
{
UInt64 size;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE);
checkElementSize(size, AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE);
Node * node = reinterpret_cast<Node *>(arena->alignedAlloc(sizeof(Node) + size, alignof(Node)));
node->size = size;
@ -455,7 +463,7 @@ class GroupArrayGeneralImpl final
UInt64 seed;
public:
- GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
+ GroupArrayGeneralImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 max_elems_, UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayGeneralData<Node, Trait::sampler != Sampler::NONE>, GroupArrayGeneralImpl<Node, Trait>>(
{data_type_}, parameters_, std::make_shared<DataTypeArray>(data_type_))
, data_type(this->argument_types[0])
@ -596,9 +604,18 @@ public:
}
}
+ static void checkArraySize(size_t elems, size_t max_elems)
+ {
+     if (unlikely(elems > max_elems))
+         throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
+             "Too large array size {} (maximum: {})", elems, max_elems);
+ }
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
- writeVarUInt(data(place).value.size(), buf);
+ UInt64 elems = data(place).value.size();
+ checkArraySize(elems, max_elems);
+ writeVarUInt(elems, buf);
auto & value = data(place).value;
for (auto & node : value)
@ -624,12 +641,7 @@ public:
if (unlikely(elems == 0))
return;
- if (unlikely(elems > AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE))
-     throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
-         "Too large array size (maximum: {})", AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE);
- if (limit_num_elems && unlikely(elems > max_elems))
-     throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size, it should not exceed {}", max_elems);
+ checkArraySize(elems, max_elems);
auto & value = data(place).value;
@ -673,6 +685,6 @@ public:
bool allocatesMemoryInArena() const override { return true; }
};
- #undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ARRAY_SIZE
+ #undef AGGREGATE_FUNCTION_GROUP_ARRAY_MAX_ELEMENT_SIZE
}
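
Note: the hunks above introduce one guard pattern: the array length is validated against the function's own max_elems on both serialization and deserialization, and the fixed 0xFFFFFF cap now applies only to individual element sizes. Below is a minimal standalone C++ sketch of that pattern; the buffer, size prefix, and exception type are stand-ins and not ClickHouse's WriteBuffer/ReadBuffer or Exception machinery.

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>

    static void checkArraySize(size_t elems, size_t max_elems)
    {
        if (elems > max_elems)
            throw std::length_error("Too large array size " + std::to_string(elems)
                                    + " (maximum: " + std::to_string(max_elems) + ")");
    }

    /// Serialize: the size is checked before it is written, so a corrupted in-memory
    /// state cannot produce a state blob that exceeds the declared limit.
    std::vector<uint64_t> serializeState(const std::vector<uint64_t> & value, size_t max_elems)
    {
        checkArraySize(value.size(), max_elems);
        std::vector<uint64_t> blob;
        blob.push_back(value.size());                        // stand-in for writeVarUInt
        blob.insert(blob.end(), value.begin(), value.end());
        return blob;
    }

    /// Deserialize: the size read from the blob is checked before anything is allocated.
    std::vector<uint64_t> deserializeState(const std::vector<uint64_t> & blob, size_t max_elems)
    {
        size_t size = blob.at(0);                            // stand-in for readVarUInt
        checkArraySize(size, max_elems);
        return {blob.begin() + 1, blob.begin() + 1 + size};
    }
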

View File

@ -547,7 +547,7 @@ public:
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
Int32 size_to_write = size ? size : -1;
- writeBinary(size_to_write, buf);
+ writeBinaryLittleEndian(size_to_write, buf);
if (has())
buf.write(getData(), size);
}
@ -573,7 +573,7 @@ public:
{
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
Int32 rhs_size_signed;
- readBinary(rhs_size_signed, buf);
+ readBinaryLittleEndian(rhs_size_signed, buf);
if (rhs_size_signed < 0)
{

View File

@ -258,12 +258,12 @@ struct AggregateFunctionSumData
void write(WriteBuffer & buf) const
{
- writeBinary(sum, buf);
+ writeBinaryLittleEndian(sum, buf);
}
void read(ReadBuffer & buf)
{
- readBinary(sum, buf);
+ readBinaryLittleEndian(sum, buf);
}
T get() const
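
Note: the writeBinary/readBinary → writeBinaryLittleEndian/readBinaryLittleEndian switches in this file and the one above keep the on-wire format of aggregate state fixed as little-endian regardless of host byte order. A minimal standalone sketch of the idea, with illustrative helper names rather than ClickHouse's actual IO helpers:

    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Illustrative stand-in for WriteBuffer/ReadBuffer: a plain byte vector.
    using Buffer = std::vector<unsigned char>;

    template <typename T>
    void writeLittleEndian(T value, Buffer & buf)
    {
        unsigned char bytes[sizeof(T)];
        std::memcpy(bytes, &value, sizeof(T));
        if constexpr (std::endian::native == std::endian::big)
            std::reverse(bytes, bytes + sizeof(T));   // normalize to little-endian on the wire
        buf.insert(buf.end(), bytes, bytes + sizeof(T));
    }

    template <typename T>
    T readLittleEndian(const Buffer & buf, size_t & pos)
    {
        unsigned char bytes[sizeof(T)];
        std::memcpy(bytes, buf.data() + pos, sizeof(T));
        pos += sizeof(T);
        if constexpr (std::endian::native == std::endian::big)
            std::reverse(bytes, bytes + sizeof(T));   // swap back to host order
        T value;
        std::memcpy(&value, bytes, sizeof(T));
        return value;
    }

    // Usage sketch: writeLittleEndian<uint64_t>(sum, buf); sum = readLittleEndian<uint64_t>(buf, pos);
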

View File

@ -315,10 +315,9 @@ struct Adder
{
StringRef value = column.getDataAt(row_num);
- UInt128 key;
SipHash hash;
hash.update(value.data, value.size);
- hash.get128(key);
+ const auto key = hash.get128();
data.set.template insert<const UInt128 &, use_single_level_hash_table>(key);
}
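
Note: this hunk, and the UniqVariadicHash and IQueryTreeNode hunks further down, all move from the out-parameter form hash.get128(key) to a get128() that returns the 128-bit value. A toy sketch of the two API shapes, using a stand-in hash type rather than ClickHouse's SipHash (the mixing below is purely illustrative, not a real hash):

    #include <cstddef>
    #include <cstdint>

    // Toy 128-bit result type; the real code uses UInt128.
    struct UInt128Toy
    {
        uint64_t low64 = 0;
        uint64_t high64 = 0;
    };

    struct ToyHash
    {
        uint64_t state0 = 0;
        uint64_t state1 = 0;

        void update(const void * data, size_t size)
        {
            const auto * p = static_cast<const unsigned char *>(data);
            for (size_t i = 0; i < size; ++i)
            {
                state0 = (state0 ^ p[i]) * 1099511628211ULL;
                state1 = state1 * 6364136223846793005ULL + p[i];
            }
        }

        /// Old shape: fill an out-parameter.
        void get128(UInt128Toy & out) const { out = {state0, state1}; }

        /// New shape: return by value, enabling `const auto key = hash.get128();`
        UInt128Toy get128() const { return {state0, state1}; }
    };
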

View File

@ -1,28 +1,26 @@
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_aggregate_functions .)
- list(REMOVE_ITEM clickhouse_aggregate_functions_sources
+ extract_into_parent_list(clickhouse_aggregate_functions_sources dbms_sources
IAggregateFunction.cpp
AggregateFunctionFactory.cpp
AggregateFunctionCombinatorFactory.cpp
- AggregateFunctionCount.cpp
AggregateFunctionState.cpp
+ AggregateFunctionCount.cpp
parseAggregateFunctionParameters.cpp
FactoryHelpers.cpp
)
- list(REMOVE_ITEM clickhouse_aggregate_functions_headers
+ extract_into_parent_list(clickhouse_aggregate_functions_headers dbms_headers
IAggregateFunction.h
IAggregateFunctionCombinator.h
AggregateFunctionFactory.h
AggregateFunctionCombinatorFactory.h
AggregateFunctionCount.h
AggregateFunctionState.h
- parseAggregateFunctionParameters.h
- AggregateFunctionCount.cpp
FactoryHelpers.h
+ parseAggregateFunctionParameters.h
)
- add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources})
+ add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_headers} ${clickhouse_aggregate_functions_sources})
target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ch_contrib::cityhash)
if(ENABLE_EXAMPLES)

View File

@ -783,6 +783,16 @@ public:
for (size_t i = 0; i < size; ++i)
result[i] = std::numeric_limits<float>::quiet_NaN();
}
+ friend void writeBinary(const Kind & x, WriteBuffer & buf)
+ {
+     writePODBinary(x, buf);
+ }
+ friend void readBinary(Kind & x, ReadBuffer & buf)
+ {
+     readPODBinary(x, buf);
+ }
};
#undef SMALL_THRESHOLD

View File

@ -276,3 +276,12 @@ private:
return NanLikeValueConstructor<ResultType, std::is_floating_point_v<ResultType>>::getValue();
}
};
+ namespace DB
+ {
+ template <typename T>
+ void readBinary(std::pair<T, UInt32> & x, ReadBuffer & buf)
+ {
+     readPODBinary(x, buf);
+ }
+ }

View File

@ -107,9 +107,7 @@ struct UniqVariadicHash<true, false>
++column;
}
- UInt128 key;
- hash.get128(key);
- return key;
+ return hash.get128();
}
};
@ -131,9 +129,7 @@ struct UniqVariadicHash<true, true>
++column;
}
- UInt128 key;
- hash.get128(key);
- return key;
+ return hash.get128();
}
};

View File

@ -20,7 +20,7 @@ struct QueryTreeNodeWithHash
{}
QueryTreeNodePtrType node = nullptr;
- std::pair<UInt64, UInt64> hash;
+ CityHash_v1_0_2::uint128 hash;
};
template <typename T>
@ -55,6 +55,6 @@ struct std::hash<DB::QueryTreeNodeWithHash<T>>
{
size_t operator()(const DB::QueryTreeNodeWithHash<T> & node_with_hash) const
{
- return node_with_hash.hash.first;
+ return node_with_hash.hash.low64;
}
};

View File

@ -229,10 +229,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
}
}
- Hash result;
- hash_state.get128(result);
- return result;
+ return getSipHash128AsPair(hash_state);
}
QueryTreeNodePtr IQueryTreeNode::clone() const
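
Note: the getTreeHash change above, together with the surrounding Analyzer hunks, replaces std::pair<UInt64, UInt64> with CityHash_v1_0_2::uint128, whose halves are named low64/high64. A toy sketch of using such a 128-bit hash as an unordered-map key, with a stand-in struct instead of the CityHash type:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    // Stand-in for CityHash_v1_0_2::uint128: two named 64-bit halves.
    struct TreeHash128
    {
        uint64_t low64 = 0;
        uint64_t high64 = 0;
        bool operator==(const TreeHash128 &) const = default;
    };

    // Same idea as the std::hash specialization in the diff: reuse the low 64 bits.
    template <>
    struct std::hash<TreeHash128>
    {
        size_t operator()(const TreeHash128 & h) const noexcept { return h.low64; }
    };

    // Usage: deduplicate query-tree-like nodes by their 128-bit hash.
    std::unordered_map<TreeHash128, std::string> nodes_by_hash;
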

View File

@ -106,7 +106,7 @@ public:
*/
bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true }) const;
- using Hash = std::pair<UInt64, UInt64>;
+ using Hash = CityHash_v1_0_2::uint128;
using HashState = SipHash;
/** Get tree hash identifying current tree

Some files were not shown because too many files have changed in this diff.