Merge branch 'master' into analyzer-additional-filters

Dmitry Novik 2023-04-12 01:13:38 +02:00 committed by GitHub
commit 235ad55bad
750 changed files with 13281 additions and 7319 deletions


@ -110,6 +110,7 @@ Checks: '*,
-misc-const-correctness,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
@ -148,19 +149,6 @@ Checks: '*,
-readability-use-anyofallof,
-zirkon-*,
-misc-*, # temporarily disabled due to being too slow
# also disable checks in other categories which are aliases of checks in misc-*:
# https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html
-cert-dcl54-cpp, # alias of misc-new-delete-overloads
-hicpp-new-delete-operators, # alias of misc-new-delete-overloads
-cert-fio38-c, # alias of misc-non-copyable-objects
-cert-dcl03-c, # alias of misc-static-assert
-hicpp-static-assert, # alias of misc-static-assert
-cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference
-cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference
-cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator
-cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes
'
WarningsAsErrors: '*'


@ -349,6 +349,13 @@ jobs:
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"


@ -487,6 +487,13 @@ jobs:
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"


@ -550,6 +550,13 @@ jobs:
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"


@ -406,6 +406,13 @@ jobs:
with:
clear-repository: true
submodules: true
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
run: |
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
"$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
du -hs "$GITHUB_WORKSPACE/contrib" ||:
find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"


@ -1,4 +1,4 @@
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com)
[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.


@ -36,7 +36,7 @@
namespace detail
{
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression)
#if defined(__SSE2__)
template <char s0>
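The fold expression above expands into one `==` comparison per template character. A rough sketch of the behaviour (illustrative only, not part of the change; the assumed reason for the NOLINT is that an instantiation with a repeated character would expand into the identical comparison twice, which `misc-redundant-expression` flags):

```cpp
// Sketch: is_in<'\t', ' '>(c) expands to (c == '\t') || (c == ' ').
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }

static_assert(is_in<'\t', ' '>(' '));   // matches one of the listed characters
static_assert(!is_in<'\t', ' '>('x'));  // matches none of them
```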


@ -155,13 +155,13 @@ struct common_type<wide::integer<Bits, Signed>, Arithmetic>
std::is_floating_point_v<Arithmetic>,
Arithmetic,
std::conditional_t<
sizeof(Arithmetic) < Bits * sizeof(long),
sizeof(Arithmetic) * 8 < Bits,
wide::integer<Bits, Signed>,
std::conditional_t<
Bits * sizeof(long) < sizeof(Arithmetic),
Bits < sizeof(Arithmetic) * 8,
Arithmetic,
std::conditional_t<
Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
Bits == sizeof(Arithmetic) * 8 && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
Arithmetic,
wide::integer<Bits, Signed>>>>>;
};
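The rewritten conditions compare bit widths directly instead of mixing a byte count (`sizeof(Arithmetic)`) with `Bits * sizeof(long)`. A minimal sketch with assumed example values:

```cpp
// Sketch (assumed values, not from the change): Bits counts bits while sizeof()
// counts bytes, so the old equality form `Bits * sizeof(long) == sizeof(Arithmetic)`
// could never hold and the equal-width signedness rule was effectively dead code.
#include <cstddef>

constexpr std::size_t Bits = 128;                               // e.g. wide::integer<128, signed>
constexpr std::size_t arithmetic_bits = sizeof(long long) * 8;  // a 64-bit operand

static_assert(arithmetic_bits < Bits);                    // new predicate: 64 < 128, wide integer wins
static_assert(sizeof(long long) != Bits * sizeof(long));  // old equality form: bytes vs bits*bytes, never equal
```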


@ -0,0 +1,81 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "libm.h"
static const float
half[2] = {0.5,-0.5},
ln2hi = 6.9314575195e-1f, /* 0x3f317200 */
ln2lo = 1.4286067653e-6f, /* 0x35bfbe8e */
invln2 = 1.4426950216e+0f, /* 0x3fb8aa3b */
/*
* Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
* |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
*/
P1 = 1.6666625440e-1f, /* 0xaaaa8f.0p-26 */
P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
float expf(float x)
{
float_t hi, lo, c, xx, y;
int k, sign;
uint32_t hx;
GET_FLOAT_WORD(hx, x);
sign = hx >> 31; /* sign bit of x */
hx &= 0x7fffffff; /* high word of |x| */
/* special cases */
if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */
if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */
/* overflow */
x *= 0x1p127f;
return x;
}
if (sign) {
/* underflow */
FORCE_EVAL(-0x1p-149f/x);
if (hx >= 0x42cff1b5) /* x <= -103.972084f */
return 0;
}
}
/* argument reduction */
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */
k = invln2*x + half[sign];
else
k = 1 - sign - sign;
hi = x - k*ln2hi; /* k*ln2hi is exact here */
lo = k*ln2lo;
x = hi - lo;
} else if (hx > 0x39000000) { /* |x| > 2**-14 */
k = 0;
hi = x;
lo = 0;
} else {
/* raise inexact */
FORCE_EVAL(0x1p127f + x);
return 1 + x;
}
/* x is now in primary range */
xx = x*x;
c = x - xx*(P1+xx*P2);
y = 1 + (x*c/(2-c) - lo + hi);
if (k == 0)
return y;
return scalbnf(y, k);
}
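For reference, the routine relies on the identity `exp(x) = 2^k * exp(r)` with `r = x - k*ln2`, which keeps the polynomial approximation on a small interval around zero. A quick numeric check of that identity against the standard library (a sketch, not part of the vendored file):

```cpp
#include <cassert>
#include <cmath>

int main()
{
    float x = 3.7f;                                            // arbitrary test point
    int k = static_cast<int>(std::lround(x * 1.4426950216f));  // round(x / ln 2), invln2 as above
    float r = x - k * 0.69314718f;                             // r = x - k * ln 2, |r| <= ln(2)/2
    float reconstructed = std::ldexp(std::exp(r), k);          // 2^k * exp(r)
    assert(std::fabs(reconstructed - std::exp(x)) < 1e-3f * std::exp(x));
    return 0;
}
```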


@ -0,0 +1,31 @@
#include <math.h>
#include <stdint.h>
float scalbnf(float x, int n)
{
union {float f; uint32_t i;} u;
float_t y = x;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127) {
y *= 0x1p127f;
n -= 127;
if (n > 127)
n = 127;
}
} else if (n < -126) {
y *= 0x1p-126f;
n += 126;
if (n < -126) {
y *= 0x1p-126f;
n += 126;
if (n < -126)
n = -126;
}
}
u.i = (uint32_t)(0x7f+n)<<23;
x = y * u.f;
return x;
}
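The helper scales by powers of two by rebuilding the exponent field directly, clamping `n` so that extreme arguments saturate to infinity or zero instead of wrapping. A small usage sketch (the behaviour matches the standard `scalbnf` contract):

```cpp
#include <cassert>
#include <cmath>

int main()
{
    assert(std::scalbnf(1.5f, 4) == 24.0f);         // 1.5 * 2^4
    assert(std::scalbnf(1.0f, 1000) == HUGE_VALF);  // clamped upward: overflows to +inf
    assert(std::scalbnf(1.0f, -1000) == 0.0f);      // clamped downward: underflows to +0
    return 0;
}
```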

contrib/arrow vendored

@ -1 +1 @@
Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666
Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f


@ -202,6 +202,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/buffer.cc"
"${LIBRARY_DIR}/chunked_array.cc"
"${LIBRARY_DIR}/chunk_resolver.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
@ -268,6 +269,10 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/uri.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/util/byte_size.cc"
"${LIBRARY_DIR}/util/debug.cc"
"${LIBRARY_DIR}/util/tracing.cc"
"${LIBRARY_DIR}/util/atfork_internal.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
@ -301,9 +306,11 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
"${LIBRARY_DIR}/compute/exec/partition_util.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
@ -317,21 +324,28 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
@ -340,13 +354,15 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
"${LIBRARY_DIR}/compute/row/grouper.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/row_internal.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
@ -357,7 +373,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/ipc/writer.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
)
add_definitions(-DARROW_WITH_LZ4)

contrib/cctz vendored

@ -1 +1 @@
Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d
Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2

contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02
Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617


@ -15,10 +15,6 @@ if(NOT AWK_PROGRAM)
message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.")
endif()
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
add_compile_definitions(USE_BORINGSSL=1)
endif ()
set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")
@ -162,6 +158,11 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/des_keys.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/f_parity.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/enc_provider/rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/hash_provider/hash_md4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4/md4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
# "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
@ -226,7 +227,6 @@ set(ALL_SRCS
# "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
#"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
@ -474,6 +474,14 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/krb5/krb5_libinit.c"
)
if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
add_compile_definitions(USE_BORINGSSL=1)
else()
set(ALL_SRCS ${ALL_SRCS}
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
)
endif()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
COMMAND /bin/sh
@ -673,6 +681,7 @@ target_include_directories(_krb5 PRIVATE
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego"
"${KRB5_SOURCE_DIR}/util/et"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl"
"${KRB5_SOURCE_DIR}/lib/crypto/krb"
"${KRB5_SOURCE_DIR}/util/profile"


@ -0,0 +1,19 @@
#!/bin/sh
set -e
git config submodule."contrib/llvm-project".update '!../sparse-checkout/update-llvm-project.sh'
git config submodule."contrib/croaring".update '!../sparse-checkout/update-croaring.sh'
git config submodule."contrib/aws".update '!../sparse-checkout/update-aws.sh'
git config submodule."contrib/openssl".update '!../sparse-checkout/update-openssl.sh'
git config submodule."contrib/boringssl".update '!../sparse-checkout/update-boringssl.sh'
git config submodule."contrib/arrow".update '!../sparse-checkout/update-arrow.sh'
git config submodule."contrib/grpc".update '!../sparse-checkout/update-grpc.sh'
git config submodule."contrib/orc".update '!../sparse-checkout/update-orc.sh'
git config submodule."contrib/h3".update '!../sparse-checkout/update-h3.sh'
git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh'
git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh'
git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh'
git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh'
git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh'
git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh'


@ -0,0 +1,12 @@
#!/bin/sh
echo "Using sparse checkout for arrow"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/cpp/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,13 @@
#!/bin/sh
echo "Using sparse checkout for aws-s2n-tls"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '!/docs/*' >> $FILES_TO_CHECKOUT
echo '!/compliance/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,13 @@
#!/bin/sh
echo "Using sparse checkout for aws"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,85 @@
#!/bin/sh
echo "Using sparse checkout for boost"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/boost/*' > $FILES_TO_CHECKOUT
echo '!/boost/*/*' >> $FILES_TO_CHECKOUT
echo '/boost/algorithm/*' >> $FILES_TO_CHECKOUT
echo '/boost/any/*' >> $FILES_TO_CHECKOUT
echo '/boost/atomic/*' >> $FILES_TO_CHECKOUT
echo '/boost/assert/*' >> $FILES_TO_CHECKOUT
echo '/boost/bind/*' >> $FILES_TO_CHECKOUT
echo '/boost/concept/*' >> $FILES_TO_CHECKOUT
echo '/boost/config/*' >> $FILES_TO_CHECKOUT
echo '/boost/container/*' >> $FILES_TO_CHECKOUT
echo '/boost/container_hash/*' >> $FILES_TO_CHECKOUT
echo '/boost/context/*' >> $FILES_TO_CHECKOUT
echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
echo '/boost/core/*' >> $FILES_TO_CHECKOUT
echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
echo '/boost/filesystem/*' >> $FILES_TO_CHECKOUT
echo '/boost/functional/*' >> $FILES_TO_CHECKOUT
echo '/boost/function/*' >> $FILES_TO_CHECKOUT
echo '/boost/geometry/*' >> $FILES_TO_CHECKOUT
echo '/boost/graph/*' >> $FILES_TO_CHECKOUT
echo '/boost/heap/*' >> $FILES_TO_CHECKOUT
echo '/boost/integer/*' >> $FILES_TO_CHECKOUT
echo '/boost/intrusive/*' >> $FILES_TO_CHECKOUT
echo '/boost/iostreams/*' >> $FILES_TO_CHECKOUT
echo '/boost/io/*' >> $FILES_TO_CHECKOUT
echo '/boost/iterator/*' >> $FILES_TO_CHECKOUT
echo '/boost/math/*' >> $FILES_TO_CHECKOUT
echo '/boost/move/*' >> $FILES_TO_CHECKOUT
echo '/boost/mpl/*' >> $FILES_TO_CHECKOUT
echo '/boost/multi_index/*' >> $FILES_TO_CHECKOUT
echo '/boost/multiprecision/*' >> $FILES_TO_CHECKOUT
echo '/boost/numeric/*' >> $FILES_TO_CHECKOUT
echo '/boost/predef/*' >> $FILES_TO_CHECKOUT
echo '/boost/preprocessor/*' >> $FILES_TO_CHECKOUT
echo '/boost/program_options/*' >> $FILES_TO_CHECKOUT
echo '/boost/range/*' >> $FILES_TO_CHECKOUT
echo '/boost/regex/*' >> $FILES_TO_CHECKOUT
echo '/boost/smart_ptr/*' >> $FILES_TO_CHECKOUT
echo '/boost/type_index/*' >> $FILES_TO_CHECKOUT
echo '/boost/type_traits/*' >> $FILES_TO_CHECKOUT
echo '/boost/system/*' >> $FILES_TO_CHECKOUT
echo '/boost/tti/*' >> $FILES_TO_CHECKOUT
echo '/boost/utility/*' >> $FILES_TO_CHECKOUT
echo '/boost/lexical_cast/*' >> $FILES_TO_CHECKOUT
echo '/boost/optional/*' >> $FILES_TO_CHECKOUT
echo '/boost/property_map/*' >> $FILES_TO_CHECKOUT
echo '/boost/pending/*' >> $FILES_TO_CHECKOUT
echo '/boost/multi_array/*' >> $FILES_TO_CHECKOUT
echo '/boost/tuple/*' >> $FILES_TO_CHECKOUT
echo '/boost/icl/*' >> $FILES_TO_CHECKOUT
echo '/boost/unordered/*' >> $FILES_TO_CHECKOUT
echo '/boost/typeof/*' >> $FILES_TO_CHECKOUT
echo '/boost/parameter/*' >> $FILES_TO_CHECKOUT
echo '/boost/mp11/*' >> $FILES_TO_CHECKOUT
echo '/boost/archive/*' >> $FILES_TO_CHECKOUT
echo '/boost/function_types/*' >> $FILES_TO_CHECKOUT
echo '/boost/serialization/*' >> $FILES_TO_CHECKOUT
echo '/boost/fusion/*' >> $FILES_TO_CHECKOUT
echo '/boost/variant/*' >> $FILES_TO_CHECKOUT
echo '/boost/format/*' >> $FILES_TO_CHECKOUT
echo '/boost/locale/*' >> $FILES_TO_CHECKOUT
echo '/boost/random/*' >> $FILES_TO_CHECKOUT
echo '/boost/spirit/*' >> $FILES_TO_CHECKOUT
echo '/boost/uuid/*' >> $FILES_TO_CHECKOUT
echo '/boost/xpressive/*' >> $FILES_TO_CHECKOUT
echo '/boost/asio/*' >> $FILES_TO_CHECKOUT
echo '/boost/circular_buffer/*' >> $FILES_TO_CHECKOUT
echo '/boost/proto/*' >> $FILES_TO_CHECKOUT
echo '/boost/qvm/*' >> $FILES_TO_CHECKOUT
echo '/boost/property_tree/*' >> $FILES_TO_CHECKOUT
echo '/libs/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,14 @@
#!/bin/sh
echo "Using sparse checkout for boringsll"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
echo '!/crypto/cipher_extra/test/*' >> $FILES_TO_CHECKOUT
echo '!/third_party/wycheproof_testvectors/*' >> $FILES_TO_CHECKOUT
echo '!/third_party/googletest/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,12 @@
#!/bin/sh
echo "Using sparse checkout for brotli"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/c/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,12 @@
#!/bin/sh
echo "Using sparse checkout for croaring"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/benchmarks/*' >> $FILES_TO_CHECKOUT
echo '!/tests/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,22 @@
#!/bin/sh
echo "Using sparse checkout for grpc"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '/test/build/*' >> $FILES_TO_CHECKOUT
echo '!/tools/*' >> $FILES_TO_CHECKOUT
echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
echo '!/examples/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
# FIXME why do we need csharp?
#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
echo '!/src/python/*' >> $FILES_TO_CHECKOUT
echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
echo '!/src/php/*' >> $FILES_TO_CHECKOUT
echo '!/src/ruby/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,12 @@
#!/bin/sh
echo "Using sparse checkout for h3"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/tests/*' >> $FILES_TO_CHECKOUT
echo '!/website/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,12 @@
#!/bin/sh
echo "Using sparse checkout for icu"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/icu4c/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,16 @@
#!/bin/sh
echo "Using sparse checkout for libxml2"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/result/*' >> $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
echo '!/os400/*' >> $FILES_TO_CHECKOUT
echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
echo '!/python/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,27 @@
#!/bin/sh
echo "Using sparse checkout for llvm-project"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/llvm/*' >> $FILES_TO_CHECKOUT
echo '!/llvm/*/*' >> $FILES_TO_CHECKOUT
echo '/llvm/cmake/*' >> $FILES_TO_CHECKOUT
echo '/llvm/projects/*' >> $FILES_TO_CHECKOUT
echo '/llvm/include/*' >> $FILES_TO_CHECKOUT
echo '/llvm/lib/*' >> $FILES_TO_CHECKOUT
echo '/llvm/utils/TableGen/*' >> $FILES_TO_CHECKOUT
echo '/libcxxabi/*' >> $FILES_TO_CHECKOUT
echo '!/libcxxabi/test/*' >> $FILES_TO_CHECKOUT
echo '/libcxx/*' >> $FILES_TO_CHECKOUT
echo '!/libcxx/test/*' >> $FILES_TO_CHECKOUT
echo '/libunwind/*' >> $FILES_TO_CHECKOUT
echo '!/libunwind/test/*' >> $FILES_TO_CHECKOUT
echo '/compiler-rt/*' >> $FILES_TO_CHECKOUT
echo '!/compiler-rt/test/*' >> $FILES_TO_CHECKOUT
echo '/cmake/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,15 @@
#!/bin/sh
echo "Using sparse checkout for openssl"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
echo '!/test/*' >> $FILES_TO_CHECKOUT
echo '!/doc/*' >> $FILES_TO_CHECKOUT
echo '!/providers/*' >> $FILES_TO_CHECKOUT
echo '!/apps/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,13 @@
#!/bin/sh
echo "Using sparse checkout for orc"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '/*' > $FILES_TO_CHECKOUT
echo '!/*/*' >> $FILES_TO_CHECKOUT
echo '/c++/*' >> $FILES_TO_CHECKOUT
echo '/proto/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD


@ -0,0 +1,13 @@
#!/bin/sh
echo "Using sparse checkout for protobuf"
FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
echo '!/*' > $FILES_TO_CHECKOUT
echo '/*/*' >> $FILES_TO_CHECKOUT
echo '/src/*' >> $FILES_TO_CHECKOUT
echo '/cmake/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD

contrib/update-submodules.sh vendored Executable file

@ -0,0 +1,11 @@
#!/bin/sh
set -e
WORKDIR=$(dirname "$0")
WORKDIR=$(readlink -f "${WORKDIR}")
"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
git submodule init
git submodule sync
git submodule update --depth=1


@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.16.32-lts (7c4be737bd0) FIXME as compared to v22.8.15.23-lts (d36fa168bbf)
#### Build/Testing/Packaging Improvement
* Backported in [#48344](https://github.com/ClickHouse/ClickHouse/issues/48344): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#48250](https://github.com/ClickHouse/ClickHouse/issues/48250): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)).
* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)).
* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)).
* Backport to 22.8: Fix moving broken parts to the detached for the object storage disk on startup [#48273](https://github.com/ClickHouse/ClickHouse/pull/48273) ([Aleksei Filatov](https://github.com/aalexfvk)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)).


@ -39,9 +39,15 @@ Next, you need to download the source files onto your working machine. This is c
In the command line terminal run:
git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git
git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files):
git clone git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
./contrib/update-submodules.sh
Note: please, substitute *your_github_username* with what is appropriate!
This command will create a directory `ClickHouse` containing the working copy of the project.


@ -140,3 +140,4 @@ DESCRIBE TABLE test_database.test_table;
## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)


@ -177,4 +177,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql)
## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)


@ -122,3 +122,7 @@ FROM test.mv_visits
GROUP BY StartDate
ORDER BY StartDate;
```
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)


@ -191,3 +191,7 @@ is performance. In practice, users often search for multiple terms at once. For
'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also
means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
:::
## Related Content
- Blog: [Introducing Inverted Indices in ClickHouse](https://clickhouse.com/blog/clickhouse-search-with-inverted-indices)


@ -8,11 +8,18 @@ sidebar_label: Data Replication
:::note
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
```sql
ENGINE = ReplicatedReplacingMergeTree(
'/clickhouse/tables/{shard}/table_name',
'{replica}',
ver
)
```
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
```
with:
```
```sql
ENGINE = ReplicatedReplacingMergeTree
```
:::


@ -186,3 +186,7 @@ ARRAY JOIN
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.
For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)


@ -78,7 +78,8 @@ Of course, it's possible to manually run `CREATE TABLE` with same path on nonrel
### Inserts
When new rows are inserted into `KeeperMap`, if the key already exists, the value will be updated, otherwise new key is created.
When new rows are inserted into `KeeperMap`, if the key does not exist, a new entry for the key is created.
If the key exists, and setting `keeper_map_strict_mode` is set to `true`, an exception is thrown, otherwise, the value for the key is overwritten.
Example:
@ -89,6 +90,7 @@ INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2);
### Deletes
Rows can be deleted using `DELETE` query or `TRUNCATE`.
If the key exists, and setting `keeper_map_strict_mode` is set to `true`, fetching and deleting data will succeed only if it can be executed atomically.
```sql
DELETE FROM keeper_map_table WHERE key LIKE 'some%' AND v1 > 1;
@ -105,7 +107,12 @@ TRUNCATE TABLE keeper_map_table;
### Updates
Values can be updated using `ALTER TABLE` query. Primary key cannot be updated.
If setting `keeper_map_strict_mode` is set to `true`, fetching and updating data will succeed only if it's executed atomically.
```sql
ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
```
## Related content
- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine)


@ -2499,7 +2499,9 @@ LIMIT 20
We welcome exact and improved solutions here.
# Related Content
## Related Content
- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits)
- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)
- Blog: [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits)
- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)
- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine)
- Blog: [A Story of Open-source GitHub Activity using ClickHouse + Grafana](https://clickhouse.com/blog/introduction-to-clickhouse-and-grafana-webinar)


@ -78,7 +78,7 @@ The supported formats are:
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [LineAsString](#lineasstring) | ✔ | ✔ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSO
| ClickHouse type | BSON Type |
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSO
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document |
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
For input it uses the following correspondence between BSON types and ClickHouse types:
| BSON Type | ClickHouse Type |
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| BSON Type | ClickHouse Type |
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8).
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value.
@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema).
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |
Integer types can be converted into each other during input/output.
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested.
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
@ -1872,6 +1877,13 @@ Column names must:
Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively.
### Example Data {#example-data-avro}
Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
```sql
DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
```
## AvroConfluent {#data-format-avro-confluent}
AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
@ -1931,30 +1943,31 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|----------------------------------------------------|-----------------------------------------------------------------|------------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-------------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@ -2000,31 +2013,32 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
|-----------------------------------------|-----------------------------------------------------------------|----------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
|-----------------------------------------|------------------------------------------------------------------------------------------------------------|----------------------------|
| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.
@ -2073,23 +2087,26 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|---------------------------------------|---------------------------------------------------------------|--------------------------|
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|---------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------------------|
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
| `Tinyint` | [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `Tinyint` |
| `Smallint` | [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `Smallint` |
| `Int` | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
| `Bigint`                              | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)                                                       | `Bigint`                 |
| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
| `Int`                                 | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)                                                           | `Int`                    |
| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` |
| `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` |
| `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` |
Other types are not supported.

View File

@ -61,3 +61,7 @@ FROM system.opentelemetry_span_log
```
If any errors occur, the part of the log data for which the error occurred is silently lost. Check the server log for error messages if the data does not arrive.
## Related Content
- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)

View File

@ -124,3 +124,7 @@ Finally, entries in the query cache are not shared between users due to security
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
be marked accessible by other users (i.e. shared) by supplying setting
[query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
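As an illustration, here is a minimal sketch of marking a cache entry as shared. The table name is hypothetical, and `use_query_cache` is the setting that enables caching for the query:

```sql
-- Hypothetical table; cache the result and mark the cache entry as shared between users.
SELECT count()
FROM some_table
SETTINGS use_query_cache = 1, query_cache_share_between_users = 1;
```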
## Related Content
- Blog: [Introducing the ClickHouse Query Cache](https://clickhouse.com/blog/introduction-to-the-clickhouse-query-cache-and-design)

View File

@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08
absolute_delay: 99
total_replicas: 5
active_replicas: 5
lost_part_count: 0
last_queue_update_exception:
zookeeper_exception:
replica_is_active: {'r1':1,'r2':1}
@ -90,6 +91,7 @@ The next 4 columns have a non-zero value only where there is an active session w
- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas).
- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase.
- `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper.
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active.
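As an illustration, a minimal query sketch using the columns above, including the new `lost_part_count` column:

```sql
-- Find replicated tables that have lost parts or lagging replicas.
SELECT database, table, lost_part_count, absolute_delay, active_replicas, total_replicas
FROM system.replicas
WHERE lost_part_count > 0 OR active_replicas < total_replicas;
```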

View File

@ -11,8 +11,16 @@ Columns:
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration; the data fills the volumes according to this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`).
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
- `volume_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of volume. Can have one of the following values:
- `JBOD`
- `SINGLE_DISK`
- `UNKNOWN`
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. Disables TTL moves on data part INSERT. By default, if a data part that has already expired by the TTL move rule is inserted, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts when the destination volume/disk is slow (e.g. S3).
- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values:
- `ROUND_ROBIN`
- `LEAST_USED`
If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
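A minimal sketch of a query over this table, using the columns described above:

```sql
-- List volumes per storage policy together with their type and load balancing policy.
SELECT policy_name, volume_name, volume_type, disks, load_balancing
FROM system.storage_policies
ORDER BY policy_name, volume_priority;
```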

View File

@ -74,7 +74,7 @@ Never set the block size too small or too large.
You can use RAID-0 on SSD.
Regardless of RAID use, always use replication for data security.
Enable NCQ with a long queue. For HDD, choose the CFQ scheduler, and for SSD, choose noop. Don't reduce the readahead setting.
Enable NCQ with a long queue. For HDD, choose the mq-deadline or CFQ scheduler, and for SSD, choose noop. Don't reduce the readahead setting.
For HDD, enable the write cache.
Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is enabled for NVME and SSD disks in your OS (usually it's implemented using a cronjob or systemd service).

View File

@ -6,12 +6,162 @@ sidebar_label: clickhouse-local
# clickhouse-local
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
## Related Content
- Blog: [Extracting, Converting, and Querying Data in Local Files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local)
## When to use clickhouse-local vs. ClickHouse
`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without the need for a complex installation process.
While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`.
Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
## Download clickhouse-local
`clickhouse-local` is executed using the same `clickhouse` binary that runs the ClickHouse server and `clickhouse-client`. The easiest way to download the latest version is with the following command:
```bash
curl https://clickhouse.com/ | sh
```
:::note
The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx).
:::
## Query data in a CSV file using SQL
A common use of `clickhouse-local` is to run ad-hoc queries on files, without having to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
If the file is sitting on the same machine as `clickhouse-local`, use the `file` table function. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
```
ClickHouse knows the file uses a tab-separated format from the filename extension. If you need to explicitly specify the format, simply add one of the [many ClickHouse input formats](../../interfaces/formats.md):
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')"
```
The `file` table function creates a table, and you can use `DESCRIBE` to see the inferred schema:
```bash
./clickhouse local -q "DESCRIBE file('reviews.tsv')"
```
```response
marketplace Nullable(String)
customer_id Nullable(Int64)
review_id Nullable(String)
product_id Nullable(String)
product_parent Nullable(Int64)
product_title Nullable(String)
product_category Nullable(String)
star_rating Nullable(Int64)
helpful_votes Nullable(Int64)
total_votes Nullable(Int64)
vine Nullable(String)
verified_purchase Nullable(String)
review_headline Nullable(String)
review_body Nullable(String)
review_date Nullable(Date)
```
Let's find a product with the highest rating:
```bash
./clickhouse local -q "SELECT
argMax(product_title,star_rating),
max(star_rating)
FROM file('reviews.tsv')"
```
```response
Monopoly Junior Board Game 5
```
## Query data in a Parquet file in AWS S3
If you have a file in S3, use `clickhouse-local` and the `s3` table function to query the file in place (without inserting the data into a ClickHouse table). We have a file named `house_0.parquet` in a public bucket that contains home prices of property sold in the United Kingdom. Let's see how many rows it has:
```bash
./clickhouse local -q "
SELECT count()
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')"
```
The file has 2.7M rows:
```response
2772030
```
It's always useful to see the schema that ClickHouse infers from the file:
```bash
./clickhouse local -q "DESCRIBE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')"
```
```response
price Nullable(Int64)
date Nullable(UInt16)
postcode1 Nullable(String)
postcode2 Nullable(String)
type Nullable(String)
is_new Nullable(UInt8)
duration Nullable(String)
addr1 Nullable(String)
addr2 Nullable(String)
street Nullable(String)
locality Nullable(String)
town Nullable(String)
district Nullable(String)
county Nullable(String)
```
Let's see what the most expensive neighborhoods are:
```bash
./clickhouse local -q "
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 10"
```
```response
LONDON CITY OF LONDON 886 2271305 █████████████████████████████████████████████▍
LEATHERHEAD ELMBRIDGE 206 1176680 ███████████████████████▌
LONDON CITY OF WESTMINSTER 12577 1108221 ██████████████████████▏
LONDON KENSINGTON AND CHELSEA 8728 1094496 █████████████████████▉
HYTHE FOLKESTONE AND HYTHE 130 1023980 ████████████████████▍
CHALFONT ST GILES CHILTERN 113 835754 ████████████████▋
AMERSHAM BUCKINGHAMSHIRE 113 799596 ███████████████▉
VIRGINIA WATER RUNNYMEDE 356 789301 ███████████████▊
BARNET ENFIELD 282 740514 ██████████████▊
NORTHWOOD THREE RIVERS 184 731609 ██████████████▋
```
:::tip
When you are ready to insert your files into ClickHouse, start up a ClickHouse server and insert the results of your `file` and `s3` table functions into a `MergeTree` table. View the [Quick Start](../../quick-start.mdx) for more details.
:::
By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
## Usage {#usage}
By default `clickhouse-local` has access to data of a ClickHouse server on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
Basic usage (Linux):
``` bash
@ -24,7 +174,9 @@ Basic usage (Mac):
$ ./clickhouse local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
```
Also supported on Windows through WSL2.
:::note
`clickhouse-local` is also supported on Windows through WSL2.
:::
Arguments:

View File

@ -285,3 +285,8 @@ FROM people
│ [3,2] │ [11.5,12.949999809265137] │
└────────┴───────────────────────────┘
```
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)

View File

@ -6,7 +6,7 @@ title: deltaSumTimestamp
Adds the difference between consecutive rows. If the difference is negative, it is ignored.
This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging.
This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the correct order, without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it's seen, so the values (states) of the function are correctly computed during merging of parts.
To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function.
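As an illustration, here is a minimal sketch with ad-hoc values (the data is hypothetical):

```sql
-- Positive differences between consecutive rows are summed; the negative step 3 -> 2 is ignored.
SELECT deltaSumTimestamp(value, ts)
FROM values('ts DateTime, value UInt32',
    ('2023-01-01 00:00:00', 1),
    ('2023-01-01 00:00:01', 3),
    ('2023-01-01 00:00:02', 2));
```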

View File

@ -0,0 +1,76 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileApprox
sidebar_position: 204
---
# quantileApprox
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.
`quantileApprox` is different from other quantile functions in ClickHouse, because it enables the user to control the accuracy of the approximate quantile result.
**Syntax**
``` sql
quantileApprox(accuracy, level)(expr)
```
Alias: `medianApprox`.
**Arguments**
- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy.
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Returned value**
- Quantile of the specified level and accuracy.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
``` sql
SELECT quantileApprox(1, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(1, 0.25)(plus(number, 1))─┐
│ 1 │
└──────────────────────────────────────────┘
SELECT quantileApprox(10, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(10, 0.25)(plus(number, 1))─┐
│ 156 │
└───────────────────────────────────────────┘
SELECT quantileApprox(100, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(100, 0.25)(plus(number, 1))─┐
│ 251 │
└────────────────────────────────────────────┘
SELECT quantileApprox(1000, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐
│ 249 │
└─────────────────────────────────────────────┘
```
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -114,3 +114,59 @@ Result:
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
└─────────────────────────────────────────────────────────────────────┘
```
## quantilesApprox
`quantilesApprox` works similarly to `quantileApprox` but allows calculating quantiles at different levels simultaneously and returns an array.
**Syntax**
``` sql
quantilesApprox(accuracy, level1, level2, ...)(expr)
```
**Returned value**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [1,1,1] │
└──────────────────────────────────────────────────────┘
SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [156,413,659] │
└───────────────────────────────────────────────────────┘
SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [251,498,741] │
└────────────────────────────────────────────────────────┘
SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [249,499,749] │
└─────────────────────────────────────────────────────────┘
```

View File

@ -63,3 +63,8 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP
## Usage Example
See [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine description.
## Related Content
- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)

View File

@ -108,3 +108,8 @@ Result:
- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
## Related content
- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)

View File

@ -645,7 +645,7 @@ For an alternative to `date\_diff`, see function `age`.
date_diff('unit', startdate, enddate, [timezone])
```
Aliases: `dateDiff`, `DATE_DIFF`.
Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`.
**Arguments**
@ -1264,7 +1264,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded (1-31) | &nbsp; 2 |
| %f | fractional second from the fractional part of DateTime64 | 1234560 |
| %f | fractional second, see 'Note 1' below | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %m | month as an integer number (01-12) | 01 |
| %M | minute (00-59) | 33 |
| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character () | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM |
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM |
| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
| %s | second (00-59) | 44 |
| %S | second (00-59) | 44 |
| %t | horizontal-tab character () | |
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 |
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as a integer number with Sunday as 0 (0-6) | 2 |
@ -1295,6 +1295,10 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %z | Time offset from UTC as +HHMM or -HHMM | -0500 |
| %% | a % sign | % |
Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0) if the formatted value is a Date, Date32 or DateTime (which have no fractional seconds) or a DateTime64 with a precision of 0. The previous behavior can be restored using setting `formatdatetime_f_prints_single_zero = 1`.
Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
**Example**
Query:

View File

@ -441,11 +441,11 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
## javaHash
Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Note that Java only supports calculating the hash of signed integers, so if you want to calculate the hash of an unsigned integer you must cast it to the proper signed ClickHouse type.
@ -660,6 +660,45 @@ Result:
└──────────────────────┴─────────────────────┘
```
## kafkaMurmurHash
Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482) and without the highest bit to be compatible with [Default Partitioner](https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java#L328).
**Syntax**
```sql
kafkaMurmurHash(par1, ...)
```
**Arguments**
- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types).
**Returned value**
- Calculated hash value.
Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
kafkaMurmurHash('foobar') AS res1,
kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2
```
Result:
```response
┌───────res1─┬─────res2─┐
│ 1357151166 │ 85479775 │
└────────────┴──────────┘
```
## murmurHash3_32, murmurHash3_64
Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value.

View File

@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions.
## replaceOne(haystack, pattern, replacement)
Replaces the first occurrence of the substring pattern (if it exists) in haystack by the replacement string.
pattern and replacement must be constants.
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
Replaces all occurrences of the substring pattern in haystack by the replacement string.
Alias: `replace`.
## replaceRegexpOne(haystack, pattern, replacement)
Replaces the first occurrence of the substring matching the regular expression pattern in haystack by the replacement string.
pattern must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
replacement must be a plain constant string or a constant string containing substitutions `\0-\9`.
pattern must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
replacement must be a plain string or a string containing substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the pattern or replacement string, escape it using `\`.
Also keep in mind that string literals require extra escaping.
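For example, a minimal sketch using capturing-group substitutions (note the doubled backslashes required by string-literal escaping):

```sql
-- Reorder a date using the capturing groups \1, \2, \3.
SELECT replaceRegexpOne('2018-01-02', '(\\d{4})-(\\d{2})-(\\d{2})', '\\3/\\2/\\1') AS res;
-- res: '02/01/2018'
```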
@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
└─────────────────────┘
```
Alias: `REGEXP_REPLACE`.
## regexpQuoteMeta(s)
The function adds a backslash before some predefined characters in the string.

View File

@ -68,9 +68,9 @@ Result:
## mapFromArrays
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).
Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Note that the second argument can also be a [Map](../../sql-reference/data-types/map.md); in that case it is cast to an Array during execution.
The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
**Syntax**
@ -82,11 +82,11 @@ Alias: `MAP_FROM_ARRAYS(keys, values)`
**Arguments**
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
- `values` - Given value array to create a map from.
- `values` - Given value array or map to create a map from.
**Returned value**
- A map whose keys and values are constructed from the key and value arrays
- A map whose keys and values are constructed from the key array and value array/map.
**Example**
@ -94,13 +94,17 @@ Query:
```sql
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
```
```text
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
│ {'a':1,'b':2,'c':3} │
└───────────────────────────────────────────┘
```
```sql
SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
```
```text
┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐
│ {1:('a',1),2:('b',2),3:('c',3)}                       │
└───────────────────────────────────────────────────────┘
```
## mapAdd

View File

@ -1245,7 +1245,6 @@ Returns DateTime values parsed from input string according to a MySQL style form
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
- %f: fractional second
- %Q: Quarter (1-4)
**Example**

View File

@ -28,3 +28,7 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do
- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting
## Related content
- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)

View File

@ -61,3 +61,7 @@ For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active fo
:::
For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.
## Related content
- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)

View File

@ -27,3 +27,8 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do
- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting
## Related content
- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)

View File

@ -364,3 +364,4 @@ The window view is useful in the following scenarios:
## Related Content
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)

View File

@ -55,3 +55,7 @@ With the described implementation now we can see what can negatively affect 'DEL
- Table having a very large number of data parts
- Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file.
## Related content
- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)

View File

@ -22,6 +22,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
Deletes the table.
:::tip
Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
:::
Syntax:
``` sql

View File

@ -18,6 +18,10 @@ FROM <left_table>
Expressions from the `ON` clause and columns from the `USING` clause are called “join keys”. Unless otherwise stated, a join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching “join keys”, which might produce results with many more rows than the source tables.
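A minimal sketch (the tables `t1` and `t2` are hypothetical):

```sql
-- 'id' is the join key here; only rows with matching ids are combined.
SELECT t1.id, t2.name
FROM t1
INNER JOIN t2 ON t1.id = t2.id;
```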
## Related Content
- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Part 1](https://clickhouse.com/blog/clickhouse-fully-supports-joins)
## Supported Types of JOIN
All standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) types are supported:

View File

@ -114,11 +114,11 @@ This will also create system tables even if message queue is empty.
## RELOAD CONFIG
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper.
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper; it only reloads `USER` configuration that is stored in `users.xml`. To reload all `USER` configuration, use `SYSTEM RELOAD USERS`.
## RELOAD USERS
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage.
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage.
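A minimal sketch of both statements:

```sql
-- Reloads the server configuration and users.xml-based access entities.
SYSTEM RELOAD CONFIG;
-- Reloads all access storages, including local-disk and replicated (ZooKeeper) storage.
SYSTEM RELOAD USERS;
```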
## SHUTDOWN
@ -224,6 +224,14 @@ Clears freezed backup with the specified name from all the disks. See more about
SYSTEM UNFREEZE WITH NAME <backup_name>
```
### WAIT LOADING PARTS
Wait until all asynchronously loading data parts of a table (outdated data parts) are loaded.
``` sql
SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
```
## Managing ReplicatedMergeTree Tables
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.

View File

@ -0,0 +1,99 @@
---
slug: /en/sql-reference/statements/undrop
sidebar_label: UNDROP
---
# UNDROP TABLE
Cancels the dropping of the table.
Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an Atomic database
within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in
a system table called `system.dropped_tables`.
If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.
:::note
UNDROP TABLE is experimental. To use it add this setting:
```sql
set allow_experimental_undrop_table_query = 1;
```
:::
:::tip
Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
:::
Syntax:
``` sql
UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
```
**Example**
``` sql
set allow_experimental_undrop_table_query = 1;
```
```sql
CREATE TABLE undropMe
(
`id` UInt8
)
ENGINE = MergeTree
ORDER BY id
```
```sql
DROP TABLE undropMe
```
```sql
SELECT *
FROM system.dropped_tables
FORMAT Vertical
```
```response
Row 1:
──────
index: 0
database: default
table: undropMe
uuid: aa696a1a-1d70-4e60-a841-4c80827706cc
engine: MergeTree
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
table_dropped_time: 2023-04-05 14:12:12
1 row in set. Elapsed: 0.001 sec.
```
```sql
UNDROP TABLE undropMe
```
```response
Ok.
```
```sql
SELECT *
FROM system.dropped_tables
FORMAT Vertical
```
```response
Ok.
0 rows in set. Elapsed: 0.001 sec.
```
```sql
DESCRIBE TABLE undropMe
FORMAT Vertical
```
```response
Row 1:
──────
name: id
type: UInt8
default_type:
default_expression:
comment:
codec_expression:
ttl_expression:
```

View File

@ -14,7 +14,7 @@ The `INSERT` query uses both parsers:
INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
```
The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions).
The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions).
Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed.
It allows for avoiding issues with large `INSERT` queries.
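As an illustration, a minimal sketch using the table `t` from the example above:

```sql
SET input_format_values_interpret_expressions = 1;
-- The expressions 1 + 2 and upper('abc') cannot be handled by the fast stream parser,
-- so they are evaluated by the full parser as SQL expressions.
INSERT INTO t VALUES (1 + 2, upper('abc'));
```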
@ -45,7 +45,7 @@ You can check whether a data type name is case-sensitive in the [system.data_typ
In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**.
Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`.
Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`.
## Identifiers
@ -54,7 +54,7 @@ Identifiers are:
- Cluster, database, table, partition, and column names.
- Functions.
- Data types.
- [Expression aliases](#syntax-expression_aliases).
- [Expression aliases](#expression_aliases).
Identifiers can be quoted or non-quoted. The latter is preferred.
@ -108,7 +108,7 @@ Depending on the data format (input or output), `NULL` may have a different repr
There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation.
In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`.
In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#is-null) and [IS NOT NULL](../sql-reference/operators/index.md#is-not-null) operators and the related functions `isNull` and `isNotNull`.
### Heredoc
@ -149,7 +149,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` -
SET param_a = 13;
SET param_b = 'str';
SET param_c = '2022-08-04 18:30:53';
SET param_d = {'10': [11, 12], '13': [14, 15]}';
SET param_d = {'10': [11, 12], '13': [14, 15]};
SELECT
{a: UInt32},
@ -166,7 +166,7 @@ Result:
If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`:
```sql
```bash
clickhouse-client --param_message='hello' --query="SELECT {message: String}"
```
@ -190,7 +190,7 @@ Query parameters are not general text substitutions which can be used in arbitra
## Functions
Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`.
There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions.
There are regular and aggregate functions (see the section [Aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md)). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions.
## Operators
@ -199,7 +199,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult
## Data Types and Database Table Engines
Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”.
Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections [Data types](/docs/en/sql-reference/data-types/index.md), [Table engines](/docs/en/engines/table-engines/index.md), and [CREATE](/docs/en/sql-reference/statements/create/index.md).
## Expression Aliases
@ -211,17 +211,17 @@ expr AS alias
- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword.
For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function.
In the [CAST](./functions/type-conversion-functions.md#castx-t) function, the `AS` keyword has another meaning. See the description of the function.
- `expr` — Any expression supported by ClickHouse.
For example, `SELECT column_name * 2 AS double FROM some_table`.
For example, `SELECT column_name * 2 AS double FROM some_table`.
- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax.
- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#identifiers) syntax.
For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
### Notes on Usage
@ -254,11 +254,11 @@ Received exception from server (version 18.14.17):
Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
```
In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`.
In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer-column-name-to-alias) to `1`.
## Asterisk
In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”.
In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk).
## Expressions

View File

@ -20,7 +20,7 @@ A key advantage between ordinary UDF functions and the `executable` table functi
The `executable` table function requires three parameters and accepts an optional list of input queries:
```sql
executable(script_name, format, structure, [input_query...])
executable(script_name, format, structure, [input_query...] [,SETTINGS ...])
```
- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
@ -83,6 +83,15 @@ The response looks like:
└────┴────────────┘
```
## Settings
- `send_chunk_header` - controls whether to send the row count before sending a chunk of data to process. Default value is `false`.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there are no pool size restrictions. Default value is `16`.
- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the table function is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value is 10000.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value is 10000.
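A minimal sketch of passing these settings to the table function, following the signature above (the script name and structure are hypothetical):

```sql
SELECT * FROM executable(
    'my_script.py',
    TabSeparated,
    'id UInt64, value String',
    SETTINGS send_chunk_header = 1, pool_size = 4, command_read_timeout = 5000
);
```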
## Passing Query Results to a Script
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
@ -94,4 +103,4 @@ SELECT * FROM executable(
'id UInt64, sentiment Float32',
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
```
```

View File

@ -133,4 +133,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql)
## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)

View File

@ -41,9 +41,15 @@ ClickHouse не работает и не собирается на 32-битны
Run in the terminal:
git clone git@github.com:your_github_username/ClickHouse.git --recursive
git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Or (if you want to use a sparse checkout for submodules):
git clone git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
./contrib/update-submodules.sh
Replace the word `your_github_username` in the git command with your GitHub account name.
This command will create a ClickHouse directory containing a working copy of the project.

View File

@ -7,7 +7,7 @@ sidebar_position: 141
Sums the difference between consecutive rows. If the difference is negative, it is ignored.
This function is primarily intended for [materialized views](../../../sql-reference/statements/create/view.md#materialized) ordered by some time-bucket-aligned timestamp, for example a `toStartOfMinute` bucket. Because the rows in such a materialized view all have the same timestamp, it is impossible to merge them in the "right" order. The function keeps track of the `timestamp` of the observed values, so it is possible to order the states correctly during merging.
This function is primarily intended for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some rounded time bucket according to a timestamp, for example a `toStartOfMinute` bucket. Because the rows in such a materialized view all have the same timestamp, it is impossible to merge them in the correct order without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the observed values, so the values (states) of the function are computed correctly during merging of parts.
To calculate the delta sum across ordered consecutive rows, you can use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function instead of `deltaSumTimestamp`.

View File

@ -277,11 +277,11 @@ void Client::initialize(Poco::Util::Application & self)
*/
const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
if (env_user)
if (env_user && !config().has("user"))
config().setString("user", env_user);
const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
if (env_password)
if (env_password && !config().has("password"))
config().setString("password", env_password);
parseConnectionsCredentials();

View File

@ -1,4 +1,5 @@
#include "ClusterCopierApp.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/StatusFile.h>
#include <Common/TerminalSize.h>
#include <IO/ConnectionTimeouts.h>
@ -192,6 +193,8 @@ void ClusterCopierApp::mainImpl()
if (!task_file.empty())
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
zkutil::validateZooKeeperConfig(config());
copier->init();
copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));

View File

@ -89,8 +89,12 @@ static std::vector<std::string> extractFromConfig(
if (has_zk_includes && process_zk_includes)
{
DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml));
zkutil::validateZooKeeperConfig(*bootstrap_configuration);
zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>(
*bootstrap_configuration, "zookeeper", nullptr);
*bootstrap_configuration, bootstrap_configuration->has("zookeeper") ? "zookeeper" : "keeper", nullptr);
zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; });
config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache);
}

View File

@ -375,15 +375,22 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
try
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
String source = binary_self_path.string();
String destination = main_bin_tmp_path.string();
if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
/// Try to make a hard link first, as an optimization.
/// It is possible if the source and the destination are on the same filesystem.
if (0 != link(source.c_str(), destination.c_str()))
{
ReadBufferFromFile in(binary_self_path.string());
WriteBufferFromFile out(main_bin_tmp_path.string());
copyData(in, out);
out.sync();
out.finalize();
}
if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
out.finalize();
}
catch (const Exception & e)
{

View File

@ -17,7 +17,6 @@
#include <Poco/Net/TCPServerParams.h>
#include <Poco/Net/TCPServer.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Version.h>
#include <Poco/Environment.h>
#include <sys/stat.h>
#include <pwd.h>

View File

@ -815,7 +815,8 @@ try
}
);
bool has_zookeeper = config().has("zookeeper");
zkutil::validateZooKeeperConfig(config());
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); });
zkutil::EventPtr main_config_zk_changed_event = std::make_shared<Poco::Event>();
@ -980,7 +981,7 @@ try
StatusFile status{path / "status", StatusFile::write_full_info};
DB::ServerUUID::load(path / "uuid", log);
ServerUUID::load(path / "uuid", log);
/// Try to increase limit on number of open files.
{
@ -1307,7 +1308,7 @@ try
{
/// We do not load ZooKeeper configuration on the first config loading
/// because TestKeeper server is not started yet.
if (config->has("zookeeper"))
if (zkutil::hasZooKeeperConfig(*config))
global_context->reloadZooKeeperIfChanged(config);
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);

View File

@ -10,6 +10,7 @@
#include <Interpreters/Access/InterpreterCreateUserQuery.h>
#include <Interpreters/Access/InterpreterShowGrantsQuery.h>
#include <Common/logger_useful.h>
#include <Common/ThreadPool.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
@ -19,6 +20,7 @@
#include <base/range.h>
#include <filesystem>
#include <fstream>
#include <memory>
namespace DB
@ -317,15 +319,15 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)
return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily,
/// without restarting that thread.
if (lists_writing_thread.joinable())
lists_writing_thread.join();
if (lists_writing_thread && lists_writing_thread->joinable())
lists_writing_thread->join();
/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
out.close();
lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
lists_writing_thread = std::make_unique<ThreadFromGlobalPool>(&DiskAccessStorage::listsWritingThreadFunc, this);
lists_writing_thread_is_waiting = true;
}
@ -349,10 +351,10 @@ void DiskAccessStorage::listsWritingThreadFunc()
void DiskAccessStorage::stopListsWritingThread()
{
if (lists_writing_thread.joinable())
if (lists_writing_thread && lists_writing_thread->joinable())
{
lists_writing_thread_should_exit.notify_one();
lists_writing_thread.join();
lists_writing_thread->join();
}
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <Access/MemoryAccessStorage.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <boost/container/flat_set.hpp>
@ -81,7 +81,7 @@ private:
bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false;
/// List files are written in a separate thread.
ThreadFromGlobalPool lists_writing_thread;
std::unique_ptr<ThreadFromGlobalPool> lists_writing_thread;
/// Signals `lists_writing_thread` to exit.
std::condition_variable lists_writing_thread_should_exit;

View File

@ -1,3 +1,4 @@
#include <memory>
#include <Access/AccessEntityIO.h>
#include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
@ -15,6 +16,7 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/escapeForFileName.h>
#include <Common/setThreadName.h>
#include <Common/ThreadPool.h>
#include <base/range.h>
#include <base/sleep.h>
#include <boost/range/algorithm_ext/erase.hpp>
@ -72,7 +74,7 @@ void ReplicatedAccessStorage::startWatchingThread()
{
bool prev_watching_flag = watching.exchange(true);
if (!prev_watching_flag)
watching_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWatchingThread, this);
watching_thread = std::make_unique<ThreadFromGlobalPool>(&ReplicatedAccessStorage::runWatchingThread, this);
}
void ReplicatedAccessStorage::stopWatchingThread()
@ -81,8 +83,8 @@ void ReplicatedAccessStorage::stopWatchingThread()
if (prev_watching_flag)
{
watched_queue->finish();
if (watching_thread.joinable())
watching_thread.join();
if (watching_thread && watching_thread->joinable())
watching_thread->join();
}
}

View File

@ -2,7 +2,7 @@
#include <atomic>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ConcurrentBoundedQueue.h>
@ -21,7 +21,7 @@ public:
static constexpr char STORAGE_TYPE[] = "replicated";
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup);
virtual ~ReplicatedAccessStorage() override;
~ReplicatedAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -43,7 +43,7 @@ private:
std::mutex cached_zookeeper_mutex;
std::atomic<bool> watching = false;
ThreadFromGlobalPool watching_thread;
std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;

View File

@ -0,0 +1,36 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace DB
{
struct Settings;
namespace
{
AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} only supports numerical types", name);
return std::make_shared<AggregateFunctionKolmogorovSmirnov>(argument_types, parameters);
}
}
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,323 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/Exception.h>
#include <Common/assert_cast.h>
#include <Common/ArenaAllocator.h>
#include <Common/PODArray_fwd.h>
#include <base/types.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
{
enum class Alternative
{
TwoSided,
Less,
Greater
};
std::pair<Float64, Float64> getResult(Alternative alternative, String method)
{
::sort(x.begin(), x.end());
::sort(y.begin(), y.end());
Float64 max_s = std::numeric_limits<Float64>::min();
Float64 min_s = std::numeric_limits<Float64>::max();
Float64 now_s = 0;
UInt64 pos_x = 0;
UInt64 pos_y = 0;
UInt64 n1 = x.size();
UInt64 n2 = y.size();
const Float64 n1_d = 1. / n1;
const Float64 n2_d = 1. / n2;
const Float64 tol = 1e-7;
// reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
while (pos_x < x.size() && pos_y < y.size())
{
if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
{
if (x[pos_x] < y[pos_y])
{
now_s += n1_d;
++pos_x;
}
else
{
now_s -= n2_d;
++pos_y;
}
max_s = std::max(max_s, now_s);
min_s = std::min(min_s, now_s);
}
else
{
now_s += n1_d;
++pos_x;
}
}
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
min_s = std::min(min_s, now_s);
max_s = std::max(max_s, now_s);
Float64 d = 0;
if (alternative == Alternative::TwoSided)
d = std::max(std::abs(max_s), std::abs(min_s));
else if (alternative == Alternative::Less)
d = -min_s;
else if (alternative == Alternative::Greater)
d = max_s;
UInt64 g = std::__gcd(n1, n2);
UInt64 nx_g = n1 / g;
UInt64 ny_g = n2 / g;
if (method == "auto")
method = std::max(n1, n2) <= 10000 ? "exact" : "asymp";
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
method = "asymp";
Float64 p_value = std::numeric_limits<Float64>::infinity();
if (method == "exact")
{
/* reference:
* Gunar Schröer and Dietrich Trenkler
* Exact and Randomization Distributions of Kolmogorov-Smirnov Tests for Two or Three Samples
*
* and
*
* Thomas Viehmann
* Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
*/
if (n2 > n1)
std::swap(n1, n2);
const Float64 f_n1 = static_cast<Float64>(n1);
const Float64 f_n2 = static_cast<Float64>(n2);
const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
PaddedPODArray<Float64> c(n1 + 1);
auto check = alternative == Alternative::TwoSided ?
[](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
: [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };
c[0] = 0;
for (UInt64 j = 1; j <= n1; j++)
if (check(k_d, 0., j / f_n1))
c[j] = 1.;
else
c[j] = c[j - 1];
for (UInt64 i = 1; i <= n2; i++)
{
if (check(k_d, i / f_n2, 0.))
c[0] = 1.;
for (UInt64 j = 1; j <= n1; j++)
if (check(k_d, i / f_n2, j / f_n1))
c[j] = 1.;
else
{
Float64 v = i / static_cast<Float64>(i + j);
Float64 w = j / static_cast<Float64>(i + j);
c[j] = v * c[j] + w * c[j - 1];
}
}
p_value = c[n1];
}
else if (method == "asymp")
{
Float64 n = std::min(n1, n2);
Float64 m = std::max(n1, n2);
Float64 p = sqrt((n * m) / (n + m)) * d;
if (alternative == Alternative::TwoSided)
{
/* reference:
* J.DURBIN
* Distribution theory for tests based on the sample distribution function
*/
Float64 new_val, old_val, s, w, z;
UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));
if (p < 1)
{
z = - (M_PI_2 * M_PI_4) / (p * p);
w = log(p);
s = 0;
for (UInt64 k = 1; k < k_max; k += 2)
s += exp(k * k * z - w);
p = s / 0.398942280401432677939946059934;
}
else
{
z = -2 * p * p;
s = -1;
UInt64 k = 1;
old_val = 0;
new_val = 1;
while (fabs(old_val - new_val) > tol)
{
old_val = new_val;
new_val += 2 * s * exp(z * k * k);
s *= -1;
k++;
}
p = new_val;
}
p_value = 1 - p;
}
else
{
/* reference:
* J. L. HODGES, Jr
* The significance probability of the Smirnov two-sample test
*/
// Use Hodges' suggested approximation Eqn 5.3
// Requires m to be the larger of (n1, n2)
Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
p_value = exp(expt);
}
}
return {d, p_value};
}
};
class AggregateFunctionKolmogorovSmirnov final:
public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
{
private:
using Alternative = typename KolmogorovSmirnov::Alternative;
Alternative alternative = Alternative::TwoSided;
String method = "auto";
public:
explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
{
if (params.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires two parameters or less", getName());
if (params.empty())
return;
if (params[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the first parameter to be a String", getName());
const auto & param = params[0].get<String>();
if (param == "two-sided")
alternative = Alternative::TwoSided;
else if (param == "less")
alternative = Alternative::Less;
else if (param == "greater")
alternative = Alternative::Greater;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
"It must be one of: 'two-sided', 'less', 'greater'", getName());
if (params.size() != 2)
return;
if (params[1].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the second parameter to be a String", getName());
method = params[1].get<String>();
if (method != "auto" && method != "exact" && method != "asymp")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
"It must be one of: 'auto', 'exact', 'asymp'", getName());
}
String getName() const override
{
return "kolmogorovSmirnovTest";
}
bool allocatesMemoryInArena() const override { return true; }
static DataTypePtr createResultType()
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"d_statistic",
"p_value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
this->data(place).addY(value, arena);
else
this->data(place).addX(value, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).merge(this->data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
if (!this->data(place).size_x || !this->data(place).size_y)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires both samples to be non-empty", getName());
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(d_statistic);
column_value.getData().push_back(p_value);
}
};
}
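For context, a hedged usage sketch of the new aggregate function registered above (the table name is hypothetical). The first parameter picks the alternative ('two-sided', 'less' or 'greater'), the second the p-value method ('auto', 'exact' or 'asymp'); the first argument is the sample value and the second marks which of the two samples the row belongs to. The result is a tuple (d_statistic, p_value):

```sql
-- Hypothetical table: ks_samples(value Float64, sample_index UInt8),
-- where sample_index is 0 for the first sample and 1 for the second.
SELECT kolmogorovSmirnovTest('two-sided', 'auto')(value, sample_index) AS ks
FROM ks_samples;
```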

View File

@ -1,6 +1,5 @@
#pragma once
#include <Common/logger_useful.h>
#include <base/sort.h>
#include <DataTypes/DataTypesNumber.h>

View File

@ -26,9 +26,11 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
template <typename> class QuantileTiming;
template <typename> class QuantileApprox;
/** Generic aggregate function for calculation of quantiles.
@ -60,6 +62,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_approx = std::is_same_v<Data, QuantileApprox<Value>>;
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -67,22 +70,57 @@ private:
/// Used when there are single level to get.
Float64 level = 0.5;
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
ssize_t accuracy = 10000;
DataTypePtr & argument_type;
public:
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
argument_types_, params, createResultType(argument_types_))
, levels(params, returns_many)
, levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many)
, level(levels.levels[0])
, argument_type(this->argument_types[0])
{
if (!returns_many && levels.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName());
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
if constexpr (is_quantile_approx)
{
if (params.empty())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());
const auto & accuracy_field = params[0];
if (!isInt64OrUInt64FieldType(accuracy_field.getType()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
if (accuracy_field.getType() == Field::Types::Int64)
accuracy = accuracy_field.get<Int64>();
else
accuracy = accuracy_field.get<UInt64>();
if (accuracy <= 0)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires accuracy parameter with positive value but is {}",
getName(),
accuracy);
}
}
String getName() const override { return Name::name; }
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
if constexpr (is_quantile_approx)
new (place) Data(accuracy);
else
new (place) Data;
}
static DataTypePtr createResultType(const DataTypes & argument_types_)
{
DataTypePtr res;
@ -257,4 +295,7 @@ struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16";
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; };
struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; };
}

View File

@ -0,0 +1,71 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileApprox.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename Value, bool _> using FuncQuantileApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantileApprox, false, void, false>;
template <typename Value, bool _> using FuncQuantilesApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantilesApprox, false, void, true>;
template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);
}
}
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileApprox::name, createAggregateFunctionQuantile<FuncQuantileApprox>);
factory.registerFunction(NameQuantilesApprox::name, {createAggregateFunctionQuantile<FuncQuantilesApprox>, properties});
/// 'medianApprox' is an alias for 'quantileApprox'
factory.registerAlias("medianApprox", NameQuantileApprox::name);
}
}
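As a usage note for the functions registered above (a sketch; the queries below assume nothing beyond the built-in numbers() table function): the first parameter is the accuracy of the Greenwald-Khanna sampler, and the remaining parameters are the quantile level(s).

```sql
-- Approximate 0.95 quantile; accuracy 10000 means roughly a 1/10000 relative rank error.
SELECT quantileApprox(10000, 0.95)(number) FROM numbers(1000000);

-- Several levels at once, with a coarser accuracy.
SELECT quantilesApprox(100, 0.5, 0.9, 0.99)(number) FROM numbers(1000000);
```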

View File

@ -11,7 +11,6 @@
#include <IO/WriteHelpers.h>
#include <Columns/ColumnString.h>
#include <Common/PODArray.h>
#include <Common/logger_useful.h>
#include <IO/ReadBufferFromString.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/IColumn.h>

View File

@ -18,7 +18,6 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <map>
#include <Common/logger_useful.h>
#include <Common/ClickHouseRevision.h>

View File

@ -9,7 +9,7 @@
#include <Interpreters/Context_fwd.h>
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <Core/IResolvedFunction.h>
#include "config.h"

View File

@ -0,0 +1,477 @@
#pragma once
#include <cmath>
#include <base/sort.h>
#include <Common/RadixSort.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
template <typename T>
class ApproxSampler
{
public:
struct Stats
{
T value; // the sampled value
Int64 g; // the minimum rank jump from the previous value's minimum rank
Int64 delta; // the maximum span of the rank
Stats() = default;
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
};
struct QueryResult
{
size_t index;
Int64 rank;
T value;
QueryResult(size_t index_, Int64 rank_, T value_) : index(index_), rank(rank_), value(value_) { }
};
ApproxSampler() = default;
explicit ApproxSampler(
double relative_error_,
size_t compress_threshold_ = default_compress_threshold,
size_t count_ = 0,
bool compressed_ = false)
: relative_error(relative_error_)
, compress_threshold(compress_threshold_)
, count(count_)
, compressed(compressed_)
{
sampled.reserve(compress_threshold);
backup_sampled.reserve(compress_threshold);
head_sampled.reserve(default_head_size);
}
bool isCompressed() const { return compressed; }
void setCompressed() { compressed = true; }
void insert(T x)
{
head_sampled.push_back(x);
compressed = false;
if (head_sampled.size() >= default_head_size)
{
withHeadBufferInserted();
if (sampled.size() >= compress_threshold)
compress();
}
}
void query(const Float64 * percentiles, const size_t * indices, size_t size, T * result) const
{
if (!head_sampled.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot operate on an uncompressed summary, call compress() first");
if (sampled.empty())
{
for (size_t i = 0; i < size; ++i)
result[i] = T();
return;
}
Int64 current_max = std::numeric_limits<Int64>::min();
for (const auto & stats : sampled)
current_max = std::max(stats.delta + stats.g, current_max);
Int64 target_error = current_max / 2;
size_t index = 0;
auto min_rank = sampled[0].g;
for (size_t i = 0; i < size; ++i)
{
double percentile = percentiles[indices[i]];
if (percentile <= relative_error)
{
result[indices[i]] = sampled.front().value;
}
else if (percentile >= 1 - relative_error)
{
result[indices[i]] = sampled.back().value;
}
else
{
QueryResult res = findApproxQuantile(index, min_rank, target_error, percentile);
index = res.index;
min_rank = res.rank;
result[indices[i]] = res.value;
}
}
}
void compress()
{
if (compressed)
return;
withHeadBufferInserted();
doCompress(2 * relative_error * count);
compressed = true;
}
void merge(const ApproxSampler & other)
{
if (other.count == 0)
return;
else if (count == 0)
{
compress_threshold = other.compress_threshold;
relative_error = other.relative_error;
count = other.count;
compressed = other.compressed;
sampled.resize(other.sampled.size());
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
return;
}
else
{
// Merge the two buffers.
// The GK algorithm is a bit unclear about it, but we need to adjust the statistics during the
// merging. The main idea is that samples that come from one side will suffer from the lack of
// precision of the other.
// As a concrete example, take two QuantileSummaries whose samples (value, g, delta) are:
// `a = [(0, 1, 0), (20, 99, 0)]` and `b = [(10, 1, 0), (30, 49, 0)]`
// This means `a` has 100 values, whose minimum is 0 and maximum is 20,
// while `b` has 50 values, between 10 and 30.
// The resulting samples of the merge will be:
// a+b = [(0, 1, 0), (10, 1, ??), (20, 99, ??), (30, 49, 0)]
// The values of `g` do not change, as they represent the minimum number of values between two
// consecutive samples. The values of `delta` should be adjusted, however.
// Take the case of the sample `10` from `b`. In the original stream, it could have appeared
// right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`.
// In the GK algorithm's style of working in terms of maximum bounds, one can observe that the
// maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) =
// floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is
// `floor(2 * eps_b * n_b)`.
// Only samples that interleave the other side are affected. That means that samples from
// one side that are lesser (or greater) than all samples from the other side are just copied
// unmodified.
// If the merging instances have different `relative_error`, the resulting instance will carry
// the largest one: `eps_ab = max(eps_a, eps_b)`.
// The main invariant of the GK algorithm is kept:
// `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
// `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
// Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0)])`
compress();
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + other.sampled.size());
double merged_relative_error = std::max(relative_error, other.relative_error);
size_t merged_count = count + other.count;
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
Int64 additional_other_delta = static_cast<Int64>(std::floor(2 * relative_error * count));
// Do a merge of two sorted lists until one of the lists is fully consumed
size_t self_idx = 0;
size_t other_idx = 0;
while (self_idx < sampled.size() && other_idx < other.sampled.size())
{
const Stats & self_sample = sampled[self_idx];
const Stats & other_sample = other.sampled[other_idx];
// Detect next sample
Stats next_sample;
Int64 additional_delta = 0;
if (self_sample.value < other_sample.value)
{
++self_idx;
next_sample = self_sample;
additional_delta = other_idx > 0 ? additional_self_delta : 0;
}
else
{
++other_idx;
next_sample = other_sample;
additional_delta = self_idx > 0 ? additional_other_delta : 0;
}
// Insert it
next_sample.delta += additional_delta;
backup_sampled.emplace_back(std::move(next_sample));
}
// Copy the remaining samples from the other list
// (by construction, at most one `while` loop will run)
while (self_idx < sampled.size())
{
backup_sampled.emplace_back(sampled[self_idx]);
++self_idx;
}
while (other_idx < other.sampled.size())
{
backup_sampled.emplace_back(other.sampled[other_idx]);
++other_idx;
}
std::swap(sampled, backup_sampled);
relative_error = merged_relative_error;
count = merged_count;
compress_threshold = other.compress_threshold;
doCompress(2 * merged_relative_error * merged_count);
compressed = true;
}
}
void write(WriteBuffer & buf) const
{
writeIntBinary<size_t>(compress_threshold, buf);
writeFloatBinary<double>(relative_error, buf);
writeIntBinary<size_t>(count, buf);
writeIntBinary<size_t>(sampled.size(), buf);
for (const auto & stats : sampled)
{
writeFloatBinary<T>(stats.value, buf);
writeIntBinary<Int64>(stats.g, buf);
writeIntBinary<Int64>(stats.delta, buf);
}
}
void read(ReadBuffer & buf)
{
readIntBinary<size_t>(compress_threshold, buf);
readFloatBinary<double>(relative_error, buf);
readIntBinary<size_t>(count, buf);
size_t sampled_len = 0;
readIntBinary<size_t>(sampled_len, buf);
sampled.resize(sampled_len);
for (size_t i = 0; i < sampled_len; ++i)
{
auto & stats = sampled[i]; /// read directly into the stored element, not a copy
readFloatBinary<T>(stats.value, buf);
readIntBinary<Int64>(stats.g, buf);
readIntBinary<Int64>(stats.delta, buf);
}
}
private:
QueryResult findApproxQuantile(size_t index, Int64 min_rank_at_index, double target_error, double percentile) const
{
Stats curr_sample = sampled[index];
Int64 rank = static_cast<Int64>(std::ceil(percentile * count));
size_t i = index;
Int64 min_rank = min_rank_at_index;
while (i < sampled.size() - 1)
{
Int64 max_rank = min_rank + curr_sample.delta;
if (max_rank - target_error <= rank && rank <= min_rank + target_error)
return {i, min_rank, curr_sample.value};
else
{
++i;
curr_sample = sampled[i];
min_rank += curr_sample.g;
}
}
return {sampled.size()-1, 0, sampled.back().value};
}
void withHeadBufferInserted()
{
if (head_sampled.empty())
return;
bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
if (use_radix_sort)
RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
else
::sort(head_sampled.begin(), head_sampled.end());
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + head_sampled.size());
size_t sample_idx = 0;
size_t ops_idx = 0;
size_t current_count = count;
for (; ops_idx < head_sampled.size(); ++ops_idx)
{
T current_sample = head_sampled[ops_idx];
// Add all the samples before the next observation.
while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
{
backup_sampled.emplace_back(sampled[sample_idx]);
++sample_idx;
}
// If it is the first one to insert, or if it is the last one
++current_count;
Int64 delta;
if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
delta = 0;
else
delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));
backup_sampled.emplace_back(current_sample, 1, delta);
}
// Add all the remaining existing samples
for (; sample_idx < sampled.size(); ++sample_idx)
backup_sampled.emplace_back(sampled[sample_idx]);
std::swap(sampled, backup_sampled);
head_sampled.clear();
count = current_count;
}
void doCompress(double merge_threshold)
{
if (sampled.empty())
return;
backup_sampled.clear();
// Start from the last element, which is always part of the set.
// The head contains the current new head, that may be merged with the current element.
Stats head = sampled.back();
ssize_t i = sampled.size() - 2;
// Do not compress the last element
while (i >= 1)
{
// The current sample:
const auto & sample1 = sampled[i];
// Do we need to compress?
if (sample1.g + head.g + head.delta < merge_threshold)
{
// Do not insert yet, just merge the current element into the head.
head.g += sample1.g;
}
else
{
// Prepend the current head, and keep the current sample as target for merging.
backup_sampled.push_back(head);
head = sample1;
}
--i;
}
backup_sampled.push_back(head);
// If necessary, add the minimum element:
auto curr_head = sampled.front();
// don't add the minimum element if `sampled` has only one element (both `curr_head` and
// `head` point to the same element)
if (curr_head.value <= head.value && sampled.size() > 1)
backup_sampled.emplace_back(sampled.front());
std::reverse(backup_sampled.begin(), backup_sampled.end());
std::swap(sampled, backup_sampled);
}
double relative_error;
size_t compress_threshold;
size_t count = 0;
bool compressed;
PaddedPODArray<Stats> sampled;
PaddedPODArray<Stats> backup_sampled;
PaddedPODArray<T> head_sampled;
static constexpr size_t default_compress_threshold = 10000;
static constexpr size_t default_head_size = 50000;
};
template <typename Value>
class QuantileApprox
{
private:
using Data = ApproxSampler<Value>;
mutable Data data;
public:
QuantileApprox() = default;
explicit QuantileApprox(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
void add(const Value & x)
{
data.insert(x);
}
template <typename Weight>
void add(const Value &, const Weight &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for GKSampler");
}
void merge(const QuantileApprox & rhs)
{
if (!data.isCompressed())
data.compress();
data.merge(rhs.data);
}
void serialize(WriteBuffer & buf) const
{
/// Always compress before serialization
if (!data.isCompressed())
data.compress();
data.write(buf);
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
data.setCompressed();
}
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level)
{
if (!data.isCompressed())
data.compress();
Value res;
size_t indice = 0;
data.query(&level, &indice, 1, &res);
return res;
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
{
if (!data.isCompressed())
data.compress();
data.query(levels, indices, size, result);
}
Float64 getFloat64(Float64 /*level*/)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat64 is not implemented for GKSampler");
}
void getManyFloat(const Float64 * /*levels*/, const size_t * /*indices*/, size_t /*size*/, Float64 * /*result*/)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for GKSampler");
}
};
}

View File

@ -32,6 +32,7 @@ void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
void registerAggregateFunctionRate(AggregateFunctionFactory &);
@ -79,6 +80,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -123,6 +125,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory);
registerAggregateFunctionWindowFunnel(factory);
registerAggregateFunctionRate(factory);
@ -170,6 +173,7 @@ void registerAggregateFunctions()
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerWindowFunctions(factory);
}

View File

@ -86,7 +86,12 @@ public:
DataTypePtr getResultType() const override
{
return getExpression()->getResultType();
return result_type;
}
void resolve(DataTypePtr lambda_type)
{
result_type = std::move(lambda_type);
}
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
@ -102,6 +107,7 @@ protected:
private:
Names argument_names;
DataTypePtr result_type;
static constexpr size_t arguments_child_index = 0;
static constexpr size_t expression_child_index = 1;

View File

@ -32,6 +32,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Formats/FormatFactory.h>
#include <Databases/IDatabase.h>
@ -75,6 +76,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Identifier.h>
namespace ProfileEvents
{
@ -112,6 +114,8 @@ namespace ErrorCodes
extern const int ALIAS_REQUIRED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNKNOWN_TABLE;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h before.
@ -5085,8 +5089,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str();
}
argument_types[function_lambda_argument_index] = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve->getResultType());
argument_columns[function_lambda_argument_index].type = argument_types[function_lambda_argument_index];
auto lambda_resolved_type = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve_typed.getExpression()->getResultType());
lambda_to_resolve_typed.resolve(lambda_resolved_type);
argument_types[function_lambda_argument_index] = lambda_resolved_type;
argument_columns[function_lambda_argument_index].type = lambda_resolved_type;
function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve);
}
@ -6076,6 +6083,18 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data));
}
bool findIdentifier(const FunctionNode & function)
{
for (const auto & argument : function.getArguments())
{
if (argument->as<IdentifierNode>())
return true;
if (const auto * f = argument->as<FunctionNode>(); f && findIdentifier(*f))
return true;
}
return false;
}
/// Resolve table function node in scope
void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
IdentifierResolveScope & scope,
@ -6087,12 +6106,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
if (!nested_table_function)
expressions_visitor.visit(table_function_node_typed.getArgumentsNode());
const auto & table_function_factory = TableFunctionFactory::instance();
const auto & table_function_name = table_function_node_typed.getTableFunctionName();
auto & scope_context = scope.context;
TableFunctionPtr table_function_ptr = table_function_factory.tryGet(table_function_name, scope_context);
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().tryGet(table_function_name, scope_context);
if (!table_function_ptr)
{
auto hints = TableFunctionFactory::instance().getHints(table_function_name);
@ -6107,17 +6125,131 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_function_name);
}
uint64_t use_structure_from_insertion_table_in_table_functions = scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions;
if (!nested_table_function &&
scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions &&
use_structure_from_insertion_table_in_table_functions &&
scope_context->hasInsertionTable() &&
table_function_ptr->needStructureHint())
{
const auto & insertion_table = scope_context->getInsertionTable();
if (!insertion_table.empty())
{
auto insertion_table_storage = DatabaseCatalog::instance().getTable(insertion_table, scope_context);
const auto & structure_hint = insertion_table_storage->getInMemoryMetadataPtr()->columns;
table_function_ptr->setStructureHint(structure_hint);
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
DB::ColumnsDescription structure_hint;
bool use_columns_from_insert_query = true;
/// The insert table matches columns against the SELECT expression by position, so we want to map
/// insert table columns to table function columns through the names from the SELECT expression (a SQL usage sketch follows this block).
auto insert_column = insert_structure.begin();
auto insert_structure_end = insert_structure.end(); /// end iterator of the range covered by possible asterisk
auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
bool asterisk = false;
const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
auto expression = expression_list.begin();
/// We want to go through the SELECT expression list and match each expression to a column in the insert table
/// whose type will be used as a hint for the file structure inference.
for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
{
if (auto * identifier_node = (*expression)->as<IdentifierNode>())
{
if (!virtual_column_names.contains(identifier_node->getIdentifier().getFullName()))
{
if (asterisk)
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
use_columns_from_insert_query = false;
break;
}
structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
}
/// Once we hit an asterisk, we want to find the end of the range covered by it,
/// attributing every further SELECT expression to the tail of the insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
{
if (asterisk)
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");
use_columns_from_insert_query = false;
break;
}
if (!structure_hint.empty())
{
if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
use_columns_from_insert_query = false;
break;
}
asterisk = true;
}
else if (auto * function = (*expression)->as<FunctionNode>())
{
if (use_structure_from_insertion_table_in_table_functions == 2 && findIdentifier(*function))
{
use_columns_from_insert_query = false;
break;
}
/// Once we hit an asterisk, we want to find the end of the range covered by it,
/// attributing every further SELECT expression to the tail of the insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
else
{
/// Once we hit an asterisk, we want to find the end of the range covered by it,
/// attributing every further SELECT expression to the tail of the insert structure
if (asterisk)
--insert_structure_end;
else
++insert_column;
}
}
if (use_structure_from_insertion_table_in_table_functions == 2 && !asterisk)
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
}
if (use_columns_from_insert_query)
{
if (expression == expression_list.end())
{
/// Append tail of insert structure to the hint
if (asterisk)
{
for (; insert_column != insert_structure_end; ++insert_column)
structure_hint.add({ insert_column->name, insert_column->type });
}
if (!structure_hint.empty())
table_function_ptr->setStructureHint(structure_hint);
} else if (use_structure_from_insertion_table_in_table_functions == 1)
throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table is less than required by SELECT expression.");
}
}
}
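For context, a hedged SQL sketch of the behavior implemented above (the table, file, and column names are hypothetical): when use_structure_from_insertion_table_in_table_functions is enabled, the columns selected from a schemaless table function inside an INSERT ... SELECT are matched by position against the insertion table, and the matched names and types are handed to the table function as a structure hint.

```sql
-- Hypothetical target table and input file.
CREATE TABLE dst (id UInt64, value String) ENGINE = MergeTree ORDER BY id;

SET use_structure_from_insertion_table_in_table_functions = 2;

-- id and value are mapped by position to dst's columns, so file() receives
-- the hint 'id UInt64, value String' instead of inferring the schema from the data.
INSERT INTO dst SELECT id, value FROM file('data.tsv');
```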

View File

@ -115,13 +115,23 @@ private:
for (size_t i = 0; i < expected_argument_types_size; ++i)
{
// Skip lambdas
if (WhichDataType(expected_argument_types[i]).isFunction())
continue;
const auto & expected_argument_type = expected_argument_types[i];
const auto & actual_argument_type = actual_argument_columns[i].type;
if (!expected_argument_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} expected argument {} type is not set after running {} pass",
function->toAST()->formatForErrorMessage(),
i + 1,
pass_name);
if (!actual_argument_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} actual argument {} type is not set after running {} pass",
function->toAST()->formatForErrorMessage(),
i + 1,
pass_name);
if (!expected_argument_type->equals(*actual_argument_type))
{
/// Aggregate functions remove low cardinality for their argument types

View File

@ -1,5 +1,6 @@
#include <Backups/BackupCoordinationFileInfos.h>
#include <Common/quoteString.h>
#include <Common/Exception.h>
namespace DB

View File

@ -8,7 +8,8 @@
namespace DB
{
BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_) : file_infos(plain_backup_)
BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_)
: log(&Poco::Logger::get("BackupCoordinationLocal")), file_infos(plain_backup_)
{
}
@ -35,7 +36,7 @@ Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milli
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{
std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums);
replicated_tables.addPartNames({table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums});
}
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@ -48,7 +49,7 @@ Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_sha
void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations)
{
std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addMutations(table_shared_id, table_name_for_logs, replica_name, mutations);
replicated_tables.addMutations({table_shared_id, table_name_for_logs, replica_name, mutations});
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@ -61,7 +62,7 @@ std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getRepli
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path)
{
std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addDataPath(table_shared_id, data_path);
replicated_tables.addDataPath({table_shared_id, data_path});
}
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const
@ -74,7 +75,7 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
std::lock_guard lock{replicated_access_mutex};
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path);
replicated_access.addFilePath({access_zk_path, access_entity_type, "", file_path});
}
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
@ -87,7 +88,7 @@ Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & acc
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
std::lock_guard lock{replicated_sql_objects_mutex};
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path);
replicated_sql_objects.addDirectory({loader_zk_path, object_type, "", dir_path});
}
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
@ -125,7 +126,12 @@ bool BackupCoordinationLocal::startWritingFile(size_t data_file_index)
bool BackupCoordinationLocal::hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const
{
return (num_active_backups > 1);
if (num_active_backups > 1)
{
LOG_WARNING(log, "Found concurrent backups: num_active_backups={}", num_active_backups);
return true;
}
return false;
}
}

View File

@ -52,6 +52,8 @@ public:
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
private:
Poco::Logger * const log;
BackupCoordinationReplicatedTables TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
BackupCoordinationReplicatedAccess TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
BackupCoordinationReplicatedSQLObjects TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;

Some files were not shown because too many files have changed in this diff.