Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-14 19:45:11 +00:00)

Commit 45ad555c39: Merge branch 'master' into zk_retry_timeout

.clang-tidy (14 changes)
@@ -110,6 +110,7 @@ Checks: '*,
     -misc-const-correctness,
     -misc-no-recursion,
     -misc-non-private-member-variables-in-classes,
+    -misc-confusable-identifiers, # useful but slooow
 
     -modernize-avoid-c-arrays,
     -modernize-concat-nested-namespaces,
@@ -148,19 +149,6 @@ Checks: '*,
     -readability-use-anyofallof,
 
     -zirkon-*,
-
-    -misc-*, # temporarily disabled due to being too slow
-    # also disable checks in other categories which are aliases of checks in misc-*:
-    # https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html
-    -cert-dcl54-cpp, # alias of misc-new-delete-overloads
-    -hicpp-new-delete-operators, # alias of misc-new-delete-overloads
-    -cert-fio38-c, # alias of misc-non-copyable-objects
-    -cert-dcl03-c, # alias of misc-static-assert
-    -hicpp-static-assert, # alias of misc-static-assert
-    -cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference
-    -cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference
-    -cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator
-    -cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes
 '
 
 WarningsAsErrors: '*'
README.md

@@ -1,4 +1,4 @@
-[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com)
+[<img alt="ClickHouse — open source distributed column-oriented DBMS" width="400px" src="https://clickhouse.com/images/ch_gh_logo_rounded.png" />](https://clickhouse.com?utm_source=github)
 
 ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
@@ -36,7 +36,7 @@
 
 namespace detail
 {
-template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }
+template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression)
 
 #if defined(__SSE2__)
 template <char s0>
@@ -155,13 +155,13 @@ struct common_type<wide::integer<Bits, Signed>, Arithmetic>
         std::is_floating_point_v<Arithmetic>,
         Arithmetic,
         std::conditional_t<
-            sizeof(Arithmetic) < Bits * sizeof(long),
+            sizeof(Arithmetic) * 8 < Bits,
             wide::integer<Bits, Signed>,
             std::conditional_t<
-                Bits * sizeof(long) < sizeof(Arithmetic),
+                Bits < sizeof(Arithmetic) * 8,
                 Arithmetic,
                 std::conditional_t<
-                    Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
+                    Bits == sizeof(Arithmetic) * 8 && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
                     Arithmetic,
                     wide::integer<Bits, Signed>>>>>;
 };
base/glibc-compatibility/musl/expf.c (new file, 81 lines)

@@ -0,0 +1,81 @@
|
||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
|
||||
/*
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
*/
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
static const float
|
||||
half[2] = {0.5,-0.5},
|
||||
ln2hi = 6.9314575195e-1f, /* 0x3f317200 */
|
||||
ln2lo = 1.4286067653e-6f, /* 0x35bfbe8e */
|
||||
invln2 = 1.4426950216e+0f, /* 0x3fb8aa3b */
|
||||
/*
|
||||
* Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
|
||||
* |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
|
||||
*/
|
||||
P1 = 1.6666625440e-1f, /* 0xaaaa8f.0p-26 */
|
||||
P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
|
||||
|
||||
float expf(float x)
|
||||
{
|
||||
float_t hi, lo, c, xx, y;
|
||||
int k, sign;
|
||||
uint32_t hx;
|
||||
|
||||
GET_FLOAT_WORD(hx, x);
|
||||
sign = hx >> 31; /* sign bit of x */
|
||||
hx &= 0x7fffffff; /* high word of |x| */
|
||||
|
||||
/* special cases */
|
||||
if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */
|
||||
if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */
|
||||
/* overflow */
|
||||
x *= 0x1p127f;
|
||||
return x;
|
||||
}
|
||||
if (sign) {
|
||||
/* underflow */
|
||||
FORCE_EVAL(-0x1p-149f/x);
|
||||
if (hx >= 0x42cff1b5) /* x <= -103.972084f */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* argument reduction */
|
||||
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
|
||||
if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */
|
||||
k = invln2*x + half[sign];
|
||||
else
|
||||
k = 1 - sign - sign;
|
||||
hi = x - k*ln2hi; /* k*ln2hi is exact here */
|
||||
lo = k*ln2lo;
|
||||
x = hi - lo;
|
||||
} else if (hx > 0x39000000) { /* |x| > 2**-14 */
|
||||
k = 0;
|
||||
hi = x;
|
||||
lo = 0;
|
||||
} else {
|
||||
/* raise inexact */
|
||||
FORCE_EVAL(0x1p127f + x);
|
||||
return 1 + x;
|
||||
}
|
||||
|
||||
/* x is now in primary range */
|
||||
xx = x*x;
|
||||
c = x - xx*(P1+xx*P2);
|
||||
y = 1 + (x*c/(2-c) - lo + hi);
|
||||
if (k == 0)
|
||||
return y;
|
||||
return scalbnf(y, k);
|
||||
}
|
base/glibc-compatibility/musl/scalbnf.c (new file, 31 lines)

@@ -0,0 +1,31 @@
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
float scalbnf(float x, int n)
|
||||
{
|
||||
union {float f; uint32_t i;} u;
|
||||
float_t y = x;
|
||||
|
||||
if (n > 127) {
|
||||
y *= 0x1p127f;
|
||||
n -= 127;
|
||||
if (n > 127) {
|
||||
y *= 0x1p127f;
|
||||
n -= 127;
|
||||
if (n > 127)
|
||||
n = 127;
|
||||
}
|
||||
} else if (n < -126) {
|
||||
y *= 0x1p-126f;
|
||||
n += 126;
|
||||
if (n < -126) {
|
||||
y *= 0x1p-126f;
|
||||
n += 126;
|
||||
if (n < -126)
|
||||
n = -126;
|
||||
}
|
||||
}
|
||||
u.i = (uint32_t)(0x7f+n)<<23;
|
||||
x = y * u.f;
|
||||
return x;
|
||||
}
|
contrib/arrow (vendored submodule)

@@ -1 +1 @@
-Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666
+Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
@@ -202,6 +202,7 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/builder.cc"
     "${LIBRARY_DIR}/buffer.cc"
     "${LIBRARY_DIR}/chunked_array.cc"
+    "${LIBRARY_DIR}/chunk_resolver.cc"
     "${LIBRARY_DIR}/compare.cc"
     "${LIBRARY_DIR}/config.cc"
     "${LIBRARY_DIR}/datum.cc"
@@ -268,6 +269,10 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/util/uri.cc"
     "${LIBRARY_DIR}/util/utf8.cc"
     "${LIBRARY_DIR}/util/value_parsing.cc"
+    "${LIBRARY_DIR}/util/byte_size.cc"
+    "${LIBRARY_DIR}/util/debug.cc"
+    "${LIBRARY_DIR}/util/tracing.cc"
+    "${LIBRARY_DIR}/util/atfork_internal.cc"
     "${LIBRARY_DIR}/vendored/base64.cpp"
     "${LIBRARY_DIR}/vendored/datetime/tz.cpp"
 
@@ -301,9 +306,11 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/compute/exec/source_node.cc"
     "${LIBRARY_DIR}/compute/exec/sink_node.cc"
     "${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
+    "${LIBRARY_DIR}/compute/exec/partition_util.cc"
     "${LIBRARY_DIR}/compute/function.cc"
     "${LIBRARY_DIR}/compute/function_internal.cc"
     "${LIBRARY_DIR}/compute/kernel.cc"
+    "${LIBRARY_DIR}/compute/light_array.cc"
     "${LIBRARY_DIR}/compute/registry.cc"
     "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
     "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
@@ -317,21 +324,28 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
+    "${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
+    "${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
+    "${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
-    "${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
     "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
+    "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
+    "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
     "${LIBRARY_DIR}/compute/kernels/util_internal.cc"
     "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
+    "${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
     "${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
+    "${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
+    "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
     "${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
     "${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
     "${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
@@ -340,13 +354,15 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/compute/exec/union_node.cc"
     "${LIBRARY_DIR}/compute/exec/key_hash.cc"
     "${LIBRARY_DIR}/compute/exec/key_map.cc"
-    "${LIBRARY_DIR}/compute/exec/key_compare.cc"
-    "${LIBRARY_DIR}/compute/exec/key_encode.cc"
     "${LIBRARY_DIR}/compute/exec/util.cc"
     "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
     "${LIBRARY_DIR}/compute/exec/hash_join.cc"
     "${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
     "${LIBRARY_DIR}/compute/exec/task_util.cc"
+    "${LIBRARY_DIR}/compute/row/encode_internal.cc"
+    "${LIBRARY_DIR}/compute/row/grouper.cc"
+    "${LIBRARY_DIR}/compute/row/compare_internal.cc"
+    "${LIBRARY_DIR}/compute/row/row_internal.cc"
 
     "${LIBRARY_DIR}/ipc/dictionary.cc"
     "${LIBRARY_DIR}/ipc/feather.cc"
@@ -357,7 +373,8 @@ set(ARROW_SRCS
     "${LIBRARY_DIR}/ipc/writer.cc"
 
     "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
-    "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
+    "${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
+    "${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
 )
 
 add_definitions(-DARROW_WITH_LZ4)
contrib/cctz (vendored submodule)

@@ -1 +1 @@
-Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d
+Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
@@ -15,10 +15,6 @@ if(NOT AWK_PROGRAM)
     message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.")
 endif()
 
-if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
-    add_compile_definitions(USE_BORINGSSL=1)
-endif ()
-
 set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
 set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")
 
@@ -162,6 +158,11 @@ set(ALL_SRCS
 
     "${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/des_keys.c"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/f_parity.c"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/enc_provider/rc4.c"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/hash_provider/hash_md4.c"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4/md4.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
 #    "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
@@ -226,7 +227,6 @@ set(ALL_SRCS
 #    "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des.c"
-    "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
 #    "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
     "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
@@ -474,6 +474,14 @@ set(ALL_SRCS
     "${KRB5_SOURCE_DIR}/lib/krb5/krb5_libinit.c"
 )
 
+if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
+    add_compile_definitions(USE_BORINGSSL=1)
+else()
+    set(ALL_SRCS ${ALL_SRCS}
+        "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
+    )
+endif()
+
 add_custom_command(
     OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
     COMMAND /bin/sh
@@ -673,6 +681,7 @@ target_include_directories(_krb5 PRIVATE
     "${KRB5_SOURCE_DIR}/lib/gssapi/krb5"
     "${KRB5_SOURCE_DIR}/lib/gssapi/spnego"
     "${KRB5_SOURCE_DIR}/util/et"
+    "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4"
     "${KRB5_SOURCE_DIR}/lib/crypto/openssl"
     "${KRB5_SOURCE_DIR}/lib/crypto/krb"
     "${KRB5_SOURCE_DIR}/util/profile"
docs/en/engines/table-engines/mergetree-family/replication.md

@@ -8,11 +8,18 @@ sidebar_label: Data Replication
 
 :::note
 In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
 
-```
-ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
-```
+```sql
+ENGINE = ReplicatedReplacingMergeTree(
+    '/clickhouse/tables/{shard}/table_name',
+    '{replica}',
+    ver
+)
+```
 
 with:
 
-```
+```sql
 ENGINE = ReplicatedReplacingMergeTree
 ```
 :::
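For illustration, here is a minimal sketch of what this looks like in a full `CREATE TABLE` statement; the table and column names are hypothetical:

```sql
-- Self-managed ClickHouse: the ZooKeeper path and replica name are passed explicitly.
CREATE TABLE table_name (key UInt64, value String, ver UInt32)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
ORDER BY key;

-- ClickHouse Cloud: replication is managed for you, so the arguments are omitted.
CREATE TABLE table_name (key UInt64, value String, ver UInt32)
ENGINE = ReplicatedReplacingMergeTree
ORDER BY key;
```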
docs/en/interfaces/formats.md

@@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSON types:
 
 | ClickHouse type | BSON Type |
 |---|---|
 | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
-| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
-| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
+| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
+| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
 | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
 | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
@@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSON types:
 | [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
 | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
 | [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
-| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
+| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document |
 | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
 | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
 
 For input it uses the following correspondence between BSON types and ClickHouse types:
 
 | BSON Type | ClickHouse Type |
 |---|---|
 | `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
 | `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
 | `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
 | `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
 | `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
 | `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
 | `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
 | `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
 | `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
 | `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
 | `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
 | `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
 | `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
 | `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
+| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) |
 | `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
 
 Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into ClickHouse UInt8).
 Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with `\x00` binary subtype. In this case the format validates that the size of the binary data equals the size of the expected value.
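As an illustrative sketch of how the output mappings above are exercised, assuming the `BSONEachRow` format these tables describe:

```sql
-- Each row becomes one BSON document: `b` is written as `\x08` boolean,
-- `i` as `\x10` int32, and `s` as `\x02` string.
SELECT
    number % 2 = 0 AS b,
    toInt32(number) AS i,
    toString(number) AS s
FROM numbers(3)
FORMAT BSONEachRow
```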
@@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema).
 
 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
 
 | CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
 |---|---|---|
 | `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
 | `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
 | `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
 | `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
 | `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
-| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
+| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
 | `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
-| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
+| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
 | `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
 | `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
 | `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
 | `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
-| `ENUM` | [Enum(8\|16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
+| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
 | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
 | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
 | `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
 | `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
+| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
+| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
+| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |
 
 Integer types can be converted into each other during input/output.
 
 For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
 
-Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested.
+Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
 
 ### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
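A minimal sketch of selecting data in this format, assuming a hypothetical schema file `schema.capnp` that defines `struct Message { key @0 : UInt32; value @1 : Text; }`:

```sql
-- UInt32 maps to UINT32 and String to TEXT per the table above.
SELECT toUInt32(number) AS key, toString(number) AS value
FROM numbers(3)
FORMAT CapnProto
SETTINGS format_schema = 'schema:Message'
```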
@@ -1931,30 +1936,31 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
 
 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
 
 | Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
 |---|---|---|
 | `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
 | `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
 | `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
 | `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
 | `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
 | `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
 | `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
 | `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
 | `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
 | `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
 | `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
 | `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
 | `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
 | `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
 | `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
 | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
 | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
 | `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
 | `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
-| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` |
 
 Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
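As a sketch of reading such data directly (the file name is hypothetical and must be accessible to the server):

```sql
-- Parquet INT64 columns arrive as Int64, BINARY as String, per the table above.
SELECT * FROM file('data.parquet', 'Parquet') LIMIT 10
```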
@@ -2000,31 +2006,32 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md).
 
 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
 
 | Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
 |---|---|---|
 | `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
 | `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
 | `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
 | `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
 | `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
 | `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
 | `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
 | `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
 | `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
 | `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
 | `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
 | `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
 | `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
 | `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
 | `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
 | `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
 | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
 | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
 | `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
 | `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
 | `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
+| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` |
 
 Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|---------------------------------------------------------------|--------------------------|
|
||||
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
|
||||
| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
|
||||
| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
|
||||
| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
|
||||
| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
|
||||
| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
|
||||
| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
|
||||
| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
|
||||
| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
|
||||
| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
|
||||
| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
|
||||
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
|
||||
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
|
||||
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
|
||||
| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
|
||||
| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
|
||||
|---------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------------------|
|
||||
| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
|
||||
| `Tinyint` | [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `Tinyint` |
|
||||
| `Smallint` | [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `Smallint` |
|
||||
| `Int` | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
|
||||
| `Bigint` | [Int64/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
|
||||
| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
|
||||
| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
|
||||
| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
|
||||
| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
|
||||
| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
|
||||
| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
|
||||
| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
|
||||
| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
|
||||
| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
|
||||
| `Int` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
|
||||
| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` |
|
||||
| `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` |
|
||||
| `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` |
|
||||
|
||||
Other types are not supported.
|
||||
|
||||
|
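A sketch of loading such a file from the client side (the table and file names are hypothetical):

```sql
-- ORC column types are mapped per the table above (e.g. Bigint -> Int64).
INSERT INTO some_table FROM INFILE 'data.orc' FORMAT ORC
```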
docs/en/operations/utilities/clickhouse-local.md

@@ -6,7 +6,13 @@ sidebar_label: clickhouse-local
 
 # clickhouse-local
 
-The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
+## When to use clickhouse-local vs. ClickHouse
+
+`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (in the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local), so developers can get started quickly, without a complex installation process.
+
+While `clickhouse-local` is a great tool for development, testing, and processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serve applications. If you need to handle larger datasets or serve end users or applications, we recommend open-source ClickHouse instead of `clickhouse-local`.
+
+Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
 
 ## Download clickhouse-local
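As a quick illustrative sketch of the kind of one-shot query the use cases below walk through (the file name is hypothetical):

```sql
-- Run via: clickhouse-local --query "..."
-- Counts the rows of a local CSV, taking column names from its header line.
SELECT count() FROM file('data.csv', 'CSVWithNames')
```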
docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md

@@ -6,7 +6,7 @@ title: deltaSumTimestamp
 
 Adds the difference between consecutive rows. If the difference is negative, it is ignored.
 
-This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging.
+This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view all have the same timestamp, it is impossible for them to be merged in the correct order without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it has seen, so the values (states) of the function are correctly computed during merging of parts.
 
 To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function.
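A minimal constructed sketch of the behavior (negative differences are ignored):

```sql
-- Differences: +3 (1 -> 4), -1 ignored (4 -> 3), +2 (3 -> 5)  =>  result 5
SELECT deltaSumTimestamp(value, ts)
FROM values('ts DateTime, value UInt32',
    ('2023-01-01 00:00:00', 1),
    ('2023-01-01 00:00:01', 4),
    ('2023-01-01 00:00:02', 3),
    ('2023-01-01 00:00:03', 5))
```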
docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md (new file, 76 lines)

@@ -0,0 +1,76 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileApprox
sidebar_position: 204
---

# quantileApprox

Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. Greenwald-Khanna is an algorithm for computing quantiles on a stream of data in a highly efficient manner, introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.

`quantileApprox` differs from the other quantile functions in ClickHouse in that it enables the user to control the accuracy of the approximate quantile result.

**Syntax**

``` sql
quantileApprox(accuracy, level)(expr)
```

Alias: `medianApprox`.

**Arguments**

- `accuracy` — Accuracy of the quantile. Constant positive integer. A larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm: a larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows a faster and more memory-efficient computation at slightly lower accuracy.

- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates the [median](https://en.wikipedia.org/wiki/Median).

- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).

**Returned value**

- Quantile of the specified level and accuracy.

Type:

- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.

**Example**

``` sql
SELECT quantileApprox(1, 0.25)(number + 1)
FROM numbers(1000)

┌─quantileApprox(1, 0.25)(plus(number, 1))─┐
│                                        1 │
└──────────────────────────────────────────┘

SELECT quantileApprox(10, 0.25)(number + 1)
FROM numbers(1000)

┌─quantileApprox(10, 0.25)(plus(number, 1))─┐
│                                       156 │
└───────────────────────────────────────────┘

SELECT quantileApprox(100, 0.25)(number + 1)
FROM numbers(1000)

┌─quantileApprox(100, 0.25)(plus(number, 1))─┐
│                                        251 │
└────────────────────────────────────────────┘

SELECT quantileApprox(1000, 0.25)(number + 1)
FROM numbers(1000)

┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐
│                                         249 │
└─────────────────────────────────────────────┘
```

**See Also**

- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
docs/en/sql-reference/aggregate-functions/reference/quantiles.md

@@ -114,3 +114,59 @@ Result:
 │ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
 └─────────────────────────────────────────────────────────────────────┘
 ```
+
+## quantilesApprox
+
+`quantilesApprox` works similarly to `quantileApprox`, but allows calculating quantiles at several levels simultaneously and returns an array.
+
+**Syntax**
+
+``` sql
+quantilesApprox(accuracy, level1, level2, ...)(expr)
+```
+
+**Returned value**
+
+- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
+
+Type of array values:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [1,1,1]                                              │
+└──────────────────────────────────────────────────────┘
+
+SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [156,413,659]                                         │
+└───────────────────────────────────────────────────────┘
+
+SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [251,498,741]                                          │
+└────────────────────────────────────────────────────────┘
+
+SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1)
+FROM numbers(1000)
+
+┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
+│ [249,499,749]                                           │
+└─────────────────────────────────────────────────────────┘
+```
docs/en/sql-reference/functions/date-time-functions.md

@@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string.
 | %k | hour in 24h format (00-23) | 22 |
 | %l | hour in 12h format (01-12) | 09 |
 | %m | month as an integer number (01-12) | 01 |
-| %M | minute (00-59) | 33 |
+| %M | full month name (January-December), see (*) below | January |
 | %n | new-line character ('\n') | |
 | %p | AM or PM designation | PM |
 | %Q | Quarter (1-4) | 1 |
-| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM |
-| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
+| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM |
+| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
 | %s | second (00-59) | 44 |
 | %S | second (00-59) | 44 |
 | %t | horizontal-tab character ('\t') | |
-| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
+| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 |
 | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
 | %V | ISO 8601 week number (01-53) | 01 |
 | %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
@@ -1295,6 +1295,8 @@ Using replacement fields, you can define a pattern for the resulting string.
 | %z | Time offset from UTC as +HHMM or -HHMM | -0500 |
 | %% | a % sign | % |
 
+(*) In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using the setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
+
 **Example**
 
 Query:
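A small sketch of the `%M` behavior change described in (*):

```sql
SELECT
    formatDateTime(toDateTime('2023-01-22 22:33:44'), '%M') AS month_name, -- 'January' in v23.4+, '33' before
    formatDateTime(toDateTime('2023-01-22 22:33:44'), '%i') AS minute      -- always '33'
```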
docs/en/sql-reference/functions/hash-functions.md

@@ -441,11 +441,11 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00'))
 
 ## javaHash
 
 Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
 [Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
 [Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
 [Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
 [Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
 This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
 
 Note that Java only supports calculating the hash of signed integers, so if you want to calculate the hash of an unsigned integer you must cast it to the proper signed ClickHouse type.
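An illustrative sketch; the expected value follows directly from Java's `String.hashCode` definition:

```sql
-- Matches Java: "abc".hashCode() == 97*31*31 + 98*31 + 99 == 96354
SELECT javaHash('abc') AS hash
```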
@@ -660,6 +660,45 @@ Result:
 └──────────────────────┴─────────────────────┘
 ```
 
+## kafkaMurmurHash
+
+Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482) and without the highest bit to be compatible with [Default Partitioner](https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java#L328).
+
+**Syntax**
+
+```sql
+kafkaMurmurHash(par1, ...)
+```
+
+**Arguments**
+
+- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types).
+
+**Returned value**
+
+- Calculated hash value.
+
+Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+    kafkaMurmurHash('foobar') AS res1,
+    kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2
+```
+
+Result:
+
+```response
+┌───────res1─┬─────res2─┐
+│ 1357151166 │ 85479775 │
+└────────────┴──────────┘
+```
 
 ## murmurHash3_32, murmurHash3_64
 
 Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions.

## replaceOne(haystack, pattern, replacement)

Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
‘pattern’ and ‘replacement’ must be constants.

## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)

Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.

Alias: `replace`.

## replaceRegexpOne(haystack, pattern, replacement)

Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
‘pattern’ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
‘pattern’ must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
‘replacement’ must be a plain string or a string containing substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the ‘pattern’ or ‘replacement’ string, escape it using `\`.
Also keep in mind that string literals require an extra escaping.
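A minimal illustration of the substitution syntax just described (an editorial example, not part of the original change):

```sql
-- \1..\3 refer to the capturing groups; note the doubled backslashes
-- required by string-literal escaping.
SELECT replaceRegexpOne('2023-04-05', '(\\d{4})-(\\d{2})-(\\d{2})', '\\3/\\2/\\1') AS res;
-- res = '05/04/2023'
```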
@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res

└─────────────────────┘
```

Alias: `REGEXP_REPLACE`.

## regexpQuoteMeta(s)

The function adds a backslash before some predefined characters in the string.

@ -22,6 +22,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]

Deletes the table.

:::tip
Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
:::

Syntax:

``` sql
@ -224,6 +224,14 @@ Clears freezed backup with the specified name from all the disks. See more about

SYSTEM UNFREEZE WITH NAME <backup_name>
```

### WAIT LOADING PARTS

Waits until all asynchronously loading data parts of a table (outdated data parts) have been loaded.

``` sql
SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
```

## Managing ReplicatedMergeTree Tables

ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.
99
docs/en/sql-reference/statements/undrop.md
Normal file
@ -0,0 +1,99 @@
---
slug: /en/sql-reference/statements/undrop
sidebar_label: UNDROP
---

# UNDROP TABLE

Cancels the dropping of the table.

Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an Atomic database
within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in
a system table called `system.dropped_tables`.

If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.

:::note
UNDROP TABLE is experimental. To use it, add this setting:
```sql
set allow_experimental_undrop_table_query = 1;
```
:::

:::tip
Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
:::

Syntax:

``` sql
UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
```

**Example**

``` sql
set allow_experimental_undrop_table_query = 1;
```

```sql
CREATE TABLE undropMe
(
    `id` UInt8
)
ENGINE = MergeTree
ORDER BY id
```

```sql
DROP TABLE undropMe
```
```sql
SELECT *
FROM system.dropped_tables
FORMAT Vertical
```
```response
Row 1:
──────
index: 0
database: default
table: undropMe
uuid: aa696a1a-1d70-4e60-a841-4c80827706cc
engine: MergeTree
metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
table_dropped_time: 2023-04-05 14:12:12

1 row in set. Elapsed: 0.001 sec.
```
```sql
UNDROP TABLE undropMe
```
```response
Ok.
```
```sql
SELECT *
FROM system.dropped_tables
FORMAT Vertical
```
```response
Ok.

0 rows in set. Elapsed: 0.001 sec.
```
```sql
DESCRIBE TABLE undropMe
FORMAT Vertical
```
```response
Row 1:
──────
name: id
type: UInt8
default_type:
default_expression:
comment:
codec_expression:
ttl_expression:
```
@ -7,7 +7,7 @@ sidebar_position: 141

Sums the difference between consecutive rows. If the difference is negative, it is ignored.

This function is primarily intended for [materialized views](../../../sql-reference/statements/create/view.md#materialized) ordered by some time bucket according to a timestamp, for example a `toStartOfMinute` bucket. Because the rows in such a materialized view all share the same timestamp, it is impossible to merge them in the "right" order. The function keeps track of the `timestamp` of the observed values, so the states can be ordered correctly during merging.
This function is primarily intended for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some rounded time bucket according to a timestamp, for example a `toStartOfMinute` bucket. Because the rows in such a materialized view all share the same timestamp, it is impossible to merge them in the correct order without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the observed values, so the values (states) of the function are computed correctly during merging of parts.

To calculate the difference between ordered consecutive rows, you can use [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) instead of `deltaSumTimestamp`.
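A minimal sketch of the call shape described above (an editorial example; the inline `values` table and its numbers are assumptions, not part of this change):

```sql
-- Sums only the positive deltas of `value`; `timestamp` lets the
-- aggregation state be merged in the correct order.
SELECT deltaSumTimestamp(value, timestamp)
FROM values('timestamp DateTime, value UInt32',
    ('2023-01-01 00:00:00', 1),
    ('2023-01-01 00:00:01', 3),
    ('2023-01-01 00:00:02', 2));
-- deltas are +2 and -1; the negative one is ignored, so the result is 2
```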
@ -277,11 +277,11 @@ void Client::initialize(Poco::Util::Application & self)

*/

const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
if (env_user)
if (env_user && !config().has("user"))
    config().setString("user", env_user);

const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
if (env_password)
if (env_password && !config().has("password"))
    config().setString("password", env_password);

parseConnectionsCredentials();
@ -17,7 +17,6 @@

#include <Poco/Net/TCPServerParams.h>
#include <Poco/Net/TCPServer.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Version.h>
#include <Poco/Environment.h>
#include <sys/stat.h>
#include <pwd.h>
@ -981,7 +981,7 @@ try

StatusFile status{path / "status", StatusFile::write_full_info};

DB::ServerUUID::load(path / "uuid", log);
ServerUUID::load(path / "uuid", log);

/// Try to increase limit on number of open files.
{
@ -10,6 +10,7 @@

#include <Interpreters/Access/InterpreterCreateUserQuery.h>
#include <Interpreters/Access/InterpreterShowGrantsQuery.h>
#include <Common/logger_useful.h>
#include <Common/ThreadPool.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h>
@ -19,6 +20,7 @@

#include <base/range.h>
#include <filesystem>
#include <fstream>
#include <memory>


namespace DB
@ -317,15 +319,15 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)

    return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily,
            /// without restarting that thread.

if (lists_writing_thread.joinable())
    lists_writing_thread.join();
if (lists_writing_thread && lists_writing_thread->joinable())
    lists_writing_thread->join();

/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
out.close();

lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
lists_writing_thread = std::make_unique<ThreadFromGlobalPool>(&DiskAccessStorage::listsWritingThreadFunc, this);
lists_writing_thread_is_waiting = true;
}

@ -349,10 +351,10 @@ void DiskAccessStorage::listsWritingThreadFunc()

void DiskAccessStorage::stopListsWritingThread()
{
    if (lists_writing_thread.joinable())
    if (lists_writing_thread && lists_writing_thread->joinable())
    {
        lists_writing_thread_should_exit.notify_one();
        lists_writing_thread.join();
        lists_writing_thread->join();
    }
}
@ -1,7 +1,7 @@

#pragma once

#include <Access/MemoryAccessStorage.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <boost/container/flat_set.hpp>


@ -81,7 +81,7 @@ private:

bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false;

/// List files are written in a separate thread.
ThreadFromGlobalPool lists_writing_thread;
std::unique_ptr<ThreadFromGlobalPool> lists_writing_thread;

/// Signals `lists_writing_thread` to exit.
std::condition_variable lists_writing_thread_should_exit;
@ -1,3 +1,4 @@

#include <memory>
#include <Access/AccessEntityIO.h>
#include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
@ -15,6 +16,7 @@

#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/escapeForFileName.h>
#include <Common/setThreadName.h>
#include <Common/ThreadPool.h>
#include <base/range.h>
#include <base/sleep.h>
#include <boost/range/algorithm_ext/erase.hpp>
@ -72,7 +74,7 @@ void ReplicatedAccessStorage::startWatchingThread()
{
    bool prev_watching_flag = watching.exchange(true);
    if (!prev_watching_flag)
        watching_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWatchingThread, this);
        watching_thread = std::make_unique<ThreadFromGlobalPool>(&ReplicatedAccessStorage::runWatchingThread, this);
}

void ReplicatedAccessStorage::stopWatchingThread()
@ -81,8 +83,8 @@ void ReplicatedAccessStorage::stopWatchingThread()
    if (prev_watching_flag)
    {
        watched_queue->finish();
        if (watching_thread.joinable())
            watching_thread.join();
        if (watching_thread && watching_thread->joinable())
            watching_thread->join();
    }
}

@ -2,7 +2,7 @@

#include <atomic>

#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ConcurrentBoundedQueue.h>
@ -21,7 +21,7 @@ public:

static constexpr char STORAGE_TYPE[] = "replicated";

ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup);
virtual ~ReplicatedAccessStorage() override;
~ReplicatedAccessStorage() override;

const char * getStorageType() const override { return STORAGE_TYPE; }

@ -43,7 +43,7 @@ private:

std::mutex cached_zookeeper_mutex;

std::atomic<bool> watching = false;
ThreadFromGlobalPool watching_thread;
std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;

std::optional<UUID> insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
@ -0,0 +1,36 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h>
#include <AggregateFunctions/FactoryHelpers.h>

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

namespace DB
{
struct Settings;

namespace
{

AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertBinary(name, argument_types);

    if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} only supports numerical types", name);

    return std::make_shared<AggregateFunctionKolmogorovSmirnov>(argument_types, parameters);
}

}

void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
{
    factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
}

}
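A usage sketch for the function registered above (editorial, not part of the commit; `randNormal` and the sample sizes are assumptions): the second argument selects the sample (0 or 1), and the result is the tuple `(d_statistic, p_value)`.

```sql
SELECT kolmogorovSmirnovTest('two-sided', 'auto')(value, num)
FROM
(
    SELECT randNormal(0, 1) AS value, 0 AS num FROM numbers(1000)
    UNION ALL
    SELECT randNormal(0, 1) AS value, 1 AS num FROM numbers(1000)
);
```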
323
src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h
Normal file
@ -0,0 +1,323 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/Exception.h>
#include <Common/assert_cast.h>
#include <Common/ArenaAllocator.h>
#include <Common/PODArray_fwd.h>
#include <base/types.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>

namespace DB
{
struct Settings;

namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int BAD_ARGUMENTS;
}

struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
{
    enum class Alternative
    {
        TwoSided,
        Less,
        Greater
    };

    std::pair<Float64, Float64> getResult(Alternative alternative, String method)
    {
        ::sort(x.begin(), x.end());
        ::sort(y.begin(), y.end());

        Float64 max_s = std::numeric_limits<Float64>::min();
        Float64 min_s = std::numeric_limits<Float64>::max();
        Float64 now_s = 0;
        UInt64 pos_x = 0;
        UInt64 pos_y = 0;
        UInt64 n1 = x.size();
        UInt64 n2 = y.size();

        const Float64 n1_d = 1. / n1;
        const Float64 n2_d = 1. / n2;
        const Float64 tol = 1e-7;

        // reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
        while (pos_x < x.size() && pos_y < y.size())
        {
            if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
            {
                if (x[pos_x] < y[pos_y])
                {
                    now_s += n1_d;
                    ++pos_x;
                }
                else
                {
                    now_s -= n2_d;
                    ++pos_y;
                }
                max_s = std::max(max_s, now_s);
                min_s = std::min(min_s, now_s);
            }
            else
            {
                now_s += n1_d;
                ++pos_x;
            }
        }
        now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
        min_s = std::min(min_s, now_s);
        max_s = std::max(max_s, now_s);

        Float64 d = 0;
        if (alternative == Alternative::TwoSided)
            d = std::max(std::abs(max_s), std::abs(min_s));
        else if (alternative == Alternative::Less)
            d = -min_s;
        else if (alternative == Alternative::Greater)
            d = max_s;

        UInt64 g = std::__gcd(n1, n2);
        UInt64 nx_g = n1 / g;
        UInt64 ny_g = n2 / g;

        if (method == "auto")
            method = std::max(n1, n2) <= 10000 ? "exact" : "asymp";
        else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
            method = "asymp";

        Float64 p_value = std::numeric_limits<Float64>::infinity();

        if (method == "exact")
        {
            /* reference:
             * Gunar Schröer and Dietrich Trenkler
             * Exact and Randomization Distributions of Kolmogorov-Smirnov, Tests for Two or Three Samples
             *
             * and
             *
             * Thomas Viehmann
             * Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
             */
            if (n2 > n1)
                std::swap(n1, n2);

            const Float64 f_n1 = static_cast<Float64>(n1);
            const Float64 f_n2 = static_cast<Float64>(n2);
            const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
            PaddedPODArray<Float64> c(n1 + 1);

            auto check = alternative == Alternative::TwoSided ?
                [](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
                : [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };

            c[0] = 0;
            for (UInt64 j = 1; j <= n1; j++)
                if (check(k_d, 0., j / f_n1))
                    c[j] = 1.;
                else
                    c[j] = c[j - 1];

            for (UInt64 i = 1; i <= n2; i++)
            {
                if (check(k_d, i / f_n2, 0.))
                    c[0] = 1.;
                for (UInt64 j = 1; j <= n1; j++)
                    if (check(k_d, i / f_n2, j / f_n1))
                        c[j] = 1.;
                    else
                    {
                        Float64 v = i / static_cast<Float64>(i + j);
                        Float64 w = j / static_cast<Float64>(i + j);
                        c[j] = v * c[j] + w * c[j - 1];
                    }
            }
            p_value = c[n1];
        }
        else if (method == "asymp")
        {
            Float64 n = std::min(n1, n2);
            Float64 m = std::max(n1, n2);
            Float64 p = sqrt((n * m) / (n + m)) * d;

            if (alternative == Alternative::TwoSided)
            {
                /* reference:
                 * J.DURBIN
                 * Distribution theory for tests based on the sample distribution function
                 */
                Float64 new_val, old_val, s, w, z;
                UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));

                if (p < 1)
                {
                    z = - (M_PI_2 * M_PI_4) / (p * p);
                    w = log(p);
                    s = 0;
                    for (UInt64 k = 1; k < k_max; k += 2)
                        s += exp(k * k * z - w);
                    p = s / 0.398942280401432677939946059934;
                }
                else
                {
                    z = -2 * p * p;
                    s = -1;
                    UInt64 k = 1;
                    old_val = 0;
                    new_val = 1;
                    while (fabs(old_val - new_val) > tol)
                    {
                        old_val = new_val;
                        new_val += 2 * s * exp(z * k * k);
                        s *= -1;
                        k++;
                    }
                    p = new_val;
                }
                p_value = 1 - p;
            }
            else
            {
                /* reference:
                 * J. L. HODGES, Jr
                 * The significance probability of the Smirnov two-sample test
                 */

                // Use Hodges' suggested approximation Eqn 5.3
                // Requires m to be the larger of (n1, n2)
                Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
                p_value = exp(expt);
            }
        }
        return {d, p_value};
    }

};

class AggregateFunctionKolmogorovSmirnov final:
    public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
{
private:
    using Alternative = typename KolmogorovSmirnov::Alternative;
    Alternative alternative = Alternative::TwoSided;
    String method = "auto";

public:
    explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
        : IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
    {
        if (params.size() > 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require two parameter or less", getName());

        if (params.empty())
            return;

        if (params[0].getType() != Field::Types::String)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());

        const auto & param = params[0].get<String>();
        if (param == "two-sided")
            alternative = Alternative::TwoSided;
        else if (param == "less")
            alternative = Alternative::Less;
        else if (param == "greater")
            alternative = Alternative::Greater;
        else
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
                    "It must be one of: 'two-sided', 'less', 'greater'", getName());

        if (params.size() != 2)
            return;

        if (params[1].getType() != Field::Types::String)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());

        method = params[1].get<String>();
        if (method != "auto" && method != "exact" && method != "asymp")
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
                    "It must be one of: 'auto', 'exact', 'asymp'", getName());
    }

    String getName() const override
    {
        return "kolmogorovSmirnovTest";
    }

    bool allocatesMemoryInArena() const override { return true; }

    static DataTypePtr createResultType()
    {
        DataTypes types
        {
            std::make_shared<DataTypeNumber<Float64>>(),
            std::make_shared<DataTypeNumber<Float64>>(),
        };

        Strings names
        {
            "d_statistic",
            "p_value"
        };

        return std::make_shared<DataTypeTuple>(
            std::move(types),
            std::move(names)
        );
    }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        Float64 value = columns[0]->getFloat64(row_num);
        UInt8 is_second = columns[1]->getUInt(row_num);
        if (is_second)
            this->data(place).addY(value, arena);
        else
            this->data(place).addX(value, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).merge(this->data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf);
    }

    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, arena);
    }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        if (!this->data(place).size_x || !this->data(place).size_y)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());

        auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);

        /// Because p-value is a probability.
        p_value = std::min(1.0, std::max(0.0, p_value));

        auto & column_tuple = assert_cast<ColumnTuple &>(to);
        auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
        auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));

        column_stat.getData().push_back(d_statistic);
        column_value.getData().push_back(p_value);
    }

};

}
@ -26,9 +26,11 @@ namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int BAD_ARGUMENTS;
}

template <typename> class QuantileTiming;
template <typename> class QuantileApprox;


/** Generic aggregate function for calculation of quantiles.
@ -60,6 +62,7 @@ private:
    using ColVecType = ColumnVectorOrDecimal<Value>;

    static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
    static constexpr bool is_quantile_approx = std::is_same_v<Data, QuantileApprox<Value>>;
    static_assert(!is_decimal<Value> || !returns_float);

    QuantileLevels<Float64> levels;
@ -67,22 +70,57 @@ private:
    /// Used when there are single level to get.
    Float64 level = 0.5;

    /// Used for the approximate version of the algorithm (Greenwald-Khanna)
    ssize_t accuracy = 10000;

    DataTypePtr & argument_type;

public:
    AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
        : IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
            argument_types_, params, createResultType(argument_types_))
        , levels(params, returns_many)
        , levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many)
        , level(levels.levels[0])
        , argument_type(this->argument_types[0])
    {
        if (!returns_many && levels.size() > 1)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName());
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());

        if constexpr (is_quantile_approx)
        {
            if (params.empty())
                throw Exception(
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());

            const auto & accuracy_field = params[0];
            if (!isInt64OrUInt64FieldType(accuracy_field.getType()))
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());

            if (accuracy_field.getType() == Field::Types::Int64)
                accuracy = accuracy_field.get<Int64>();
            else
                accuracy = accuracy_field.get<UInt64>();

            if (accuracy <= 0)
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "Aggregate function {} requires accuracy parameter with positive value but is {}",
                    getName(),
                    accuracy);
        }
    }

    String getName() const override { return Name::name; }

    void create(AggregateDataPtr __restrict place) const override /// NOLINT
    {
        if constexpr (is_quantile_approx)
            new (place) Data(accuracy);
        else
            new (place) Data;
    }

    static DataTypePtr createResultType(const DataTypes & argument_types_)
    {
        DataTypePtr res;
@ -257,4 +295,7 @@ struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16";
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };

struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; };
struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; };

}
71
src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp
Normal file
@ -0,0 +1,71 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileApprox.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

namespace
{

template <typename Value, bool _> using FuncQuantileApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantileApprox, false, void, false>;
template <typename Value, bool _> using FuncQuantilesApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantilesApprox, false, void, true>;

template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
    /// Second argument type check doesn't depend on the type of the first one.
    Function<void, true>::assertSecondArg(argument_types);

    const DataTypePtr & argument_type = argument_types[0];
    WhichDataType which(argument_type);

#define DISPATCH(TYPE) \
    if (which.idx == TypeIndex::TYPE) \
        return std::make_shared<Function<TYPE, true>>(argument_types, params);
    FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH

    if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
    if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);

    if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
    if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
    if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
    if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
    if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);

    if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
    if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
    if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
    if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);

    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
                    argument_type->getName(), name);
}

}

void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory & factory)
{
    /// For aggregate functions returning array we cannot return NULL on empty set.
    AggregateFunctionProperties properties = { .returns_default_when_only_null = true };

    factory.registerFunction(NameQuantileApprox::name, createAggregateFunctionQuantile<FuncQuantileApprox>);
    factory.registerFunction(NameQuantilesApprox::name, {createAggregateFunctionQuantile<FuncQuantilesApprox>, properties});

    /// 'median' is an alias for 'quantile'
    factory.registerAlias("medianApprox", NameQuantileApprox::name);
}

}
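Given the parameter handling above (the first parameter is the accuracy, the remaining ones are quantile levels), a hedged usage sketch (editorial, not part of the commit):

```sql
-- Greenwald-Khanna approximate quantile: a larger accuracy means a
-- smaller relative error at the cost of more memory.
SELECT quantileApprox(100, 0.5)(number) FROM numbers(1000);

-- The plural form returns an array, one value per requested level.
SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number) FROM numbers(1000);
```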
@ -9,7 +9,7 @@

#include <Interpreters/Context_fwd.h>
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <Core/IResolvedFunction.h>

#include "config.h"
477
src/AggregateFunctions/QuantileApprox.h
Normal file
@ -0,0 +1,477 @@
#pragma once

#include <cmath>
#include <base/sort.h>
#include <Common/RadixSort.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>


namespace DB
{
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
}

template <typename T>
class ApproxSampler
{
public:
    struct Stats
    {
        T value;     // the sampled value
        Int64 g;     // the minimum rank jump from the previous value's minimum rank
        Int64 delta; // the maximum span of the rank

        Stats() = default;
        Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
    };

    struct QueryResult
    {
        size_t index;
        Int64 rank;
        T value;

        QueryResult(size_t index_, Int64 rank_, T value_) : index(index_), rank(rank_), value(value_) { }
    };

    ApproxSampler() = default;

    explicit ApproxSampler(
        double relative_error_,
        size_t compress_threshold_ = default_compress_threshold,
        size_t count_ = 0,
        bool compressed_ = false)
        : relative_error(relative_error_)
        , compress_threshold(compress_threshold_)
        , count(count_)
        , compressed(compressed_)
    {
        sampled.reserve(compress_threshold);
        backup_sampled.reserve(compress_threshold);

        head_sampled.reserve(default_head_size);
    }

    bool isCompressed() const { return compressed; }
    void setCompressed() { compressed = true; }

    void insert(T x)
    {
        head_sampled.push_back(x);
        compressed = false;
        if (head_sampled.size() >= default_head_size)
        {
            withHeadBufferInserted();
            if (sampled.size() >= compress_threshold)
                compress();
        }
    }

    void query(const Float64 * percentiles, const size_t * indices, size_t size, T * result) const
    {
        if (!head_sampled.empty())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot operate on an uncompressed summary, call compress() first");

        if (sampled.empty())
        {
            for (size_t i = 0; i < size; ++i)
                result[i] = T();
            return;
        }

        Int64 current_max = std::numeric_limits<Int64>::min();
        for (const auto & stats : sampled)
            current_max = std::max(stats.delta + stats.g, current_max);
        Int64 target_error = current_max / 2;

        size_t index = 0;
        auto min_rank = sampled[0].g;
        for (size_t i = 0; i < size; ++i)
        {
            double percentile = percentiles[indices[i]];
            if (percentile <= relative_error)
            {
                result[indices[i]] = sampled.front().value;
            }
            else if (percentile >= 1 - relative_error)
            {
                result[indices[i]] = sampled.back().value;
            }
            else
            {
                QueryResult res = findApproxQuantile(index, min_rank, target_error, percentile);
                index = res.index;
                min_rank = res.rank;
                result[indices[i]] = res.value;
            }
        }
    }

    void compress()
    {
        if (compressed)
            return;

        withHeadBufferInserted();

        doCompress(2 * relative_error * count);
        compressed = true;
    }


    void merge(const ApproxSampler & other)
    {
        if (other.count == 0)
            return;
        else if (count == 0)
        {
            compress_threshold = other.compress_threshold;
            relative_error = other.relative_error;
            count = other.count;
            compressed = other.compressed;

            sampled.resize(other.sampled.size());
            memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
            return;
        }
        else
        {
            // Merge the two buffers.
            // The GK algorithm is a bit unclear about it, but we need to adjust the statistics during the
            // merging. The main idea is that samples that come from one side will suffer from the lack of
            // precision of the other.
            // As a concrete example, take two QuantileSummaries whose samples (value, g, delta) are:
            // `a = [(0, 1, 0), (20, 99, 0)]` and `b = [(10, 1, 0), (30, 49, 0)]`
            // This means `a` has 100 values, whose minimum is 0 and maximum is 20,
            // while `b` has 50 values, between 10 and 30.
            // The resulting samples of the merge will be:
            // a+b = [(0, 1, 0), (10, 1, ??), (20, 99, ??), (30, 49, 0)]
            // The values of `g` do not change, as they represent the minimum number of values between two
            // consecutive samples. The values of `delta` should be adjusted, however.
            // Take the case of the sample `10` from `b`. In the original stream, it could have appeared
            // right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`.
            // In the GK algorithm's style of working in terms of maximum bounds, one can observe that the
            // maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) =
            // floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is
            // `floor(2 * eps_b * n_b)`.
            // Only samples that interleave the other side are affected. That means that samples from
            // one side that are lesser (or greater) than all samples from the other side are just copied
            // unmodified.
            // If the merging instances have different `relativeError`, the resulting instance will carry
            // the largest one: `eps_ab = max(eps_a, eps_b)`.
            // The main invariant of the GK algorithm is kept:
            // `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
            // `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
            // Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])`
            compress();

            backup_sampled.clear();
            backup_sampled.reserve(sampled.size() + other.sampled.size());
            double merged_relative_error = std::max(relative_error, other.relative_error);
            size_t merged_count = count + other.count;
            Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
            Int64 additional_other_delta = static_cast<Int64>(std::floor(2 * relative_error * count));

            // Do a merge of two sorted lists until one of the lists is fully consumed
            size_t self_idx = 0;
            size_t other_idx = 0;
            while (self_idx < sampled.size() && other_idx < other.sampled.size())
            {
                const Stats & self_sample = sampled[self_idx];
                const Stats & other_sample = other.sampled[other_idx];

                // Detect next sample
                Stats next_sample;
                Int64 additional_delta = 0;
                if (self_sample.value < other_sample.value)
                {
                    ++self_idx;
                    next_sample = self_sample;
                    additional_delta = other_idx > 0 ? additional_self_delta : 0;
                }
                else
                {
                    ++other_idx;
                    next_sample = other_sample;
                    additional_delta = self_idx > 0 ? additional_other_delta : 0;
                }

                // Insert it
                next_sample.delta += additional_delta;
                backup_sampled.emplace_back(std::move(next_sample));
            }

            // Copy the remaining samples from the other list
            // (by construction, at most one `while` loop will run)
            while (self_idx < sampled.size())
            {
                backup_sampled.emplace_back(sampled[self_idx]);
                ++self_idx;
            }
            while (other_idx < other.sampled.size())
            {
                backup_sampled.emplace_back(other.sampled[other_idx]);
                ++other_idx;
            }

            std::swap(sampled, backup_sampled);
            relative_error = merged_relative_error;
            count = merged_count;
            compress_threshold = other.compress_threshold;

            doCompress(2 * merged_relative_error * merged_count);
            compressed = true;
        }
    }

    void write(WriteBuffer & buf) const
    {
        writeIntBinary<size_t>(compress_threshold, buf);
        writeFloatBinary<double>(relative_error, buf);
        writeIntBinary<size_t>(count, buf);
        writeIntBinary<size_t>(sampled.size(), buf);

        for (const auto & stats : sampled)
        {
            writeFloatBinary<T>(stats.value, buf);
            writeIntBinary<Int64>(stats.g, buf);
            writeIntBinary<Int64>(stats.delta, buf);
        }
    }

    void read(ReadBuffer & buf)
    {
        readIntBinary<size_t>(compress_threshold, buf);
        readFloatBinary<double>(relative_error, buf);
        readIntBinary<size_t>(count, buf);

        size_t sampled_len = 0;
        readIntBinary<size_t>(sampled_len, buf);
        sampled.resize(sampled_len);

        for (size_t i = 0; i < sampled_len; ++i)
        {
            auto & stats = sampled[i]; // read into the stored element, not a copy
            readFloatBinary<T>(stats.value, buf);
            readIntBinary<Int64>(stats.g, buf);
            readIntBinary<Int64>(stats.delta, buf);
        }
    }

private:
    QueryResult findApproxQuantile(size_t index, Int64 min_rank_at_index, double target_error, double percentile) const
    {
        Stats curr_sample = sampled[index];
        Int64 rank = static_cast<Int64>(std::ceil(percentile * count));
        size_t i = index;
        Int64 min_rank = min_rank_at_index;
        while (i < sampled.size() - 1)
        {
            Int64 max_rank = min_rank + curr_sample.delta;
            if (max_rank - target_error <= rank && rank <= min_rank + target_error)
                return {i, min_rank, curr_sample.value};
            else
            {
                ++i;
                curr_sample = sampled[i];
                min_rank += curr_sample.g;
            }
        }
        return {sampled.size() - 1, 0, sampled.back().value};
    }

    void withHeadBufferInserted()
    {
        if (head_sampled.empty())
            return;

        bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
        if (use_radix_sort)
            RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
        else
            ::sort(head_sampled.begin(), head_sampled.end());

        backup_sampled.clear();
        backup_sampled.reserve(sampled.size() + head_sampled.size());

        size_t sample_idx = 0;
        size_t ops_idx = 0;
        size_t current_count = count;
        for (; ops_idx < head_sampled.size(); ++ops_idx)
        {
            T current_sample = head_sampled[ops_idx];

            // Add all the samples before the next observation.
            while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
            {
                backup_sampled.emplace_back(sampled[sample_idx]);
                ++sample_idx;
            }

            // If it is the first one to insert, or if it is the last one
            ++current_count;
            Int64 delta;
            if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
                delta = 0;
            else
                delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));

            backup_sampled.emplace_back(current_sample, 1, delta);
        }

        // Add all the remaining existing samples
        for (; sample_idx < sampled.size(); ++sample_idx)
            backup_sampled.emplace_back(sampled[sample_idx]);

        std::swap(sampled, backup_sampled);
        head_sampled.clear();
        count = current_count;
    }


    void doCompress(double merge_threshold)
    {
        if (sampled.empty())
            return;

        backup_sampled.clear();
        // Start from the last element, which is always part of the set.
        // The head contains the current new head, that may be merged with the current element.
        Stats head = sampled.back();
        ssize_t i = sampled.size() - 2;

        // Do not compress the last element
        while (i >= 1)
        {
            // The current sample:
            const auto & sample1 = sampled[i];
            // Do we need to compress?
            if (sample1.g + head.g + head.delta < merge_threshold)
            {
                // Do not insert yet, just merge the current element into the head.
                head.g += sample1.g;
            }
            else
            {
                // Prepend the current head, and keep the current sample as target for merging.
                backup_sampled.push_back(head);
                head = sample1;
            }
            --i;
        }

        backup_sampled.push_back(head);
        // If necessary, add the minimum element:
        auto curr_head = sampled.front();

        // don't add the minimum element if `currentSamples` has only one element (both `currHead` and
        // `head` point to the same element)
        if (curr_head.value <= head.value && sampled.size() > 1)
            backup_sampled.emplace_back(sampled.front());

        std::reverse(backup_sampled.begin(), backup_sampled.end());
        std::swap(sampled, backup_sampled);
    }

    double relative_error;
    size_t compress_threshold;
    size_t count = 0;
    bool compressed;

    PaddedPODArray<Stats> sampled;
    PaddedPODArray<Stats> backup_sampled;

    PaddedPODArray<T> head_sampled;

    static constexpr size_t default_compress_threshold = 10000;
    static constexpr size_t default_head_size = 50000;
};

template <typename Value>
class QuantileApprox
{
private:
    using Data = ApproxSampler<Value>;
    mutable Data data;

public:
    QuantileApprox() = default;

    explicit QuantileApprox(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }

    void add(const Value & x)
    {
        data.insert(x);
    }

    template <typename Weight>
    void add(const Value &, const Weight &)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for GKSampler");
    }

    void merge(const QuantileApprox & rhs)
    {
        if (!data.isCompressed())
            data.compress();

        data.merge(rhs.data);
    }

    void serialize(WriteBuffer & buf) const
    {
        /// Always compress before serialization
        if (!data.isCompressed())
            data.compress();

        data.write(buf);
    }

    void deserialize(ReadBuffer & buf)
    {
        data.read(buf);

        data.setCompressed();
    }

    /// Get the value of the `level` quantile. The level must be between 0 and 1.
    Value get(Float64 level)
    {
        if (!data.isCompressed())
            data.compress();

        Value res;
        size_t indice = 0;
        data.query(&level, &indice, 1, &res);
        return res;
    }

    /// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
    /// indices - an array of index levels such that the corresponding elements will go in ascending order.
    void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
    {
        if (!data.isCompressed())
            data.compress();

        data.query(levels, indices, size, result);
    }

    Float64 getFloat64(Float64 /*level*/)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat64 is not implemented for GKSampler");
    }

    void getManyFloat(const Float64 * /*levels*/, const size_t * /*indices*/, size_t /*size*/, Float64 * /*result*/)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for GKSampler");
    }
};

}
@ -32,6 +32,7 @@ void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);

void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
void registerAggregateFunctionRate(AggregateFunctionFactory &);
@ -79,6 +80,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory

void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);

class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -123,6 +125,7 @@ void registerAggregateFunctions()

    registerAggregateFunctionsQuantileTDigestWeighted(factory);
    registerAggregateFunctionsQuantileBFloat16(factory);
    registerAggregateFunctionsQuantileBFloat16Weighted(factory);
    registerAggregateFunctionsQuantileApprox(factory);
    registerAggregateFunctionsSequenceMatch(factory);
    registerAggregateFunctionWindowFunnel(factory);
    registerAggregateFunctionRate(factory);
@ -170,6 +173,7 @@ void registerAggregateFunctions()

    registerAggregateFunctionExponentialMovingAverage(factory);
    registerAggregateFunctionSparkbar(factory);
    registerAggregateFunctionAnalysisOfVariance(factory);
    registerAggregateFunctionKolmogorovSmirnovTest(factory);

    registerWindowFunctions(factory);
}
@ -86,7 +86,12 @@ public:

DataTypePtr getResultType() const override
{
    return getExpression()->getResultType();
    return result_type;
}

void resolve(DataTypePtr lambda_type)
{
    result_type = std::move(lambda_type);
}

void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
@ -102,6 +107,7 @@ protected:

private:
    Names argument_names;
    DataTypePtr result_type;

    static constexpr size_t arguments_child_index = 0;
    static constexpr size_t expression_child_index = 1;
@ -32,6 +32,7 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
@ -75,6 +76,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryTreeBuilder.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/Identifier.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -112,6 +114,8 @@ namespace ErrorCodes
|
||||
extern const int ALIAS_REQUIRED;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h before.
|
||||
@ -5085,8 +5089,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str();
|
||||
}
|
||||
|
||||
argument_types[function_lambda_argument_index] = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve->getResultType());
|
||||
argument_columns[function_lambda_argument_index].type = argument_types[function_lambda_argument_index];
|
||||
auto lambda_resolved_type = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve_typed.getExpression()->getResultType());
|
||||
lambda_to_resolve_typed.resolve(lambda_resolved_type);
|
||||
|
||||
argument_types[function_lambda_argument_index] = lambda_resolved_type;
|
||||
argument_columns[function_lambda_argument_index].type = lambda_resolved_type;
|
||||
function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve);
|
||||
}
|
||||
|
||||
@ -6076,6 +6083,18 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
|
||||
scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data));
|
||||
}
|
||||
|
||||
bool findIdentifier(const FunctionNode & function)
|
||||
{
|
||||
for (const auto & argument : function.getArguments())
|
||||
{
|
||||
if (argument->as<IdentifierNode>())
|
||||
return true;
|
||||
if (const auto * f = argument->as<FunctionNode>(); f && findIdentifier(*f))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Resolve table function node in scope
|
||||
void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
IdentifierResolveScope & scope,
|
||||
@ -6087,12 +6106,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
if (!nested_table_function)
|
||||
expressions_visitor.visit(table_function_node_typed.getArgumentsNode());
|
||||
|
||||
const auto & table_function_factory = TableFunctionFactory::instance();
|
||||
const auto & table_function_name = table_function_node_typed.getTableFunctionName();
|
||||
|
||||
auto & scope_context = scope.context;
|
||||
|
||||
TableFunctionPtr table_function_ptr = table_function_factory.tryGet(table_function_name, scope_context);
|
||||
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().tryGet(table_function_name, scope_context);
|
||||
if (!table_function_ptr)
|
||||
{
|
||||
auto hints = TableFunctionFactory::instance().getHints(table_function_name);
|
||||
@@ -6107,17 +6125,131 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                 table_function_name);
     }

+    uint64_t use_structure_from_insertion_table_in_table_functions = scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions;
     if (!nested_table_function &&
-        scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions &&
+        use_structure_from_insertion_table_in_table_functions &&
         scope_context->hasInsertionTable() &&
         table_function_ptr->needStructureHint())
     {
         const auto & insertion_table = scope_context->getInsertionTable();
         if (!insertion_table.empty())
         {
-            auto insertion_table_storage = DatabaseCatalog::instance().getTable(insertion_table, scope_context);
-            const auto & structure_hint = insertion_table_storage->getInMemoryMetadataPtr()->columns;
-            table_function_ptr->setStructureHint(structure_hint);
+            const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
+            DB::ColumnsDescription structure_hint;
+
+            bool use_columns_from_insert_query = true;
+
+            /// Insert table matches columns against SELECT expression by position, so we want to map
+            /// insert table columns to table function columns through names from SELECT expression.
+
+            auto insert_column = insert_structure.begin();
+            auto insert_structure_end = insert_structure.end(); /// end iterator of the range covered by possible asterisk
+            auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
+            bool asterisk = false;
+            const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
+            auto expression = expression_list.begin();
+
+            /// We want to go through SELECT expression list and correspond each expression to column in insert table
+            /// which type will be used as a hint for the file structure inference.
+            for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
+            {
+                if (auto * identifier_node = (*expression)->as<IdentifierNode>())
+                {
+
+                    if (!virtual_column_names.contains(identifier_node->getIdentifier().getFullName()))
+                    {
+                        if (asterisk)
+                        {
+                            if (use_structure_from_insertion_table_in_table_functions == 1)
+                                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                            use_columns_from_insert_query = false;
+                            break;
+                        }
+
+                        structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
+                    }
+
+                    /// Once we hit asterisk we want to find end of the range covered by asterisk
+                    /// contributing every further SELECT expression to the tail of insert structure
+                    if (asterisk)
+                        --insert_structure_end;
+                    else
+                        ++insert_column;
+                }
+                else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
+                {
+                    if (asterisk)
+                    {
+                        if (use_structure_from_insertion_table_in_table_functions == 1)
+                            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");
+
+                        use_columns_from_insert_query = false;
+                        break;
+                    }
+                    if (!structure_hint.empty())
+                    {
+                        if (use_structure_from_insertion_table_in_table_functions == 1)
+                            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                        use_columns_from_insert_query = false;
+                        break;
+                    }
+
+                    asterisk = true;
+                }
+                else if (auto * function = (*expression)->as<FunctionNode>())
+                {
+                    if (use_structure_from_insertion_table_in_table_functions == 2 && findIdentifier(*function))
+                    {
+                        use_columns_from_insert_query = false;
+                        break;
+                    }
+
+                    /// Once we hit asterisk we want to find end of the range covered by asterisk
+                    /// contributing every further SELECT expression to the tail of insert structure
+                    if (asterisk)
+                        --insert_structure_end;
+                    else
+                        ++insert_column;
+                }
+                else
+                {
+                    /// Once we hit asterisk we want to find end of the range covered by asterisk
+                    /// contributing every further SELECT expression to the tail of insert structure
+                    if (asterisk)
+                        --insert_structure_end;
+                    else
+                        ++insert_column;
+                }
+            }
+
+            if (use_structure_from_insertion_table_in_table_functions == 2 && !asterisk)
+            {
+                /// For input function we should check if input format supports reading subset of columns.
+                if (table_function_ptr->getName() == "input")
+                    use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
+                else
+                    use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
+            }
+
+            if (use_columns_from_insert_query)
+            {
+                if (expression == expression_list.end())
+                {
+                    /// Append tail of insert structure to the hint
+                    if (asterisk)
+                    {
+                        for (; insert_column != insert_structure_end; ++insert_column)
+                            structure_hint.add({ insert_column->name, insert_column->type });
+                    }
+
+                    if (!structure_hint.empty())
+                        table_function_ptr->setStructureHint(structure_hint);
+
+                } else if (use_structure_from_insertion_table_in_table_functions == 1)
+                    throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
+            }
         }
     }
 }
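The hint-building loop above walks the SELECT projection and the insert-table columns in lockstep: named columns before an asterisk consume columns from the front, expressions after the asterisk shrink the tail, and whatever remains between the two iterators is the range the asterisk must expand to. A compilable model of just that pointer arithmetic (simplified types, not the analyzer's own):

#include <cassert>
#include <string>
#include <vector>

// Returns the insert-table columns covered by a single "*" in the projection.
std::vector<std::string> coveredByAsterisk(
    const std::vector<std::string> & select_expressions, // "*" marks the asterisk
    const std::vector<std::string> & insert_columns)
{
    auto front = insert_columns.begin();
    auto tail = insert_columns.end();
    bool asterisk = false;
    for (const auto & expression : select_expressions)
    {
        if (expression == "*")
        {
            asterisk = true;
            continue;
        }
        if (asterisk)
            --tail;   // expression after '*' maps to one column from the tail
        else
            ++front;  // expression before '*' maps to one column from the front
    }
    return {front, tail}; // the columns the asterisk has to cover
}

int main()
{
    // Models: INSERT INTO t (a, b, c, d) SELECT x, *, y FROM file(...)
    auto middle = coveredByAsterisk({"x", "*", "y"}, {"a", "b", "c", "d"});
    assert(middle == (std::vector<std::string>{"b", "c"}));
}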
@@ -115,13 +115,23 @@ private:

        for (size_t i = 0; i < expected_argument_types_size; ++i)
        {
+            // Skip lambdas
+            if (WhichDataType(expected_argument_types[i]).isFunction())
+                continue;
+
            const auto & expected_argument_type = expected_argument_types[i];
            const auto & actual_argument_type = actual_argument_columns[i].type;

+            if (!expected_argument_type)
+                throw Exception(ErrorCodes::LOGICAL_ERROR,
+                    "Function {} expected argument {} type is not set after running {} pass",
+                    function->toAST()->formatForErrorMessage(),
+                    i + 1,
+                    pass_name);
+
            if (!actual_argument_type)
                throw Exception(ErrorCodes::LOGICAL_ERROR,
                    "Function {} actual argument {} type is not set after running {} pass",
                    function->toAST()->formatForErrorMessage(),
                    i + 1,
                    pass_name);

            if (!expected_argument_type->equals(*actual_argument_type))
            {
                /// Aggregate functions remove low cardinality for their argument types
@@ -1,5 +1,6 @@
 #include <Backups/BackupCoordinationFileInfos.h>
+#include <Common/quoteString.h>
 #include <Common/Exception.h>


 namespace DB
@@ -771,16 +771,19 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
             String existing_backup_uuid = existing_backup_path;
             existing_backup_uuid.erase(0, String("backup-").size());


             if (existing_backup_uuid == toString(backup_uuid))
                 continue;

-            const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
-            if (status != Stage::COMPLETED)
+            String status;
+            if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
             {
-                LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
-                result = true;
-                return;
+                /// If status is not COMPLETED it could be because the backup failed, check if 'error' exists
+                if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_backup_path + "/error"))
+                {
+                    LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
+                    result = true;
+                    return;
+                }
             }
         }
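The switch from zk->get() to zk->tryGet() matters because a finished backup may delete its znodes between the children listing and the read; get() would throw on the missing node and fail the whole check, while tryGet() reports absence as an ordinary outcome. The extra exists() check on ".../error" additionally treats failed backups as non-concurrent. A reduced model of the two read styles (toy FakeKeeper, not the real ZooKeeper client):

#include <optional>
#include <stdexcept>
#include <string>

struct FakeKeeper
{
    std::optional<std::string> stage; /// models the ".../stage" znode

    /// Old behaviour: throws if the node vanished concurrently.
    std::string get() const
    {
        if (!stage)
            throw std::runtime_error("ZNONODE");
        return *stage;
    }

    /// New behaviour: absence is a non-exceptional result.
    bool tryGet(std::string & out) const
    {
        if (!stage)
            return false;
        out = *stage;
        return true;
    }
};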
@@ -15,6 +15,7 @@
 #include <base/sleep.h>
 #include <Common/escapeForFileName.h>
 #include <boost/range/algorithm/copy.hpp>
+#include <base/scope_guard.h>
 #include <filesystem>

 namespace fs = std::filesystem;
@@ -8,10 +8,11 @@ namespace DB
 BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
     const DiskPtr & disk_,
     const String & file_path_,
+    const ReadSettings & settings_,
     const std::optional<UInt64> & file_size_,
     const std::optional<UInt128> & checksum_,
     const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
-    : BackupEntryFromImmutableFile(disk_, file_path_, file_size_, checksum_, temporary_file_)
+    : BackupEntryFromImmutableFile(disk_, file_path_, settings_, file_size_, checksum_, temporary_file_)
     , limit(BackupEntryFromImmutableFile::getSize())
 {
 }
@@ -16,6 +16,7 @@ public:
     BackupEntryFromAppendOnlyFile(
         const DiskPtr & disk_,
         const String & file_path_,
+        const ReadSettings & settings_,
         const std::optional<UInt64> & file_size_ = {},
         const std::optional<UInt128> & checksum_ = {},
         const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
@@ -11,10 +11,16 @@ namespace DB
 BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
     const DiskPtr & disk_,
     const String & file_path_,
+    const ReadSettings & settings_,
     const std::optional<UInt64> & file_size_,
     const std::optional<UInt128> & checksum_,
     const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
-    : disk(disk_), file_path(file_path_), file_size(file_size_), checksum(checksum_), temporary_file_on_disk(temporary_file_)
+    : disk(disk_)
+    , file_path(file_path_)
+    , settings(settings_)
+    , file_size(file_size_)
+    , checksum(checksum_)
+    , temporary_file_on_disk(temporary_file_)
 {
 }
@@ -30,7 +36,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const

 std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
 {
-    return disk->readFile(file_path);
+    return disk->readFile(file_path, settings);
 }

@@ -1,6 +1,7 @@
 #pragma once

 #include <Backups/IBackupEntry.h>
+#include <IO/ReadSettings.h>
 #include <base/defines.h>
 #include <mutex>

@@ -19,6 +20,7 @@ public:
     BackupEntryFromImmutableFile(
         const DiskPtr & disk_,
         const String & file_path_,
+        const ReadSettings & settings_,
         const std::optional<UInt64> & file_size_ = {},
         const std::optional<UInt128> & checksum_ = {},
         const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
@@ -37,6 +39,7 @@ public:
 private:
     const DiskPtr disk;
     const String file_path;
+    ReadSettings settings;
     mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
     mutable std::mutex get_file_size_mutex;
     const std::optional<UInt128> checksum;
@@ -6,6 +6,7 @@
 #include <Common/logger_useful.h>
 #include <Common/scope_guard_safe.h>
 #include <Common/setThreadName.h>
+#include <Common/ThreadPool.h>
 #include <IO/HashingReadBuffer.h>

@@ -1,8 +1,9 @@
 #pragma once

 #include <Core/Types.h>
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>

 namespace Poco { class Logger; }

 namespace DB
 {
@@ -3,6 +3,7 @@
 #include <IO/copyData.h>
 #include <IO/WriteBufferFromFileBase.h>
 #include <IO/SeekableReadBuffer.h>
+#include <Interpreters/Context.h>

 namespace DB
@@ -22,6 +23,11 @@ void IBackupReader::copyFileToDisk(const String & file_name, size_t size, DiskPt
     write_buffer->finalize();
 }

+IBackupWriter::IBackupWriter(const ContextPtr & context_)
+    : read_settings(context_->getBackupReadSettings())
+    , has_throttling(static_cast<bool>(context_->getBackupsThrottler()))
+{}
+
 void IBackupWriter::copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
 {
     auto read_buffer = create_read_buffer();
@@ -3,6 +3,8 @@
 #include <Core/Types.h>
 #include <Disks/DiskType.h>
 #include <Disks/IDisk.h>
+#include <IO/ReadSettings.h>
+#include <Interpreters/Context_fwd.h>

 namespace DB
 {
@@ -28,6 +30,8 @@ class IBackupWriter /// BackupWriterFile, BackupWriterDisk
 public:
     using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;

+    explicit IBackupWriter(const ContextPtr & context_);
+
     virtual ~IBackupWriter() = default;
     virtual bool fileExists(const String & file_name) = 0;
     virtual UInt64 getFileSize(const String & file_name) = 0;
@@ -38,7 +42,17 @@ public:
     virtual DataSourceDescription getDataSourceDescription() const = 0;
     virtual void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name);
     virtual bool supportNativeCopy(DataSourceDescription /* data_source_description */) const { return false; }
+
+    /// Copy file using native copy (optimized for S3 to use CopyObject)
+    ///
+    /// NOTE: It still may fall back to copyDataToFile() if native copy is not possible:
+    /// - different buckets
+    /// - throttling had been requested
     virtual void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name);
+
+protected:
+    const ReadSettings read_settings;
+    const bool has_throttling;
 };

 }
@@ -50,7 +50,10 @@ void BackupReaderDisk::copyFileToDisk(const String & file_name, size_t size, Dis
 }


-BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_) : disk(disk_), path(path_)
+BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_)
+    : IBackupWriter(context_)
+    , disk(disk_)
+    , path(path_)
 {
 }

@@ -127,9 +130,9 @@ void BackupWriterDisk::copyFileNative(DiskPtr src_disk, const String & src_file_
     if (!src_disk)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");

-    if ((src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
+    if (has_throttling || (src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
     {
-        auto create_read_buffer = [src_disk, src_file_name] { return src_disk->readFile(src_file_name); };
+        auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
         copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
         return;
     }
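The writers now share one decision rule: a native copy can only move whole objects and bypasses the throttler, so any throttling request or partial range forces the buffered copyDataToFile() path. The predicate, extracted as a sketch (illustrative free function, not an actual method of IBackupWriter):

#include <cstdint>

/// True when the buffered (throttleable) copy path must be used instead of
/// a native copy such as a same-disk clone or S3 CopyObject.
bool mustUseBufferedCopy(bool has_throttling, uint64_t src_offset, uint64_t src_size, uint64_t file_size)
{
    return has_throttling || src_offset != 0 || src_size != file_size;
}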
@@ -2,6 +2,7 @@

 #include <filesystem>
 #include <Backups/BackupIO.h>
+#include <Interpreters/Context_fwd.h>

 namespace DB
 {
@@ -30,7 +31,7 @@ private:
 class BackupWriterDisk : public IBackupWriter
 {
 public:
-    BackupWriterDisk(const DiskPtr & disk_, const String & path_);
+    BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_);
     ~BackupWriterDisk() override;

     bool fileExists(const String & file_name) override;
@@ -49,7 +49,9 @@ void BackupReaderFile::copyFileToDisk(const String & file_name, size_t size, Dis
 }


-BackupWriterFile::BackupWriterFile(const String & path_) : path(path_)
+BackupWriterFile::BackupWriterFile(const String & path_, const ContextPtr & context_)
+    : IBackupWriter(context_)
+    , path(path_)
 {
 }

@@ -152,9 +154,9 @@ void BackupWriterFile::copyFileNative(DiskPtr src_disk, const String & src_file_
     else
         abs_source_path = fs::absolute(src_file_name);

-    if ((src_offset != 0) || (src_size != fs::file_size(abs_source_path)))
+    if (has_throttling || (src_offset != 0) || (src_size != fs::file_size(abs_source_path)))
     {
-        auto create_read_buffer = [abs_source_path] { return createReadBufferFromFileBase(abs_source_path, {}); };
+        auto create_read_buffer = [this, abs_source_path] { return createReadBufferFromFileBase(abs_source_path, read_settings); };
         copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
         return;
     }
@@ -2,6 +2,7 @@

 #include <filesystem>
 #include <Backups/BackupIO.h>
+#include <Interpreters/Context_fwd.h>

 namespace DB
 {
@@ -27,7 +28,7 @@ private:
 class BackupWriterFile : public IBackupWriter
 {
 public:
-    explicit BackupWriterFile(const String & path_);
+    explicit BackupWriterFile(const String & path_, const ContextPtr & context_);
     ~BackupWriterFile() override;

     bool fileExists(const String & file_name) override;
@@ -161,9 +161,9 @@ void BackupReaderS3::copyFileToDisk(const String & file_name, size_t size, DiskP

 BackupWriterS3::BackupWriterS3(
     const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
-    : s3_uri(s3_uri_)
+    : IBackupWriter(context_)
+    , s3_uri(s3_uri_)
     , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
-    , read_settings(context_->getReadSettings())
     , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
     , log(&Poco::Logger::get("BackupWriterS3"))
 {
@@ -189,7 +189,7 @@ void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_na
     auto objects = src_disk->getStorageObjects(src_file_name);
     if (objects.size() > 1)
     {
-        auto create_read_buffer = [src_disk, src_file_name] { return src_disk->readFile(src_file_name); };
+        auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
         copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
     }
     else
@@ -7,6 +7,7 @@
 #include <IO/ReadSettings.h>
 #include <IO/S3Common.h>
 #include <Storages/StorageS3Settings.h>
+#include <Interpreters/Context_fwd.h>


 namespace DB
@@ -76,7 +77,6 @@ private:

     S3::URI s3_uri;
     std::shared_ptr<S3::Client> client;
-    ReadSettings read_settings;
     S3Settings::RequestSettings request_settings;
     Poco::Logger * log;
     std::optional<bool> supports_batch_delete;
@@ -1,7 +1,6 @@
 #pragma once

 #include <Parsers/ASTBackupQuery.h>
-#include <Common/ThreadPool.h>


 namespace DB
@@ -23,6 +23,7 @@
 #include <Common/CurrentMetrics.h>
 #include <Common/setThreadName.h>
 #include <Common/scope_guard_safe.h>
+#include <Common/ThreadPool.h>


 namespace CurrentMetrics
@@ -182,8 +183,8 @@ namespace


 BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_)
-    : backups_thread_pool(CurrentMetrics::BackupsThreads, CurrentMetrics::BackupsThreadsActive, num_backup_threads, /* max_free_threads = */ 0, num_backup_threads)
-    , restores_thread_pool(CurrentMetrics::RestoreThreads, CurrentMetrics::RestoreThreadsActive, num_restore_threads, /* max_free_threads = */ 0, num_restore_threads)
+    : backups_thread_pool(std::make_unique<ThreadPool>(CurrentMetrics::BackupsThreads, CurrentMetrics::BackupsThreadsActive, num_backup_threads, /* max_free_threads = */ 0, num_backup_threads))
+    , restores_thread_pool(std::make_unique<ThreadPool>(CurrentMetrics::RestoreThreads, CurrentMetrics::RestoreThreadsActive, num_restore_threads, /* max_free_threads = */ 0, num_restore_threads))
     , log(&Poco::Logger::get("BackupsWorker"))
     , allow_concurrent_backups(allow_concurrent_backups_)
     , allow_concurrent_restores(allow_concurrent_restores_)
@@ -248,7 +249,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context

     if (backup_settings.async)
     {
-        backups_thread_pool.scheduleOrThrowOnError(
+        backups_thread_pool->scheduleOrThrowOnError(
             [this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context]
             {
                 doBackup(
@@ -435,7 +436,7 @@ void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, con
     LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS);
     backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, "");
     backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS);
-    backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), backups_thread_pool));
+    backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), *backups_thread_pool));
 }


@@ -522,7 +523,7 @@ void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries &&
         }
     };

-    if (always_single_threaded || !backups_thread_pool.trySchedule([job] { job(true); }))
+    if (always_single_threaded || !backups_thread_pool->trySchedule([job] { job(true); }))
        job(false);
 }

@@ -581,7 +582,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt

     if (restore_settings.async)
     {
-        restores_thread_pool.scheduleOrThrowOnError(
+        restores_thread_pool->scheduleOrThrowOnError(
             [this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
             {
                 doRestore(
@@ -716,7 +717,7 @@ void BackupsWorker::doRestore(
     }

     /// Execute the data restoring tasks.
-    restoreTablesData(restore_id, backup, std::move(data_restore_tasks), restores_thread_pool);
+    restoreTablesData(restore_id, backup, std::move(data_restore_tasks), *restores_thread_pool);

     /// We have restored everything, we need to tell other hosts (they could be waiting for it).
     restore_coordination->setStage(Stage::COMPLETED, "");
@@ -941,8 +942,8 @@ void BackupsWorker::shutdown()
     if (has_active_backups_and_restores)
         LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores);

-    backups_thread_pool.wait();
-    restores_thread_pool.wait();
+    backups_thread_pool->wait();
+    restores_thread_pool->wait();

     if (has_active_backups_and_restores)
         LOG_INFO(log, "All backup and restore tasks have finished");
@@ -1,7 +1,8 @@
 #pragma once

 #include <Backups/BackupStatus.h>
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>
+#include <Interpreters/Context_fwd.h>
 #include <Core/UUID.h>
 #include <Parsers/IAST_fwd.h>
 #include <unordered_map>
@@ -132,8 +133,8 @@ private:
     void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries,
         UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes);

-    ThreadPool backups_thread_pool;
-    ThreadPool restores_thread_pool;
+    std::unique_ptr<ThreadPool> backups_thread_pool;
+    std::unique_ptr<ThreadPool> restores_thread_pool;

     std::unordered_map<OperationID, Info> infos;
     std::condition_variable status_changed;
@@ -279,12 +279,16 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
             if (existing_restore_uuid == toString(restore_uuid))
                 continue;

-            const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
-            if (status != Stage::COMPLETED)
+            String status;
+            if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status))
             {
-                LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
-                result = true;
-                return;
+                /// If status is not COMPLETED it could be because the restore failed, check if 'error' exists
+                if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_restore_path + "/error"))
+                {
+                    LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
+                    result = true;
+                    return;
+                }
             }
         }

@@ -178,9 +178,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
         {
             std::shared_ptr<IBackupWriter> writer;
             if (engine_name == "File")
-                writer = std::make_shared<BackupWriterFile>(path);
+                writer = std::make_shared<BackupWriterFile>(path, params.context);
             else
-                writer = std::make_shared<BackupWriterDisk>(disk, path);
+                writer = std::make_shared<BackupWriterDisk>(disk, path, params.context);
             return std::make_unique<BackupImpl>(
                 backup_name_for_logging,
                 archive_params,
@@ -2,4 +2,4 @@ add_library (bridge
     IBridge.cpp
 )

-target_link_libraries (bridge PRIVATE daemon dbms Poco::Data Poco::Data::ODBC)
+target_link_libraries (bridge PRIVATE daemon dbms)
@@ -14,17 +14,13 @@
 #include <Server/HTTP/HTTPServer.h>
 #include <base/errnoToString.h>
 #include <base/range.h>
 #include <base/scope_guard.h>

 #include <sys/time.h>
 #include <sys/resource.h>

 #include "config.h"

-#if USE_ODBC
-#    include <Poco/Data/ODBC/Connector.h>
-#endif
-

 namespace DB
 {
@@ -608,7 +608,8 @@ if (ENABLE_TESTS)
         dbms
         clickhouse_common_config
         clickhouse_common_zookeeper
-        string_utils)
+        string_utils
+        hilite_comparator)

     if (TARGET ch_contrib::simdjson)
         target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
@@ -7,6 +7,7 @@

 #include <base/argsToConfig.h>
 #include <base/safeExit.h>
+#include <base/scope_guard.h>
 #include <Core/Block.h>
 #include <Core/Protocol.h>
 #include <Common/DateLUT.h>
@@ -2219,9 +2220,6 @@ void ClientBase::runInteractive()
     LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters);
 #endif

-    /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole.
-    lr.enableBracketedPaste();
-
     static const std::initializer_list<std::pair<String, String>> backslash_aliases =
     {
         { "\\l", "SHOW DATABASES" },
@@ -2239,7 +2237,18 @@ void ClientBase::runInteractive()

     do
     {
-        auto input = lr.readLine(prompt(), ":-] ");
+        String input;
+        {
+            /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole.
+            /// But keep it disabled outside of query input, because it breaks password input
+            /// (e.g. if we need to reconnect and show a password prompt).
+            /// (Alternatively, we could make the password input ignore the control sequences.)
+            lr.enableBracketedPaste();
+            SCOPE_EXIT({ lr.disableBracketedPaste(); });
+
+            input = lr.readLine(prompt(), ":-] ");
+        }
+
         if (input.empty())
             break;

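SCOPE_EXIT here comes from base/scope_guard.h and guarantees that bracketed paste is switched off on every exit path of the block, including exceptions thrown by readLine(). The mechanism reduces to a small RAII guard; a self-contained sketch of the idea:

#include <cstdio>

template <typename F>
struct ScopeGuard
{
    F f;
    ~ScopeGuard() { f(); } /// cleanup runs when the enclosing block exits
};
template <typename F> ScopeGuard(F) -> ScopeGuard<F>;

int main()
{
    std::puts("prompt shown without bracketed paste");
    {
        std::puts("enable bracketed paste");
        ScopeGuard guard{[] { std::puts("disable bracketed paste"); }};
        std::puts("read one query"); /// even a throw here would still run the guard
    }
    std::puts("back to normal terminal mode");
}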
@@ -46,7 +46,10 @@ public:
     /// clickhouse-client so that without -m flag, one can still paste multiline queries, and
     /// possibly get better pasting performance. See https://cirw.in/blog/bracketed-paste for
     /// more details.
+    /// These methods (if implemented) emit the control characters immediately, without waiting
+    /// for the next readLine() call.
     virtual void enableBracketedPaste() {}
+    virtual void disableBracketedPaste() {}

 protected:
     enum InputStatus
@@ -519,4 +519,10 @@ void ReplxxLineReader::enableBracketedPaste()
     rx.enable_bracketed_paste();
 }

+void ReplxxLineReader::disableBracketedPaste()
+{
+    bracketed_paste_enabled = false;
+    rx.disable_bracketed_paste();
+}
+
 }
@@ -19,6 +19,7 @@ public:
     ~ReplxxLineReader() override;

     void enableBracketedPaste() override;
+    void disableBracketedPaste() override;

     /// If highlight is on, we will set a flag to denote whether the last token is a delimiter.
     /// This is useful to determine the behavior of <ENTER> key when multiline is enabled.
@@ -5,6 +5,10 @@
 namespace DB
 {

+template<typename T, typename ... U>
+concept is_any_of = (std::same_as<T, U> || ...);
+
+
 template <typename... T>
 concept OptionalArgument = requires(T &&...)
 {
@@ -4,7 +4,6 @@
 #include <vector>
 #include <memory>

-#include <Poco/Version.h>
 #include <Poco/Exception.h>

 #include <base/defines.h>
@@ -1,8 +1,9 @@
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Common/Macros.h>
 #include <Common/Exception.h>
-#include <IO/WriteHelpers.h>
 #include <Common/logger_useful.h>
+#include <Core/ServerUUID.h>
+#include <IO/WriteHelpers.h>


 namespace DB
@@ -11,6 +12,8 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int SYNTAX_ERROR;
+    extern const int BAD_ARGUMENTS;
+    extern const int NO_ELEMENTS_IN_CONFIG;
 }

 Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key, Poco::Logger * log)
@@ -95,7 +98,7 @@ String Macros::expand(const String & s,
         else if (macro_name == "uuid" && !info.expand_special_macros_only)
         {
             if (info.table_id.uuid == UUIDHelpers::Nil)
-                throw Exception(ErrorCodes::SYNTAX_ERROR, "Macro 'uuid' and empty arguments of ReplicatedMergeTree "
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Macro 'uuid' and empty arguments of ReplicatedMergeTree "
                                 "are supported only for ON CLUSTER queries with Atomic database engine");
             /// For ON CLUSTER queries we don't want to require all macros definitions in initiator's config.
             /// However, initiator must check that for cross-replication cluster zookeeper_path does not contain {uuid} macro.
@@ -105,6 +108,15 @@ String Macros::expand(const String & s,
             res += toString(info.table_id.uuid);
             info.expanded_uuid = true;
         }
+        else if (macro_name == "server_uuid")
+        {
+            auto uuid = ServerUUID::get();
+            if (UUIDHelpers::Nil == uuid)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                    "Macro {server_uuid} expanded to zero, which means the UUID is not initialized (most likely it's not a server application)");
+            res += toString(uuid);
+            info.expanded_other = true;
+        }
         else if (info.shard && macro_name == "shard")
         {
             res += *info.shard;
@@ -125,7 +137,7 @@ String Macros::expand(const String & s,
             info.has_unknown = true;
         }
         else
-            throw Exception(ErrorCodes::SYNTAX_ERROR, "No macro '{}' in config while processing substitutions in "
+            throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No macro '{}' in config while processing substitutions in "
                             "'{}' at '{}' or macro is not supported here", macro_name, s, toString(begin));

         pos = end + 1;
@@ -142,7 +154,7 @@ String Macros::getValue(const String & key) const
 {
     if (auto it = macros.find(key); it != macros.end())
         return it->second;
-    throw Exception(ErrorCodes::SYNTAX_ERROR, "No macro {} in config", key);
+    throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No macro {} in config", key);
 }

@@ -118,7 +118,6 @@ MemoryTracker::~MemoryTracker()
     }
 }

-
 void MemoryTracker::logPeakMemoryUsage()
 {
     log_peak_memory_usage_in_destructor = false;
@@ -156,6 +155,26 @@ void MemoryTracker::injectFault() const
         description ? description : "");
 }

+void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]])
+{
+    /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug).
+    /// Let's find them.
+#ifdef ABORT_ON_LOGICAL_ERROR
+    if (size < 0)
+        return;
+
+    constexpr Int64 threshold = 16 * 1024 * 1024; /// The choice is arbitrary (maybe we should decrease it)
+    if (size < threshold)
+        return;
+
+    MemoryTrackerBlockerInThread blocker(VariableContext::Global);
+    LOG_TEST(&Poco::Logger::get("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, "
+        "it may lead to OOM. Stack trace: {}", size, StackTrace().toString());
+#else
+    return; /// Avoid trash logging in release builds
+#endif
+}
+
 void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker)
 {
     if (size < 0)
|
||||
formatReadableSizeWithBinarySuffix(current_hard_limit));
|
||||
}
|
||||
else
|
||||
{
|
||||
memory_limit_exceeded_ignored = true;
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
Int64 limit_to_check = current_hard_limit;
|
||||
@ -303,7 +325,10 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memory_limit_exceeded_ignored = true;
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
bool peak_updated = false;
|
||||
@ -323,6 +348,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
{
|
||||
bool log_memory_usage = false;
|
||||
peak_updated = updatePeak(will_be, log_memory_usage);
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
|
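debugLogBigAllocationWithoutCheck() only fires in ABORT_ON_LOGICAL_ERROR builds: frees (negative sizes) and anything under the 16 MiB heuristic are ignored, and MemoryTrackerBlockerInThread keeps the log call's own allocations from re-entering the tracker. The gating condition alone, as a tiny sketch:

#include <cstdint>

/// True for allocations worth logging: positive and at least 16 MiB.
bool isSuspiciousUncheckedAllocation(int64_t size)
{
    constexpr int64_t threshold = 16LL * 1024 * 1024;
    return size >= threshold; /// negative sizes (deallocations) fail this too
}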
@@ -215,6 +215,8 @@ public:

     /// Prints info about peak memory consumption into log.
     void logPeakMemoryUsage();
+
+    void debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]]);
 };

 extern MemoryTracker total_memory_tracker;
@@ -75,10 +75,14 @@
     M(S3GetRequestThrottlerSleepMicroseconds, "Total time a query was sleeping to conform S3 GET and SELECT request throttling.") \
     M(S3PutRequestThrottlerCount, "Number of S3 PUT, COPY, POST and LIST requests passed through throttler.") \
     M(S3PutRequestThrottlerSleepMicroseconds, "Total time a query was sleeping to conform S3 PUT, COPY, POST and LIST request throttling.") \
-    M(RemoteReadThrottlerBytes, "Bytes passed through 'max_remote_read_network_bandwidth_for_server' throttler.") \
-    M(RemoteReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_read_network_bandwidth_for_server' throttling.") \
-    M(RemoteWriteThrottlerBytes, "Bytes passed through 'max_remote_write_network_bandwidth_for_server' throttler.") \
-    M(RemoteWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_write_network_bandwidth_for_server' throttling.") \
+    M(RemoteReadThrottlerBytes, "Bytes passed through 'max_remote_read_network_bandwidth_for_server'/'max_remote_read_network_bandwidth' throttler.") \
+    M(RemoteReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_read_network_bandwidth_for_server'/'max_remote_read_network_bandwidth' throttling.") \
+    M(RemoteWriteThrottlerBytes, "Bytes passed through 'max_remote_write_network_bandwidth_for_server'/'max_remote_write_network_bandwidth' throttler.") \
+    M(RemoteWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_write_network_bandwidth_for_server'/'max_remote_write_network_bandwidth' throttling.") \
+    M(LocalReadThrottlerBytes, "Bytes passed through 'max_local_read_bandwidth_for_server'/'max_local_read_bandwidth' throttler.") \
+    M(LocalReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_local_read_bandwidth_for_server'/'max_local_read_bandwidth' throttling.") \
+    M(LocalWriteThrottlerBytes, "Bytes passed through 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttler.") \
+    M(LocalWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttling.") \
     M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform all throttling settings.") \
     \
     M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \
@@ -51,6 +51,9 @@ struct SpaceSavingArena<StringRef>
 {
     StringRef emplace(StringRef key)
     {
+        if (!key.data)
+            return key;
+
         return copyStringInArena(arena, key);
     }

@@ -94,8 +97,8 @@ public:
     void write(WriteBuffer & wb) const
     {
         writeBinary(key, wb);
-        writeVarUInt(count, wb);
-        writeVarUInt(error, wb);
+        writeVarUIntOverflow(count, wb);
+        writeVarUIntOverflow(error, wb);
     }

     void read(ReadBuffer & rb)
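writeVarUInt encodes 7 bits per byte with a continuation flag and, in ClickHouse, can only represent values below 2^63; the Overflow variant used above presumably clamps larger counters to that maximum rather than corrupting the stream, which is acceptable for SpaceSaving's approximate counts. A sketch of the encoding under those assumptions (illustrative signature writing into a byte vector, not the real WriteBuffer API):

#include <cstdint>
#include <vector>

/// LEB128-style varint: 7 payload bits per byte, high bit = "more bytes follow".
/// Values the format cannot represent are clamped instead of written as garbage.
void writeVarUIntOverflow(uint64_t x, std::vector<uint8_t> & out)
{
    constexpr uint64_t VAR_UINT_MAX = (1ULL << 63) - 1; /// assumed format limit
    if (x > VAR_UINT_MAX)
        x = VAR_UINT_MAX;
    while (x >= 0x80)
    {
        out.push_back(static_cast<uint8_t>(x) | 0x80);
        x >>= 7;
    }
    out.push_back(static_cast<uint8_t>(x));
}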
@@ -18,6 +18,7 @@

 #include <Common/MemoryTrackerBlockerInThread.h>
 #include <Common/SystemLogBase.h>
+#include <Common/ThreadPool.h>

 #include <Common/logger_useful.h>
 #include <base/scope_guard.h>
@@ -35,20 +36,18 @@ namespace
 constexpr size_t DBMS_SYSTEM_LOG_QUEUE_SIZE = 1048576;
 }

+ISystemLog::~ISystemLog() = default;
+
 void ISystemLog::stopFlushThread()
 {
     {
         std::lock_guard lock(mutex);

-        if (!saving_thread.joinable())
-        {
+        if (!saving_thread || !saving_thread->joinable())
             return;
-        }

         if (is_shutdown)
-        {
             return;
-        }

         is_shutdown = true;

@@ -56,13 +55,13 @@ void ISystemLog::stopFlushThread()
         flush_event.notify_all();
     }

-    saving_thread.join();
+    saving_thread->join();
 }

 void ISystemLog::startup()
 {
     std::lock_guard lock(mutex);
-    saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); });
+    saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
 }

 static thread_local bool recursive_add_call = false;
@@ -10,7 +10,7 @@
 #include <Interpreters/Context_fwd.h>
 #include <Parsers/IAST_fwd.h>
 #include <Storages/IStorage_fwd.h>
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>

 #define SYSTEM_LOG_ELEMENTS(M) \
     M(AsynchronousMetricLogElement) \
@@ -60,12 +60,12 @@ public:
     /// Stop the background flush thread before destructor. No more data will be written.
     virtual void shutdown() = 0;

-    virtual ~ISystemLog() = default;
+    virtual ~ISystemLog();

     virtual void savingThreadFunction() = 0;

 protected:
-    ThreadFromGlobalPool saving_thread;
+    std::unique_ptr<ThreadFromGlobalPool> saving_thread;

     /// Data shared between callers of add()/flush()/shutdown(), and the saving thread
     std::mutex mutex;
@@ -17,6 +17,7 @@
 #include <Common/ThreadStatus.h>
 #include <Common/OpenTelemetryTraceContext.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/ThreadPool_fwd.h>
 #include <base/scope_guard.h>

 /** Very simple thread pool similar to boost::threadpool.
13
src/Common/ThreadPool_fwd.h
Normal file
@@ -0,0 +1,13 @@
+#pragma once
+
+template <typename Thread>
+class ThreadPoolImpl;
+
+template <bool propagate_opentelemetry_context>
+class ThreadFromGlobalPoolImpl;
+
+using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl<false>;
+
+using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl<true>;
+
+using ThreadPool = ThreadPoolImpl<ThreadFromGlobalPoolNoTracingContextPropagation>;
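This new forward-declaration header is what lets other headers hold a ThreadPool member without pulling in ThreadPool.h: the member becomes std::unique_ptr<ThreadPool>, and the destructor moves out of line so unique_ptr's deleter is instantiated only where the type is complete — the same reason ISystemLog's destructor lost its "= default" in the header above. A minimal sketch of the pattern with a hypothetical Pool type:

// pool_user.h — only a forward declaration is visible here.
#include <memory>

class Pool; /// hypothetical stand-in for ThreadPoolImpl<...>

class PoolUser
{
public:
    PoolUser();
    ~PoolUser(); /// declared here, defined in the .cpp where Pool is complete

private:
    std::unique_ptr<Pool> pool;
};

// pool_user.cpp — the full definition lives here:
//   #include "pool.h"
//   PoolUser::PoolUser() : pool(std::make_unique<Pool>()) {}
//   PoolUser::~PoolUser() = default;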
@@ -41,7 +41,7 @@ class TaskStatsInfoGetter;
 class InternalTextLogsQueue;
 struct ViewRuntimeData;
 class QueryViewsLog;
-class MemoryTrackerThreadSwitcher;
+class ThreadGroupSwitcher;
 using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>;
 using InternalTextLogsQueueWeakPtr = std::weak_ptr<InternalTextLogsQueue>;

@@ -106,6 +106,8 @@ public:
     /// When new query starts, new thread group is created for it, current thread becomes master thread of the query
     static ThreadGroupStatusPtr createForQuery(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_ = {});

+    static ThreadGroupStatusPtr createForBackgroundProcess(ContextPtr storage_context);
+
     std::vector<UInt64> getInvolvedThreadIds() const;
     void linkThread(UInt64 thread_it);

@@ -177,12 +179,6 @@ private:
     bool performance_counters_finalized = false;

-    String query_id_from_query_context;
-    /// Requires access to query_id.
-    friend class MemoryTrackerThreadSwitcher;
-    void setQueryId(const String & query_id_)
-    {
-        query_id_from_query_context = query_id_;
-    }

     struct TimePoint
     {
@@ -273,7 +273,7 @@ struct SetRequest : virtual Request
     void addRootPath(const String & root_path) override;
     String getPath() const override { return path; }

-    size_t bytesSize() const override { return data.size() + data.size() + sizeof(version); }
+    size_t bytesSize() const override { return path.size() + data.size() + sizeof(version); }
 };

 struct SetResponse : virtual Response
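The old expression double-counted data and never counted path — a copy-paste slip that undercounts requests with long paths. A tiny model pinning down the intended accounting (toy struct, not the real request class):

#include <cassert>
#include <cstdint>
#include <string>

struct SetRequestModel
{
    std::string path;
    std::string data;
    int32_t version = -1;
    size_t bytesSize() const { return path.size() + data.size() + sizeof(version); }
};

int main()
{
    SetRequestModel r{"/clickhouse/node", "payload"};
    assert(r.bytesSize() == 16 + 7 + sizeof(int32_t)); /// path + data + version
}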
@@ -669,8 +669,8 @@ void ZooKeeper::receiveThread()
                 earliest_operation = operations.begin()->second;
                 auto earliest_operation_deadline = earliest_operation->time + std::chrono::microseconds(args.operation_timeout_ms * 1000);
                 if (now > earliest_operation_deadline)
-                    throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (deadline already expired) for path: {}",
-                        earliest_operation->request->getPath());
+                    throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (deadline of {} ms already expired) for path: {}",
+                        args.operation_timeout_ms, earliest_operation->request->getPath());
                 max_wait_us = std::chrono::duration_cast<std::chrono::microseconds>(earliest_operation_deadline - now).count();
             }
         }
@@ -687,12 +687,12 @@ void ZooKeeper::receiveThread()
         {
             if (earliest_operation)
             {
-                throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response) for request {} for path: {}",
-                    toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath());
+                throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response in {} ms) for request {} for path: {}",
+                    args.operation_timeout_ms, toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath());
             }
             waited_us += max_wait_us;
             if (waited_us >= args.session_timeout_ms * 1000)
-                throw Exception(Error::ZOPERATIONTIMEOUT, "Nothing is received in session timeout");
+                throw Exception(Error::ZOPERATIONTIMEOUT, "Nothing is received in session timeout of {} ms", args.session_timeout_ms);

         }
@@ -1080,7 +1080,7 @@ void ZooKeeper::pushRequest(RequestInfo && info)
             if (requests_queue.isFinished())
                 throw Exception(Error::ZSESSIONEXPIRED, "Session expired");

-            throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push request to queue within operation timeout");
+            throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push request to queue within operation timeout of {} ms", args.operation_timeout_ms);
         }
     }
     catch (...)
@@ -1332,7 +1332,7 @@ void ZooKeeper::close()
     request_info.request = std::make_shared<ZooKeeperCloseRequest>(std::move(request));

     if (!requests_queue.tryPush(std::move(request_info), args.operation_timeout_ms))
-        throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push close request to queue within operation timeout");
+        throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push close request to queue within operation timeout of {} ms", args.operation_timeout_ms);

     ProfileEvents::increment(ProfileEvents::ZooKeeperClose);
 }
@@ -23,12 +23,6 @@ PoolWithFailover PoolFactory::get(const std::string & config_name, unsigned defa
     return get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries);
 }

-/// Duplicate of code from StringUtils.h. Copied here for less dependencies.
-static bool startsWith(const std::string & s, const char * prefix)
-{
-    return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix));
-}
-
 static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config,
     const std::string & config_name)
 {
@@ -55,7 +49,7 @@ static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & co
     for (const auto & replica_config_key : replica_keys)
     {
         /// There could be another elements in the same level in configuration file, like "user", "port"...
-        if (startsWith(replica_config_key, "replica"))
+        if (replica_config_key.starts_with("replica"))
         {
             std::string replica_name = config_name + "." + replica_config_key;
             std::string tmp_host = config.getString(replica_name + ".host", host);
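With C++20, std::string provides starts_with() directly, so the file-local duplicate of the StringUtils helper can be deleted outright:

#include <cassert>
#include <string>

int main()
{
    std::string replica_config_key = "replica1";
    assert(replica_config_key.starts_with("replica")); /// C++20 member function
    assert(!replica_config_key.starts_with("user"));
}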
@@ -18,9 +18,6 @@ namespace DB
     }
 }

-template<typename T, typename ... U>
-concept is_any_of = (std::same_as<T, U> || ...);
-

 /** Checks type by comparing typeid.
   * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful.
@@ -10,6 +10,7 @@
 #include <libnuraft/nuraft.hxx>
 #include <libnuraft/raft_server.hxx>
 #include <Common/ConcurrentBoundedQueue.h>
+#include <Common/ThreadPool.h>

 namespace DB
 {
@@ -12,6 +12,7 @@
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/ZooKeeper/IKeeper.h>
 #include <base/hex.h>
+#include <base/scope_guard.h>
 #include <Common/logger_useful.h>
 #include <Common/setThreadName.h>
 #include <Common/LockMemoryExceptionInThread.h>
@@ -4,6 +4,7 @@
 #include <Common/Stopwatch.h>
 #include <Common/CurrentThread.h>
 #include <Common/logger_useful.h>
+#include <Common/ThreadPool.h>
 #include <chrono>


@@ -160,7 +161,7 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Met
     for (auto & thread : threads)
         thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { threadFunction(); });

-    delayed_thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { delayExecutionThreadFunction(); });
+    delayed_thread = std::make_unique<ThreadFromGlobalPoolNoTracingContextPropagation>([this] { delayExecutionThreadFunction(); });
 }

@@ -198,7 +199,7 @@ BackgroundSchedulePool::~BackgroundSchedulePool()
         delayed_tasks_cond_var.notify_all();

         LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish.");
-        delayed_thread.join();
+        delayed_thread->join();

         for (auto & thread : threads)
             thread.join();
@@ -14,7 +14,7 @@
 #include <Common/ZooKeeper/Types.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/CurrentThread.h>
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>
 #include <base/scope_guard.h>

@@ -86,7 +86,7 @@ private:
     std::condition_variable delayed_tasks_cond_var;
     std::mutex delayed_tasks_mutex;
     /// Thread waiting for next delayed task.
-    ThreadFromGlobalPoolNoTracingContextPropagation delayed_thread;
+    std::unique_ptr<ThreadFromGlobalPoolNoTracingContextPropagation> delayed_thread;
     /// Tasks ordered by scheduled time.
     DelayedTasks delayed_tasks;

@@ -30,7 +30,7 @@

 #define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1

-#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 1
+#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 2
 #define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453

 #define DBMS_MERGE_TREE_PART_INFO_VERSION 1
@@ -19,7 +19,10 @@ void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfigurat
         "background_buffer_flush_schedule_pool_size",
         "background_schedule_pool_size",
         "background_message_broker_schedule_pool_size",
-        "background_distributed_schedule_pool_size"
+        "background_distributed_schedule_pool_size",
+
+        "max_remote_read_network_bandwidth_for_server",
+        "max_remote_write_network_bandwidth_for_server",
     };

     for (auto setting : all())
@@ -21,10 +21,15 @@ namespace DB
     M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
     M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
    M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
+    M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
+    M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
+    M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
+    M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
     M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
     M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
     M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
     M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
+    M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
     M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
     M(Int32, max_connections, 1024, "Max server connections.", 0) \
     M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
@@ -100,8 +100,10 @@ class IColumn;
     M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
     M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \
     M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \
-    M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited. Only has meaning at server startup.", 0) \
-    M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited. Only has meaning at server startup.", 0) \
+    M(UInt64, max_remote_read_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for read.", 0) \
+    M(UInt64, max_remote_write_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for write.", 0) \
+    M(UInt64, max_local_read_bandwidth, 0, "The maximum speed of local reads in bytes per second.", 0) \
+    M(UInt64, max_local_write_bandwidth, 0, "The maximum speed of local writes in bytes per second.", 0) \
     M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
     M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \
     \
@@ -129,7 +131,7 @@ class IColumn;
     M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \
     M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
     M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
-    M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
+    M(Bool, compile_aggregate_expressions, false, "Compile aggregate functions to native code. This feature has a bug and should not be used.", 0) \
     M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
     M(Bool, compile_sort_description, true, "Compile sort description to native code.", 0) \
     M(UInt64, min_count_to_compile_sort_description, 3, "The number of identical sort descriptions before they are JIT-compiled", 0) \
@ -422,6 +424,7 @@ class IColumn;
|
||||
M(UInt64, backup_restore_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
|
||||
M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
|
||||
M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \
|
||||
M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \
|
||||
\
|
||||
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
|
||||
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
|
||||
@ -464,6 +467,7 @@ class IColumn;
    M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
    \
    M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
    M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in function 'formatDateTime' produces the month name instead of minutes.", 0) \
    \
    M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
    M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \

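The max_partitions_per_insert_block guard boils down to counting distinct partition keys in one inserted block and throwing once the threshold is crossed. An illustrative sketch with simplified types (the real check lives in the MergeTree insert path):

#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

void checkPartitionLimit(const std::vector<std::string> & partition_key_per_row,
                         size_t max_partitions_per_insert_block)
{
    if (max_partitions_per_insert_block == 0)  /// zero means unlimited
        return;
    std::unordered_set<std::string> partitions;
    for (const auto & key : partition_key_per_row)
    {
        partitions.insert(key);
        if (partitions.size() > max_partitions_per_insert_block)
            throw std::runtime_error("Too many partitions in a single INSERT block");
    }
}
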
@ -735,6 +739,7 @@ class IColumn;
#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
    M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE)

/// NOTE: ServerSettings::loadSettingsFromConfig() should be updated to include this settings
#define MAKE_DEPRECATED_BY_SERVER_CONFIG(M, TYPE, NAME, DEFAULT) \
    M(TYPE, NAME, DEFAULT, "User-level setting is deprecated, and it must be defined in the server configuration instead.", BaseSettingsHelpers::Flags::OBSOLETE)

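Both helper macros simply forward to M with the OBSOLETE flag set, so an obsolete setting still parses but has no effect. A tiny compilable demonstration of the expansion (Flags reduced to a plain enum for the sketch):

#include <cstdio>

enum Flags { OBSOLETE = 1 };

#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
    M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", OBSOLETE)

#define PRINT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
    std::printf("%s (default %s): %s flags=%d\n", #NAME, #DEFAULT, DESCRIPTION, FLAGS);

int main()
{
    /// Expands to PRINT_SETTING(UInt64, max_pipeline_depth, 0, "Obsolete setting, does nothing.", OBSOLETE)
    MAKE_OBSOLETE(PRINT_SETTING, UInt64, max_pipeline_depth, 0)
}
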
@ -768,6 +773,8 @@ class IColumn;
    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_schedule_pool_size, 128) \
    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_message_broker_schedule_pool_size, 16) \
    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
    MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
    /* ---- */ \
    MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \
    MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \

@ -101,6 +101,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},

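The map above keys batches of default-value changes by the release that introduced them; honoring a compatibility setting then means reverting every change newer than the requested version. A simplified sketch of that replay (stringified values and a toy version comparison, not the real class):

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct SettingChange
{
    std::string name;
    std::string old_value;
    std::string new_value;
    std::string reason;
};

int main()
{
    /// Two entries lifted from the table above, values stringified for brevity.
    std::map<std::string, std::vector<SettingChange>> history =
    {
        {"22.7", {{"cross_to_inner_join_rewrite", "1", "2", "Force rewrite comma join to inner"}}},
        {"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", "false", "true",
                   "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
    };

    /// With compatibility = 22.9, every change introduced after that release is
    /// rolled back to its old default. (Lexicographic comparison suffices for
    /// this toy; the real code parses version numbers.)
    std::string compatibility = "22.9";
    for (auto it = history.upper_bound(compatibility); it != history.end(); ++it)
        for (const auto & change : it->second)
            std::cout << change.name << " -> " << change.old_value << '\n';
}
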
@ -15,7 +15,6 @@
#include <Poco/Util/Application.h>
#include <Poco/Util/ServerApplication.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/Version.h>
#include <base/types.h>
#include <Common/logger_useful.h>
#include <base/getThreadId.h>

@ -238,12 +238,15 @@ void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, c
    ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);
    bool value = false;

    if (*istr.position() == 't' || *istr.position() == 'f')
    char first_char = *istr.position();
    if (first_char == 't' || first_char == 'f')
        readBoolTextWord(value, istr);
    else if (*istr.position() == '1' || *istr.position() == '0')
    else if (first_char == '1' || first_char == '0')
        readBoolText(value, istr);
    else
        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Invalid boolean value, should be true/false, 1/0.");
        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL,
            "Invalid boolean value, should be true/false, 1/0, but it starts with the '{}' character.", first_char);

    col->insert(value);
}

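Read outside the diff markers, the new branch logic is easier to follow as a standalone function. The sketch below emulates ClickHouse's readBoolTextWord/readBoolText helpers with plain string checks (the real helpers also validate the full word and advance the buffer):

#include <stdexcept>
#include <string>

bool parseJsonBool(const std::string & input)
{
    if (input.empty())
        throw std::runtime_error("Unexpected end of input");
    char first_char = input[0];
    if (first_char == 't' || first_char == 'f')
        return input == "true";    /// stands in for readBoolTextWord
    if (first_char == '1' || first_char == '0')
        return first_char == '1';  /// stands in for readBoolText
    throw std::runtime_error(
        std::string("Invalid boolean value, should be true/false, 1/0, "
                    "but it starts with the '") + first_char + "' character.");
}

Reading the first character once both avoids repeated dereferences of istr.position() and lets the error message report what was actually seen.
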
@ -1,6 +1,6 @@
#pragma once

#include <Common/ThreadPool.h>
#include <Interpreters/Context_fwd.h>
#include <Databases/IDatabase.h>

namespace DB

@ -30,6 +30,7 @@
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDeleteQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>

@ -1388,25 +1389,31 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
    if (query_context->getClientInfo().is_replicated_database_internal)
        return false;

    /// Some ALTERs are not replicated on database level
    if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
    /// we never replicate KeeperMap operations for some types of queries because it doesn't make sense
    const auto is_keeper_map_table = [&](const ASTPtr & ast)
    {
        auto table_id = query_context->resolveStorageID(*alter, Context::ResolveOrdinary);
        auto table_id = query_context->resolveStorageID(ast, Context::ResolveOrdinary);
        StoragePtr table = DatabaseCatalog::instance().getTable(table_id, query_context);

        /// we never replicate KeeperMap operations because it doesn't make sense
        if (auto * keeper_map = table->as<StorageKeeperMap>())
            return false;
        return table->as<StorageKeeperMap>() != nullptr;
    };

        return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter();
    }
    /// Some ALTERs are not replicated on database level
    if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
        return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter() && !is_keeper_map_table(query_ptr);

    /// DROP DATABASE is not replicated
    if (const auto * drop = query_ptr->as<const ASTDropQuery>())
    {
        return drop->table.get();
        if (drop->table.get())
            return drop->kind != ASTDropQuery::Truncate || !is_keeper_map_table(query_ptr);

        return false;
    }

    if (query_ptr->as<const ASTDeleteQuery>() != nullptr)
        return !is_keeper_map_table(query_ptr);

    return true;
}

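After this refactoring, ALTER, TRUNCATE and DELETE all consult the same is_keeper_map_table predicate before replicating, instead of the check living only inside the ALTER branch. The control flow, reduced to a sketch with a hypothetical QueryKind enum standing in for the real AST classes:

#include <functional>

enum class QueryKind { AlterAttach, AlterOther, DropTable, TruncateTable, Delete, Other };

bool shouldReplicateQuery(QueryKind kind, const std::function<bool()> & is_keeper_map_table)
{
    switch (kind)
    {
        case QueryKind::AlterAttach:   return false;                   /// some ALTERs stay local
        case QueryKind::AlterOther:    return !is_keeper_map_table();
        case QueryKind::DropTable:     return true;
        case QueryKind::TruncateTable: return !is_keeper_map_table();  /// TRUNCATE of KeeperMap is not replicated
        case QueryKind::Delete:        return !is_keeper_map_table();
        case QueryKind::Other:         return true;
    }
    return true;
}
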
@ -7,7 +7,7 @@
#include <Storages/IStorage_fwd.h>
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <Common/ThreadPool_fwd.h>
#include <QueryPipeline/BlockIO.h>

#include <ctime>

@ -26,7 +26,6 @@ target_link_libraries(clickhouse_dictionaries
    clickhouse_common_io
    dbms
    Poco::Data
    Poco::Data::ODBC
    Poco::MongoDB
    Poco::Redis
    string_utils

Some files were not shown because too many files have changed in this diff.