Merge branch 'master' into check_sample_dict_key_is_correct

commit 027f8a02c8
Anton Popov, 2023-04-05 16:31:57 +02:00, committed by GitHub
248 changed files with 11051 additions and 1552 deletions

View File

@@ -110,6 +110,7 @@ Checks: '*,
-misc-const-correctness,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
+-misc-confusable-identifiers, # useful but slooow
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
@@ -148,19 +149,6 @@ Checks: '*,
-readability-use-anyofallof,
-zirkon-*,
--misc-*, # temporarily disabled due to being too slow
-# also disable checks in other categories which are aliases of checks in misc-*:
-# https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html
--cert-dcl54-cpp, # alias of misc-new-delete-overloads
--hicpp-new-delete-operators, # alias of misc-new-delete-overloads
--cert-fio38-c, # alias of misc-non-copyable-objects
--cert-dcl03-c, # alias of misc-static-assert
--hicpp-static-assert, # alias of misc-static-assert
--cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference
--cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference
--cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator
--cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes
'
WarningsAsErrors: '*'

View File

@@ -36,7 +36,7 @@
namespace detail
{
-template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }
+template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression)
#if defined(__SSE2__)
template <char s0>

View File

@@ -0,0 +1,81 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
/*
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

#include "libm.h"

static const float
half[2] = {0.5,-0.5},
ln2hi = 6.9314575195e-1f,  /* 0x3f317200 */
ln2lo = 1.4286067653e-6f,  /* 0x35bfbe8e */
invln2 = 1.4426950216e+0f, /* 0x3fb8aa3b */
/*
 * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
 * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
 */
P1 = 1.6666625440e-1f,  /*  0xaaaa8f.0p-26 */
P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */

float expf(float x)
{
    float_t hi, lo, c, xx, y;
    int k, sign;
    uint32_t hx;

    GET_FLOAT_WORD(hx, x);
    sign = hx >> 31;   /* sign bit of x */
    hx &= 0x7fffffff;  /* high word of |x| */

    /* special cases */
    if (hx >= 0x42aeac50) {  /* if |x| >= -87.33655f or NaN */
        if (hx >= 0x42b17218 && !sign) {  /* x >= 88.722839f */
            /* overflow */
            x *= 0x1p127f;
            return x;
        }
        if (sign) {
            /* underflow */
            FORCE_EVAL(-0x1p-149f/x);
            if (hx >= 0x42cff1b5)  /* x <= -103.972084f */
                return 0;
        }
    }

    /* argument reduction */
    if (hx > 0x3eb17218) {  /* if |x| > 0.5 ln2 */
        if (hx > 0x3f851592)  /* if |x| > 1.5 ln2 */
            k = invln2*x + half[sign];
        else
            k = 1 - sign - sign;
        hi = x - k*ln2hi;  /* k*ln2hi is exact here */
        lo = k*ln2lo;
        x = hi - lo;
    } else if (hx > 0x39000000) {  /* |x| > 2**-14 */
        k = 0;
        hi = x;
        lo = 0;
    } else {
        /* raise inexact */
        FORCE_EVAL(0x1p127f + x);
        return 1 + x;
    }

    /* x is now in primary range */
    xx = x*x;
    c = x - xx*(P1+xx*P2);
    y = 1 + (x*c/(2-c) - lo + hi);
    if (k == 0)
        return y;
    return scalbnf(y, k);
}

View File

@@ -0,0 +1,31 @@
#include <math.h>
#include <stdint.h>

float scalbnf(float x, int n)
{
    union {float f; uint32_t i;} u;
    float_t y = x;

    if (n > 127) {
        /* scale up in steps of at most 2^127 so the exponent never overflows */
        y *= 0x1p127f;
        n -= 127;
        if (n > 127) {
            y *= 0x1p127f;
            n -= 127;
            if (n > 127)
                n = 127;
        }
    } else if (n < -126) {
        /* scale down in steps of at most 2^-126, keeping intermediates normal */
        y *= 0x1p-126f;
        n += 126;
        if (n < -126) {
            y *= 0x1p-126f;
            n += 126;
            if (n < -126)
                n = -126;
        }
    }
    /* build 2^n directly from the biased exponent bits, then one final multiply */
    u.i = (uint32_t)(0x7f+n)<<23;
    x = y * u.f;
    return x;
}

contrib/arrow (vendored)

@@ -1 +1 @@
-Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666
+Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f

View File

@@ -202,6 +202,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/buffer.cc"
"${LIBRARY_DIR}/chunked_array.cc"
+"${LIBRARY_DIR}/chunk_resolver.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
@@ -268,6 +269,10 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/uri.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
+"${LIBRARY_DIR}/util/byte_size.cc"
+"${LIBRARY_DIR}/util/debug.cc"
+"${LIBRARY_DIR}/util/tracing.cc"
+"${LIBRARY_DIR}/util/atfork_internal.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
@@ -301,9 +306,11 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
+"${LIBRARY_DIR}/compute/exec/partition_util.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
+"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
@@ -317,21 +324,28 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
+"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
+"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
+"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
-"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
+"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
+"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
+"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
+"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
+"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
@@ -340,13 +354,15 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
-"${LIBRARY_DIR}/compute/exec/key_compare.cc"
-"${LIBRARY_DIR}/compute/exec/key_encode.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
+"${LIBRARY_DIR}/compute/row/encode_internal.cc"
+"${LIBRARY_DIR}/compute/row/grouper.cc"
+"${LIBRARY_DIR}/compute/row/compare_internal.cc"
+"${LIBRARY_DIR}/compute/row/row_internal.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
@@ -357,7 +373,8 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/ipc/writer.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
-"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
+"${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
+"${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
)
add_definitions(-DARROW_WITH_LZ4)

View File

@@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.16.32-lts (7c4be737bd0) FIXME as compared to v22.8.15.23-lts (d36fa168bbf)
#### Build/Testing/Packaging Improvement
* Backported in [#48344](https://github.com/ClickHouse/ClickHouse/issues/48344): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#48250](https://github.com/ClickHouse/ClickHouse/issues/48250): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)).
* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)).
* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)).
* Backport to 22.8: Fix moving broken parts to the detached for the object storage disk on startup [#48273](https://github.com/ClickHouse/ClickHouse/pull/48273) ([Aleksei Filatov](https://github.com/aalexfvk)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.1.6.42-stable (783ddf67991) FIXME as compared to v23.1.5.24-stable (0e51b53ba99)
#### Build/Testing/Packaging Improvement
* Backported in [#48215](https://github.com/ClickHouse/ClickHouse/issues/48215): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#48254](https://github.com/ClickHouse/ClickHouse/issues/48254): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix changing an expired role [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)).
* Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto [#47068](https://github.com/ClickHouse/ClickHouse/pull/47068) ([Vladimir C](https://github.com/vdimir)).
* Disable logical expression optimizer for expression with aliases. [#47451](https://github.com/ClickHouse/ClickHouse/pull/47451) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Parameterized view bug fix 47287 47247 [#47495](https://github.com/ClickHouse/ClickHouse/pull/47495) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)).
* Hotfix for too verbose warnings in HTTP [#47903](https://github.com/ClickHouse/ClickHouse/pull/47903) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `00933_test_fix_extra_seek_on_compressed_cache` in releases. [#47490](https://github.com/ClickHouse/ClickHouse/pull/47490) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@@ -0,0 +1,40 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.2.5.46-stable (b50faecbb12) FIXME as compared to v23.2.4.12-stable (8fe866cb035)
#### Improvement
* Backported in [#48164](https://github.com/ClickHouse/ClickHouse/issues/48164): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. [#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
#### Build/Testing/Packaging Improvement
* Backported in [#48216](https://github.com/ClickHouse/ClickHouse/issues/48216): Use sccache as a replacement for ccache and using S3 as cache backend. [#46240](https://github.com/ClickHouse/ClickHouse/pull/46240) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#48256](https://github.com/ClickHouse/ClickHouse/issues/48256): The `clickhouse/clickhouse-keeper` image used to be pushed only with tags `-alpine`, e.g. `latest-alpine`. As it was suggested in https://github.com/ClickHouse/examples/pull/2, now it will be pushed as suffixless too. [#48236](https://github.com/ClickHouse/ClickHouse/pull/48236) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix changing an expired role [#46772](https://github.com/ClickHouse/ClickHouse/pull/46772) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix bug in zero-copy replication disk choice during fetch [#47010](https://github.com/ClickHouse/ClickHouse/pull/47010) ([alesapin](https://github.com/alesapin)).
* Fix NOT_IMPLEMENTED error with CROSS JOIN and algorithm = auto [#47068](https://github.com/ClickHouse/ClickHouse/pull/47068) ([Vladimir C](https://github.com/vdimir)).
* Disable logical expression optimizer for expression with aliases. [#47451](https://github.com/ClickHouse/ClickHouse/pull/47451) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix query parameters [#47488](https://github.com/ClickHouse/ClickHouse/pull/47488) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Parameterized view bug fix 47287 47247 [#47495](https://github.com/ClickHouse/ClickHouse/pull/47495) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Proper fix for bug in parquet, revert reverted [#45878](https://github.com/ClickHouse/ClickHouse/issues/45878) [#47538](https://github.com/ClickHouse/ClickHouse/pull/47538) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix wait for zero copy lock during move [#47631](https://github.com/ClickHouse/ClickHouse/pull/47631) ([alesapin](https://github.com/alesapin)).
* Hotfix for too verbose warnings in HTTP [#47903](https://github.com/ClickHouse/ClickHouse/pull/47903) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* fix: keeper systemd service file include invalid inline comment [#47105](https://github.com/ClickHouse/ClickHouse/pull/47105) ([SuperDJY](https://github.com/cmsxbc)).
* Better error messages in ReplicatedMergeTreeAttachThread [#47454](https://github.com/ClickHouse/ClickHouse/pull/47454) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `00933_test_fix_extra_seek_on_compressed_cache` in releases. [#47490](https://github.com/ClickHouse/ClickHouse/pull/47490) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix startup on older systemd versions [#47689](https://github.com/ClickHouse/ClickHouse/pull/47689) ([Thomas Casteleyn](https://github.com/Hipska)).
* Add a fuse for backport branches w/o a created PR [#47760](https://github.com/ClickHouse/ClickHouse/pull/47760) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Only valid Reviews.STATES overwrite existing reviews [#47789](https://github.com/ClickHouse/ClickHouse/pull/47789) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Place short return before big block, improve logging [#47822](https://github.com/ClickHouse/ClickHouse/pull/47822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Artifacts s3 prefix [#47945](https://github.com/ClickHouse/ClickHouse/pull/47945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tsan error lock-order-inversion [#47953](https://github.com/ClickHouse/ClickHouse/pull/47953) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@@ -78,7 +78,8 @@ Of course, it's possible to manually run `CREATE TABLE` with same path on nonrel
### Inserts

-When new rows are inserted into `KeeperMap`, if the key already exists, the value will be updated, otherwise new key is created.
+When new rows are inserted into `KeeperMap`, if the key does not exist, a new entry for the key is created.
+If the key exists and the setting `keeper_map_strict_mode` is set to `true`, an exception is thrown; otherwise, the value for the key is overwritten.

Example:
@@ -89,6 +90,7 @@ INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2);
### Deletes

Rows can be deleted using `DELETE` query or `TRUNCATE`.
If the key exists and the setting `keeper_map_strict_mode` is set to `true`, fetching and deleting data will succeed only if it can be executed atomically.

```sql
DELETE FROM keeper_map_table WHERE key LIKE 'some%' AND v1 > 1;
@@ -105,6 +107,7 @@ TRUNCATE TABLE keeper_map_table;
### Updates

Values can be updated using `ALTER TABLE` query. Primary key cannot be updated.
If the setting `keeper_map_strict_mode` is set to `true`, fetching and updating data will succeed only if it's executed atomically.

```sql
ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
```
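A short sketch of the strict-mode behavior described above (assuming the `keeper_map_table` from these examples; the setting is applied at session level here):

```sql
-- Default mode: inserting an existing key overwrites its value.
INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2);

-- Strict mode: the same insert throws instead of overwriting.
SET keeper_map_strict_mode = 1;
INSERT INTO keeper_map_table VALUES ('some key', 1, 'value', 3.2); -- throws if 'some key' already exists
```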

View File

@@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema).
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.

| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|--------------------------------|----------------------|--------------------------------|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/nullable.md) | `union(T, Void), union(Void, T)` |
| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |

Integer types can be converted into each other during input/output.

For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.

Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types can also be nested.

### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}

View File

@@ -103,6 +103,20 @@ cached - for that use setting [query_cache_min_query_runs](settings/settings.md#
Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a different
value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl).

Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).

ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#settings-max_block_size) rows. Due to filtering, aggregation,
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
are squashed (if they are tiny) or split (if they are large) into blocks of 'max_block_size' size before insertion into the query result
cache. This reduces performance of writes into the query cache but improves compression rate of cache entries and provides more natural
block granularity when query results are later served from the query cache. As a result, the query cache stores multiple (partial) result
blocks for each query. While this behavior is a good default, it can be suppressed using setting
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results).

Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
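A sketch of how these knobs combine at query level (assuming the query cache is enabled per query via `use_query_cache`; setting names as introduced above):

```sql
-- Cache this result for 5 minutes, uncompressed and without squashing,
-- overriding the defaults (compression and squashing are both on by default).
SELECT count()
FROM numbers(1000000)
SETTINGS use_query_cache = 1,
         query_cache_ttl = 300,
         query_cache_compress_entries = 0,
         query_cache_squash_partial_results = 0;
```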

View File

@@ -1435,6 +1435,28 @@ Possible values:
Default value: `0`

## query_cache_compress_entries {#query-cache-compress-entries}

Compress entries in the [query cache](../query-cache.md). Lessens the memory consumption of the query cache at the cost of slower inserts into / reads from it.

Possible values:

- 0 - Disabled
- 1 - Enabled

Default value: `1`

## query_cache_squash_partial_results {#query-cache-squash-partial-results}

Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressibility of cache entries (see [query_cache_compress_entries](#query_cache_compress_entries)).

Possible values:

- 0 - Disabled
- 1 - Enabled

Default value: `1`

## query_cache_ttl {#query-cache-ttl}

After this time in seconds entries in the [query cache](../query-cache.md) become stale.
@@ -4077,4 +4099,46 @@ SOURCE(CLICKHOUSE(TABLE 'test_local'))
LIFETIME(MIN 0 MAX 300)
LAYOUT(HASHED())
SETTINGS(check_dictionary_primary_key = 0);
```
## function_json_value_return_type_allow_nullable
Controls whether to allow returning `NULL` when the value does not exist for the `JSON_VALUE` function.
```sql
SELECT JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true;
┌─JSON_VALUE('{"hello":"world"}', '$.b')─┐
│ ᴺᵁᴸᴸ │
└────────────────────────────────────────┘
1 row in set. Elapsed: 0.001 sec.
```
Possible values:
- true — Allow.
- false — Disallow.
Default value: `false`.
## function_json_value_return_type_allow_complex
Controls whether to allow returning complex types (such as struct, array, or map) from the `JSON_VALUE` function.
```sql
SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true
┌─JSON_VALUE('{"hello":{"world":"!"}}', '$.hello')─┐
│ {"world":"!"} │
└──────────────────────────────────────────────────┘
1 row in set. Elapsed: 0.001 sec.
```
Possible values:
- true — Allow.
- false — Disallow.
Default value: `false`.

View File

@@ -74,7 +74,7 @@ Never set the block size too small or too large.
You can use RAID-0 on SSD.
Regardless of RAID use, always use replication for data security.
-Enable NCQ with a long queue. For HDD, choose the CFQ scheduler, and for SSD, choose noop. Don't reduce the readahead setting.
+Enable NCQ with a long queue. For HDD, choose the mq-deadline or CFQ scheduler, and for SSD, choose noop. Don't reduce the readahead setting.
For HDD, enable the write cache.
Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is enabled for NVME and SSD disks in your OS (usually it's implemented using a cronjob or systemd service).

View File

@@ -8,10 +8,150 @@ sidebar_label: clickhouse-local
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
-By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
+## Download clickhouse-local
`clickhouse-local` is executed using the same `clickhouse` binary that runs the ClickHouse server and `clickhouse-client`. The easiest way to download the latest version is with the following command:
```bash
curl https://clickhouse.com/ | sh
```
:::note
The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx).
:::
## Query data in a CSV file using SQL
A common use of `clickhouse-local` is to run ad-hoc queries on files, without having to insert the data into a table first. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
If the file is on the same machine as `clickhouse-local`, use the `file` table function. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
```
ClickHouse knows the file uses a tab-separated format from the filename extension. If you need to explicitly specify the format, simply add one of the [many ClickHouse input formats](../../interfaces/formats.md):
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')"
```
The `file` table function creates a table, and you can use `DESCRIBE` to see the inferred schema:
```bash
./clickhouse local -q "DESCRIBE file('reviews.tsv')"
```
```response
marketplace Nullable(String)
customer_id Nullable(Int64)
review_id Nullable(String)
product_id Nullable(String)
product_parent Nullable(Int64)
product_title Nullable(String)
product_category Nullable(String)
star_rating Nullable(Int64)
helpful_votes Nullable(Int64)
total_votes Nullable(Int64)
vine Nullable(String)
verified_purchase Nullable(String)
review_headline Nullable(String)
review_body Nullable(String)
review_date Nullable(Date)
```
Let's find a product with the highest rating:
```bash
./clickhouse local -q "SELECT
argMax(product_title,star_rating),
max(star_rating)
FROM file('reviews.tsv')"
```
```response
Monopoly Junior Board Game 5
```
## Query data in a Parquet file in AWS S3
If you have a file in S3, use `clickhouse-local` and the `s3` table function to query the file in place (without inserting the data into a ClickHouse table). We have a file named `house_0.parquet` in a public bucket that contains home prices of property sold in the United Kingdom. Let's see how many rows it has:
```bash
./clickhouse local -q "
SELECT count()
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')"
```
The file has 2.7M rows:
```response
2772030
```
It's always useful to see the schema that ClickHouse infers from the file:
```bash
./clickhouse local -q "DESCRIBE s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')"
```
```response
price Nullable(Int64)
date Nullable(UInt16)
postcode1 Nullable(String)
postcode2 Nullable(String)
type Nullable(String)
is_new Nullable(UInt8)
duration Nullable(String)
addr1 Nullable(String)
addr2 Nullable(String)
street Nullable(String)
locality Nullable(String)
town Nullable(String)
district Nullable(String)
county Nullable(String)
```
Let's see what the most expensive neighborhoods are:
```bash
./clickhouse local -q "
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/house_parquet/house_0.parquet')
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 10"
```
```response
LONDON CITY OF LONDON 886 2271305 █████████████████████████████████████████████▍
LEATHERHEAD ELMBRIDGE 206 1176680 ███████████████████████▌
LONDON CITY OF WESTMINSTER 12577 1108221 ██████████████████████▏
LONDON KENSINGTON AND CHELSEA 8728 1094496 █████████████████████▉
HYTHE FOLKESTONE AND HYTHE 130 1023980 ████████████████████▍
CHALFONT ST GILES CHILTERN 113 835754 ████████████████▋
AMERSHAM BUCKINGHAMSHIRE 113 799596 ███████████████▉
VIRGINIA WATER RUNNYMEDE 356 789301 ███████████████▊
BARNET ENFIELD 282 740514 ██████████████▊
NORTHWOOD THREE RIVERS 184 731609 ██████████████▋
```
:::tip
When you are ready to insert your files into ClickHouse, start up a ClickHouse server and insert the results of your `file` and `s3` table functions into a `MergeTree` table. View the [Quick Start](../../quick-start.mdx) for more details.
:::
## Usage {#usage}
By default `clickhouse-local` has access to data of a ClickHouse server on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
Basic usage (Linux):
``` bash
@@ -24,7 +164,9 @@ Basic usage (Mac):
$ ./clickhouse local --structure "table_structure" --input-format "format_of_incoming_data" --query "query"
```
-Also supported on Windows through WSL2.
+:::note
+`clickhouse-local` is also supported on Windows through WSL2.
+:::
Arguments:

View File

@@ -0,0 +1,76 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileApprox
sidebar_position: 204
---
# quantileApprox
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.
`quantileApprox` differs from other quantile functions in ClickHouse in that it enables the user to control the accuracy of the approximate quantile result.
**Syntax**
``` sql
quantileApprox(accuracy, level)(expr)
```
Alias: `medianApprox`.
**Arguments**
- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy.
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
**Returned value**
- Quantile of the specified level and accuracy.
Type:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
``` sql
SELECT quantileApprox(1, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(1, 0.25)(plus(number, 1))─┐
│ 1 │
└──────────────────────────────────────────┘
SELECT quantileApprox(10, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(10, 0.25)(plus(number, 1))─┐
│ 156 │
└───────────────────────────────────────────┘
SELECT quantileApprox(100, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(100, 0.25)(plus(number, 1))─┐
│ 251 │
└────────────────────────────────────────────┘
SELECT quantileApprox(1000, 0.25)(number + 1)
FROM numbers(1000)
┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐
│ 249 │
└─────────────────────────────────────────────┘
```
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@@ -114,3 +114,59 @@ Result:
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
└─────────────────────────────────────────────────────────────────────┘
```
## quantilesApprox
`quantilesApprox` works similarly to `quantileApprox` but allows calculating quantiles at different levels simultaneously and returns an array.
**Syntax**
``` sql
quantilesApprox(accuracy, level1, level2, ...)(expr)
```
**Returned value**
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
Type of array values:
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
**Example**
Query:
``` sql
SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [1,1,1] │
└──────────────────────────────────────────────────────┘
SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [156,413,659] │
└───────────────────────────────────────────────────────┘
SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [251,498,741] │
└────────────────────────────────────────────────────────┘
SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1)
FROM numbers(1000)
┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
│ [249,499,749] │
└─────────────────────────────────────────────────────────┘
```

View File

@@ -1463,28 +1463,28 @@ Result:
└───────────────────────┘
```
-## FROM\_UNIXTIME
+## fromUnixTimestamp
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns the [DateTime](../../sql-reference/data-types/datetime.md) type.
-FROM_UNIXTIME uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
+fromUnixTimestamp uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
-Alias: `fromUnixTimestamp`.
+Alias: `FROM_UNIXTIME`.
**Example:**
Query:
```sql
-SELECT FROM_UNIXTIME(423543535);
+SELECT fromUnixTimestamp(423543535);
```
Result:
```text
┌─fromUnixTimestamp(423543535)─┐
│          1983-06-04 10:58:55 │
└──────────────────────────────┘
```
When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string — it acts in the same way as [formatDateTime](#formatdatetime) and returns the [String](../../sql-reference/data-types/string.md#string) type.
@@ -1492,7 +1492,7 @@ When there are two or three arguments, the first an [Integer](../../sql-referenc
For example:
```sql
-SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
+SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
```text
@@ -1505,11 +1505,12 @@ SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax)
## fromUnixTimestampInJodaSyntax
-Similar to FROM_UNIXTIME, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
+Similar to fromUnixTimestamp, except that it formats time in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
**Example:**
Query:
``` sql
SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
@@ -1517,12 +1518,11 @@ SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC');
Result:
```
┌─fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC')─┐
│ 2022-11-30 10:41:12                                                     │
└─────────────────────────────────────────────────────────────────────────┘
```
## toModifiedJulianDay
Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD` to a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number in Int32. This function supports date from `0000-01-01` to `9999-12-31`. It raises an exception if the argument cannot be parsed as a date, or the date is invalid.
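As a quick sanity check of that conversion (a sketch; Modified Julian Day 58849 corresponds to 2020-01-01):

```sql
SELECT toModifiedJulianDay('2020-01-01') AS mjd;

┌───mjd─┐
│ 58849 │
└───────┘
```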

View File

@@ -401,7 +401,7 @@ Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, jso
Parses a JSON and extracts a value as a JSON scalar.
-If the value does not exist, an empty string will be returned.
+If the value does not exist, an empty string will be returned by default; with setting `function_json_value_return_type_allow_nullable = true`, `NULL` will be returned instead. If the value is of a complex type (such as struct, array, or map), an empty string will be returned by default; with setting `function_json_value_return_type_allow_complex = true`, the complex value will be returned.
Example:
@@ -410,6 +410,8 @@ SELECT JSON_VALUE('{"hello":"world"}', '$.hello');
SELECT JSON_VALUE('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]');
SELECT JSON_VALUE('{"hello":2}', '$.hello');
SELECT toTypeName(JSON_VALUE('{"hello":2}', '$.hello'));
+SELECT JSON_VALUE('{"hello":"world"}', '$.b') SETTINGS function_json_value_return_type_allow_nullable = true;
+SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') SETTINGS function_json_value_return_type_allow_complex = true;
```
Result:

View File

@@ -208,7 +208,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-referen
Query:
``` sql
-CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64) ENGINE = Memory;
+CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
@@ -227,11 +227,11 @@ Result:
It is possible to transform columns to rows using this function:
``` sql
-CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
+CREATE TABLE tupletest (col Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
-SELECT arrayJoin(tupleToNameValuePairs(col))FROM tupletest;
+SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:

View File

@@ -68,9 +68,9 @@ Result:
## mapFromArrays
-Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md).
+Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Note that the second argument can also be a [Map](../../sql-reference/data-types/map.md); in that case it is cast to an Array during execution.
-The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
+The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`.
**Syntax**
@@ -82,11 +82,11 @@ Alias: `MAP_FROM_ARRAYS(keys, values)`
**Arguments**
- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
-- `values` - Given value array to create a map from.
+- `values` - Given value array or map to create a map from.
**Returned value**
-- A map whose keys and values are constructed from the key and value arrays
+- A map whose keys and values are constructed from the key array and value array/map.
**Example**
@@ -94,13 +94,17 @@ Query:
```sql
select mapFromArrays(['a', 'b', 'c'], [1, 2, 3])
```
```text
┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐
│ {'a':1,'b':2,'c':3}                       │
└───────────────────────────────────────────┘
```
```sql
SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
```
```text
┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐
│ {1:('a',1),2:('b',2),3:('c',3)}                       │
└───────────────────────────────────────────────────────┘
```
## mapAdd

View File

@ -114,11 +114,11 @@ This will also create system tables even if message queue is empty.
## RELOAD CONFIG ## RELOAD CONFIG
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper; it only reloads `USER` configuration stored in `users.xml`. To reload all `USER` configuration, use `SYSTEM RELOAD USERS`.
## RELOAD USERS ## RELOAD USERS
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage. Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage.
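A short sketch of the distinction drawn above; both statements are documented in this section:
```sql
-- reloads server configuration; USER config is refreshed only from users.xml
SYSTEM RELOAD CONFIG;
-- reloads all access storages, including the replicated (ZooKeeper) one
SYSTEM RELOAD USERS;
```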
## SHUTDOWN ## SHUTDOWN
@ -224,6 +224,14 @@ Clears freezed backup with the specified name from all the disks. See more about
SYSTEM UNFREEZE WITH NAME <backup_name> SYSTEM UNFREEZE WITH NAME <backup_name>
``` ```
### WAIT LOADING PARTS
Waits until all asynchronously loading data parts of a table (outdated data parts) are loaded.
``` sql
SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
```
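For example, to block until the outdated parts of a hypothetical MergeTree table `db.events` have been loaded (the table name is illustrative):
``` sql
SYSTEM WAIT LOADING PARTS db.events
```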
## Managing ReplicatedMergeTree Tables ## Managing ReplicatedMergeTree Tables
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables. ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.
View File
@ -1,4 +1,5 @@
#include "ClusterCopierApp.h" #include "ClusterCopierApp.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/StatusFile.h> #include <Common/StatusFile.h>
#include <Common/TerminalSize.h> #include <Common/TerminalSize.h>
#include <IO/ConnectionTimeouts.h> #include <IO/ConnectionTimeouts.h>
@ -192,6 +193,8 @@ void ClusterCopierApp::mainImpl()
if (!task_file.empty()) if (!task_file.empty())
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
zkutil::validateZooKeeperConfig(config());
copier->init(); copier->init();
copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));
View File
@ -89,8 +89,12 @@ static std::vector<std::string> extractFromConfig(
if (has_zk_includes && process_zk_includes) if (has_zk_includes && process_zk_includes)
{ {
DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml)); DB::ConfigurationPtr bootstrap_configuration(new Poco::Util::XMLConfiguration(config_xml));
zkutil::validateZooKeeperConfig(*bootstrap_configuration);
zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>( zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>(
*bootstrap_configuration, "zookeeper", nullptr); *bootstrap_configuration, bootstrap_configuration->has("zookeeper") ? "zookeeper" : "keeper", nullptr);
zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; }); zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; });
config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache); config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache);
} }
View File
@ -815,7 +815,8 @@ try
} }
); );
bool has_zookeeper = config().has("zookeeper"); zkutil::validateZooKeeperConfig(config());
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); }); zkutil::ZooKeeperNodeCache main_config_zk_node_cache([&] { return global_context->getZooKeeper(); });
zkutil::EventPtr main_config_zk_changed_event = std::make_shared<Poco::Event>(); zkutil::EventPtr main_config_zk_changed_event = std::make_shared<Poco::Event>();
@ -1307,7 +1308,7 @@ try
{ {
/// We do not load ZooKeeper configuration on the first config loading /// We do not load ZooKeeper configuration on the first config loading
/// because TestKeeper server is not started yet. /// because TestKeeper server is not started yet.
if (config->has("zookeeper")) if (zkutil::hasZooKeeperConfig(*config))
global_context->reloadZooKeeperIfChanged(config); global_context->reloadZooKeeperIfChanged(config);
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
View File
@ -0,0 +1,36 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
struct Settings;
/// ErrorCodes must be declared inside namespace DB so the extern matches the definition in ErrorCodes.cpp
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
namespace
{
AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} only supports numerical types", name);
return std::make_shared<AggregateFunctionKolmogorovSmirnov>(argument_types, parameters);
}
}
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
{
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
}
}
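Hedged usage sketch for the function registered above: the first argument is the sample value, the second a 0/1 flag selecting the sample; the parameters default to 'two-sided' and 'auto'. The table and column names are hypothetical:
```sql
-- returns a tuple (d_statistic, p_value)
SELECT kolmogorovSmirnovTest('two-sided', 'auto')(value, sample_index)
FROM observations;
```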
View File
@ -0,0 +1,323 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/StatCommon.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <Common/Exception.h>
#include <Common/assert_cast.h>
#include <Common/ArenaAllocator.h>
#include <Common/PODArray_fwd.h>
#include <base/types.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <numeric> /// for std::gcd
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
{
enum class Alternative
{
TwoSided,
Less,
Greater
};
std::pair<Float64, Float64> getResult(Alternative alternative, String method)
{
::sort(x.begin(), x.end());
::sort(y.begin(), y.end());
Float64 max_s = std::numeric_limits<Float64>::lowest(); /// not min(), which is the smallest positive value for floating-point types
Float64 min_s = std::numeric_limits<Float64>::max();
Float64 now_s = 0;
UInt64 pos_x = 0;
UInt64 pos_y = 0;
UInt64 n1 = x.size();
UInt64 n2 = y.size();
const Float64 n1_d = 1. / n1;
const Float64 n2_d = 1. / n2;
const Float64 tol = 1e-7;
// reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
while (pos_x < x.size() && pos_y < y.size())
{
if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
{
if (x[pos_x] < y[pos_y])
{
now_s += n1_d;
++pos_x;
}
else
{
now_s -= n2_d;
++pos_y;
}
max_s = std::max(max_s, now_s);
min_s = std::min(min_s, now_s);
}
else
{
now_s += n1_d;
++pos_x;
}
}
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
min_s = std::min(min_s, now_s);
max_s = std::max(max_s, now_s);
Float64 d = 0;
if (alternative == Alternative::TwoSided)
d = std::max(std::abs(max_s), std::abs(min_s));
else if (alternative == Alternative::Less)
d = -min_s;
else if (alternative == Alternative::Greater)
d = max_s;
UInt64 g = std::gcd(n1, n2);
UInt64 nx_g = n1 / g;
UInt64 ny_g = n2 / g;
if (method == "auto")
method = std::max(n1, n2) <= 10000 ? "exact" : "asymp";
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
method = "asymp";
Float64 p_value = std::numeric_limits<Float64>::infinity();
if (method == "exact")
{
/* reference:
* Gunar Schröer and Dietrich Trenkler
* Exact and Randomization Distributions of Kolmogorov-Smirnov Tests for Two or Three Samples
*
* and
*
* Thomas Viehmann
* Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
*/
if (n2 > n1)
std::swap(n1, n2);
const Float64 f_n1 = static_cast<Float64>(n1);
const Float64 f_n2 = static_cast<Float64>(n2);
const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
PaddedPODArray<Float64> c(n1 + 1);
auto check = alternative == Alternative::TwoSided ?
[](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
: [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };
c[0] = 0;
for (UInt64 j = 1; j <= n1; j++)
if (check(k_d, 0., j / f_n1))
c[j] = 1.;
else
c[j] = c[j - 1];
for (UInt64 i = 1; i <= n2; i++)
{
if (check(k_d, i / f_n2, 0.))
c[0] = 1.;
for (UInt64 j = 1; j <= n1; j++)
if (check(k_d, i / f_n2, j / f_n1))
c[j] = 1.;
else
{
Float64 v = i / static_cast<Float64>(i + j);
Float64 w = j / static_cast<Float64>(i + j);
c[j] = v * c[j] + w * c[j - 1];
}
}
p_value = c[n1];
}
else if (method == "asymp")
{
Float64 n = std::min(n1, n2);
Float64 m = std::max(n1, n2);
Float64 p = sqrt((n * m) / (n + m)) * d;
if (alternative == Alternative::TwoSided)
{
/* reference:
* J.DURBIN
* Distribution theory for tests based on the sample distribution function
*/
Float64 new_val, old_val, s, w, z;
UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));
if (p < 1)
{
z = - (M_PI_2 * M_PI_4) / (p * p);
w = log(p);
s = 0;
for (UInt64 k = 1; k < k_max; k += 2)
s += exp(k * k * z - w);
p = s / 0.398942280401432677939946059934;
}
else
{
z = -2 * p * p;
s = -1;
UInt64 k = 1;
old_val = 0;
new_val = 1;
while (fabs(old_val - new_val) > tol)
{
old_val = new_val;
new_val += 2 * s * exp(z * k * k);
s *= -1;
k++;
}
p = new_val;
}
p_value = 1 - p;
}
else
{
/* reference:
* J. L. HODGES, Jr
* The significance probability of the Smirnov two-sample test
*/
// Use Hodges' suggested approximation Eqn 5.3
// Requires m to be the larger of (n1, n2)
Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
p_value = exp(expt);
}
}
return {d, p_value};
}
};
class AggregateFunctionKolmogorovSmirnov final:
public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
{
private:
using Alternative = typename KolmogorovSmirnov::Alternative;
Alternative alternative = Alternative::TwoSided;
String method = "auto";
public:
explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
{
if (params.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires two parameters or fewer", getName());
if (params.empty())
return;
if (params[0].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the first parameter to be a String", getName());
const auto & param = params[0].get<String>();
if (param == "two-sided")
alternative = Alternative::TwoSided;
else if (param == "less")
alternative = Alternative::Less;
else if (param == "greater")
alternative = Alternative::Greater;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
"It must be one of: 'two-sided', 'less', 'greater'", getName());
if (params.size() != 2)
return;
if (params[1].getType() != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires the second parameter to be a String", getName());
method = params[1].get<String>();
if (method != "auto" && method != "exact" && method != "asymp")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
"It must be one of: 'auto', 'exact', 'asymp'", getName());
}
String getName() const override
{
return "kolmogorovSmirnovTest";
}
bool allocatesMemoryInArena() const override { return true; }
static DataTypePtr createResultType()
{
DataTypes types
{
std::make_shared<DataTypeNumber<Float64>>(),
std::make_shared<DataTypeNumber<Float64>>(),
};
Strings names
{
"d_statistic",
"p_value"
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Float64 value = columns[0]->getFloat64(row_num);
UInt8 is_second = columns[1]->getUInt(row_num);
if (is_second)
this->data(place).addY(value, arena);
else
this->data(place).addX(value, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).merge(this->data(rhs), arena);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).write(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).read(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
if (!this->data(place).size_x || !this->data(place).size_y)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} requires both samples to be non-empty", getName());
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
/// Because p-value is a probability.
p_value = std::min(1.0, std::max(0.0, p_value));
auto & column_tuple = assert_cast<ColumnTuple &>(to);
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
column_stat.getData().push_back(d_statistic);
column_value.getData().push_back(p_value);
}
};
}
View File
@ -26,9 +26,11 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
} }
template <typename> class QuantileTiming; template <typename> class QuantileTiming;
template <typename> class QuantileApprox;
/** Generic aggregate function for calculation of quantiles. /** Generic aggregate function for calculation of quantiles.
@ -60,6 +62,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>; using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>); static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_approx = std::is_same_v<Data, QuantileApprox<Value>>;
static_assert(!is_decimal<Value> || !returns_float); static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels; QuantileLevels<Float64> levels;
@ -67,22 +70,57 @@ private:
/// Used when there are single level to get. /// Used when there are single level to get.
Float64 level = 0.5; Float64 level = 0.5;
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
ssize_t accuracy = 10000;
DataTypePtr & argument_type; DataTypePtr & argument_type;
public: public:
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params) AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>( : IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
argument_types_, params, createResultType(argument_types_)) argument_types_, params, createResultType(argument_types_))
, levels(params, returns_many) , levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many)
, level(levels.levels[0]) , level(levels.levels[0])
, argument_type(this->argument_types[0]) , argument_type(this->argument_types[0])
{ {
if (!returns_many && levels.size() > 1) if (!returns_many && levels.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName()); throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
if constexpr (is_quantile_approx)
{
if (params.empty())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one parameter", getName());
const auto & accuracy_field = params[0];
if (!isInt64OrUInt64FieldType(accuracy_field.getType()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
if (accuracy_field.getType() == Field::Types::Int64)
accuracy = accuracy_field.get<Int64>();
else
accuracy = accuracy_field.get<UInt64>();
if (accuracy <= 0)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires accuracy parameter with positive value but is {}",
getName(),
accuracy);
}
} }
String getName() const override { return Name::name; } String getName() const override { return Name::name; }
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
if constexpr (is_quantile_approx)
new (place) Data(accuracy);
else
new (place) Data;
}
static DataTypePtr createResultType(const DataTypes & argument_types_) static DataTypePtr createResultType(const DataTypes & argument_types_)
{ {
DataTypePtr res; DataTypePtr res;
@ -257,4 +295,7 @@ struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16";
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; }; struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; }; struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; };
struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; };
} }
View File
@ -0,0 +1,71 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileApprox.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename Value, bool _> using FuncQuantileApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantileApprox, false, void, false>;
template <typename Value, bool _> using FuncQuantilesApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantilesApprox, false, void, true>;
template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);
}
}
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileApprox::name, createAggregateFunctionQuantile<FuncQuantileApprox>);
factory.registerFunction(NameQuantilesApprox::name, {createAggregateFunctionQuantile<FuncQuantilesApprox>, properties});
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianApprox", NameQuantileApprox::name);
}
}
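A hedged usage sketch for the functions registered above: the first parameter is the accuracy (the sampler's relative error is 1/accuracy), any further parameters are quantile levels. The table and column are hypothetical:
```sql
SELECT
    quantileApprox(10000, 0.5)(response_time) AS median,
    quantilesApprox(10000, 0.25, 0.5, 0.75)(response_time) AS quartiles
FROM requests;
```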
View File
@ -0,0 +1,477 @@
#pragma once
#include <cmath>
#include <base/sort.h>
#include <Common/RadixSort.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
template <typename T>
class ApproxSampler
{
public:
struct Stats
{
T value; // the sampled value
Int64 g; // the minimum rank jump from the previous value's minimum rank
Int64 delta; // the maximum span of the rank
Stats() = default;
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
};
struct QueryResult
{
size_t index;
Int64 rank;
T value;
QueryResult(size_t index_, Int64 rank_, T value_) : index(index_), rank(rank_), value(value_) { }
};
ApproxSampler() = default;
explicit ApproxSampler(
double relative_error_,
size_t compress_threshold_ = default_compress_threshold,
size_t count_ = 0,
bool compressed_ = false)
: relative_error(relative_error_)
, compress_threshold(compress_threshold_)
, count(count_)
, compressed(compressed_)
{
sampled.reserve(compress_threshold);
backup_sampled.reserve(compress_threshold);
head_sampled.reserve(default_head_size);
}
bool isCompressed() const { return compressed; }
void setCompressed() { compressed = true; }
void insert(T x)
{
head_sampled.push_back(x);
compressed = false;
if (head_sampled.size() >= default_head_size)
{
withHeadBufferInserted();
if (sampled.size() >= compress_threshold)
compress();
}
}
void query(const Float64 * percentiles, const size_t * indices, size_t size, T * result) const
{
if (!head_sampled.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot operate on an uncompressed summary, call compress() first");
if (sampled.empty())
{
for (size_t i = 0; i < size; ++i)
result[i] = T();
return;
}
Int64 current_max = std::numeric_limits<Int64>::min();
for (const auto & stats : sampled)
current_max = std::max(stats.delta + stats.g, current_max);
Int64 target_error = current_max / 2;
size_t index = 0;
auto min_rank = sampled[0].g;
for (size_t i = 0; i < size; ++i)
{
double percentile = percentiles[indices[i]];
if (percentile <= relative_error)
{
result[indices[i]] = sampled.front().value;
}
else if (percentile >= 1 - relative_error)
{
result[indices[i]] = sampled.back().value;
}
else
{
QueryResult res = findApproxQuantile(index, min_rank, target_error, percentile);
index = res.index;
min_rank = res.rank;
result[indices[i]] = res.value;
}
}
}
void compress()
{
if (compressed)
return;
withHeadBufferInserted();
doCompress(2 * relative_error * count);
compressed = true;
}
void merge(const ApproxSampler & other)
{
if (other.count == 0)
return;
else if (count == 0)
{
compress_threshold = other.compress_threshold;
relative_error = other.relative_error;
count = other.count;
compressed = other.compressed;
sampled.resize(other.sampled.size());
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
return;
}
else
{
// Merge the two buffers.
// The GK algorithm is a bit unclear about it, but we need to adjust the statistics during the
// merging. The main idea is that samples that come from one side will suffer from the lack of
// precision of the other.
// As a concrete example, take two QuantileSummaries whose samples (value, g, delta) are:
// `a = [(0, 1, 0), (20, 99, 0)]` and `b = [(10, 1, 0), (30, 49, 0)]`
// This means `a` has 100 values, whose minimum is 0 and maximum is 20,
// while `b` has 50 values, between 10 and 30.
// The resulting samples of the merge will be:
// a+b = [(0, 1, 0), (10, 1, ??), (20, 99, ??), (30, 49, 0)]
// The values of `g` do not change, as they represent the minimum number of values between two
// consecutive samples. The values of `delta` should be adjusted, however.
// Take the case of the sample `10` from `b`. In the original stream, it could have appeared
// right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`.
// In the GK algorithm's style of working in terms of maximum bounds, one can observe that the
// maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) =
// floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is
// `floor(2 * eps_b * n_b)`.
// Only samples that interleave the other side are affected. That means that samples from
// one side that are lesser (or greater) than all samples from the other side are just copied
// unmodified.
// If the merging instances have different `relativeError`, the resulting instance will carry
// the largest one: `eps_ab = max(eps_a, eps_b)`.
// The main invariant of the GK algorithm is kept:
// `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
// `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
// Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])`
compress();
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + other.sampled.size());
double merged_relative_error = std::max(relative_error, other.relative_error);
size_t merged_count = count + other.count;
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
Int64 additional_other_delta = static_cast<Int64>(std::floor(2 * relative_error * count));
// Do a merge of two sorted lists until one of the lists is fully consumed
size_t self_idx = 0;
size_t other_idx = 0;
while (self_idx < sampled.size() && other_idx < other.sampled.size())
{
const Stats & self_sample = sampled[self_idx];
const Stats & other_sample = other.sampled[other_idx];
// Detect next sample
Stats next_sample;
Int64 additional_delta = 0;
if (self_sample.value < other_sample.value)
{
++self_idx;
next_sample = self_sample;
additional_delta = other_idx > 0 ? additional_self_delta : 0;
}
else
{
++other_idx;
next_sample = other_sample;
additional_delta = self_idx > 0 ? additional_other_delta : 0;
}
// Insert it
next_sample.delta += additional_delta;
backup_sampled.emplace_back(std::move(next_sample));
}
// Copy the remaining samples from the other list
// (by construction, at most one `while` loop will run)
while (self_idx < sampled.size())
{
backup_sampled.emplace_back(sampled[self_idx]);
++self_idx;
}
while (other_idx < other.sampled.size())
{
backup_sampled.emplace_back(other.sampled[other_idx]);
++other_idx;
}
std::swap(sampled, backup_sampled);
relative_error = merged_relative_error;
count = merged_count;
compress_threshold = other.compress_threshold;
doCompress(2 * merged_relative_error * merged_count);
compressed = true;
}
}
void write(WriteBuffer & buf) const
{
writeIntBinary<size_t>(compress_threshold, buf);
writeFloatBinary<double>(relative_error, buf);
writeIntBinary<size_t>(count, buf);
writeIntBinary<size_t>(sampled.size(), buf);
for (const auto & stats : sampled)
{
writeFloatBinary<T>(stats.value, buf);
writeIntBinary<Int64>(stats.g, buf);
writeIntBinary<Int64>(stats.delta, buf);
}
}
void read(ReadBuffer & buf)
{
readIntBinary<size_t>(compress_threshold, buf);
readFloatBinary<double>(relative_error, buf);
readIntBinary<size_t>(count, buf);
size_t sampled_len = 0;
readIntBinary<size_t>(sampled_len, buf);
sampled.resize(sampled_len);
for (size_t i = 0; i < sampled_len; ++i)
{
auto & stats = sampled[i]; /// take a reference so the deserialized values land in the stored element, not a copy
readFloatBinary<T>(stats.value, buf);
readIntBinary<Int64>(stats.g, buf);
readIntBinary<Int64>(stats.delta, buf);
}
}
private:
QueryResult findApproxQuantile(size_t index, Int64 min_rank_at_index, double target_error, double percentile) const
{
Stats curr_sample = sampled[index];
Int64 rank = static_cast<Int64>(std::ceil(percentile * count));
size_t i = index;
Int64 min_rank = min_rank_at_index;
while (i < sampled.size() - 1)
{
Int64 max_rank = min_rank + curr_sample.delta;
if (max_rank - target_error <= rank && rank <= min_rank + target_error)
return {i, min_rank, curr_sample.value};
else
{
++i;
curr_sample = sampled[i];
min_rank += curr_sample.g;
}
}
return {sampled.size() - 1, 0, sampled.back().value};
}
void withHeadBufferInserted()
{
if (head_sampled.empty())
return;
bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
if (use_radix_sort)
RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
else
::sort(head_sampled.begin(), head_sampled.end());
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + head_sampled.size());
size_t sample_idx = 0;
size_t ops_idx = 0;
size_t current_count = count;
for (; ops_idx < head_sampled.size(); ++ops_idx)
{
T current_sample = head_sampled[ops_idx];
// Add all the samples before the next observation.
while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
{
backup_sampled.emplace_back(sampled[sample_idx]);
++sample_idx;
}
// If it is the first one to insert, or if it is the last one
++current_count;
Int64 delta;
if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
delta = 0;
else
delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));
backup_sampled.emplace_back(current_sample, 1, delta);
}
// Add all the remaining existing samples
for (; sample_idx < sampled.size(); ++sample_idx)
backup_sampled.emplace_back(sampled[sample_idx]);
std::swap(sampled, backup_sampled);
head_sampled.clear();
count = current_count;
}
void doCompress(double merge_threshold)
{
if (sampled.empty())
return;
backup_sampled.clear();
// Start from the last element, which is always part of the set.
// The head contains the current new head, that may be merged with the current element.
Stats head = sampled.back();
ssize_t i = sampled.size() - 2;
// Do not compress the last element
while (i >= 1)
{
// The current sample:
const auto & sample1 = sampled[i];
// Do we need to compress?
if (sample1.g + head.g + head.delta < merge_threshold)
{
// Do not insert yet, just merge the current element into the head.
head.g += sample1.g;
}
else
{
// Prepend the current head, and keep the current sample as target for merging.
backup_sampled.push_back(head);
head = sample1;
}
--i;
}
backup_sampled.push_back(head);
// If necessary, add the minimum element:
auto curr_head = sampled.front();
// don't add the minimum element if `sampled` has only one element (both `curr_head` and
// `head` point to the same element)
if (curr_head.value <= head.value && sampled.size() > 1)
backup_sampled.emplace_back(sampled.front());
std::reverse(backup_sampled.begin(), backup_sampled.end());
std::swap(sampled, backup_sampled);
}
double relative_error;
size_t compress_threshold;
size_t count = 0;
bool compressed;
PaddedPODArray<Stats> sampled;
PaddedPODArray<Stats> backup_sampled;
PaddedPODArray<T> head_sampled;
static constexpr size_t default_compress_threshold = 10000;
static constexpr size_t default_head_size = 50000;
};
template <typename Value>
class QuantileApprox
{
private:
using Data = ApproxSampler<Value>;
mutable Data data;
public:
QuantileApprox() = default;
explicit QuantileApprox(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
void add(const Value & x)
{
data.insert(x);
}
template <typename Weight>
void add(const Value &, const Weight &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for GKSampler");
}
void merge(const QuantileApprox & rhs)
{
if (!data.isCompressed())
data.compress();
data.merge(rhs.data);
}
void serialize(WriteBuffer & buf) const
{
/// Always compress before serialization
if (!data.isCompressed())
data.compress();
data.write(buf);
}
void deserialize(ReadBuffer & buf)
{
data.read(buf);
data.setCompressed();
}
/// Get the value of the `level` quantile. The level must be between 0 and 1.
Value get(Float64 level)
{
if (!data.isCompressed())
data.compress();
Value res;
size_t index = 0;
data.query(&level, &index, 1, &res);
return res;
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
{
if (!data.isCompressed())
data.compress();
data.query(levels, indices, size, result);
}
Float64 getFloat64(Float64 /*level*/)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat64 is not implemented for GKSampler");
}
void getManyFloat(const Float64 * /*levels*/, const size_t * /*indices*/, size_t /*size*/, Float64 * /*result*/)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for GKSampler");
}
};
}
View File
@ -32,6 +32,7 @@ void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &); void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &); void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
void registerAggregateFunctionRate(AggregateFunctionFactory &); void registerAggregateFunctionRate(AggregateFunctionFactory &);
@ -79,6 +80,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory; class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -123,6 +125,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigestWeighted(factory); registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory); registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory); registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory); registerAggregateFunctionsSequenceMatch(factory);
registerAggregateFunctionWindowFunnel(factory); registerAggregateFunctionWindowFunnel(factory);
registerAggregateFunctionRate(factory); registerAggregateFunctionRate(factory);
@ -170,6 +173,7 @@ void registerAggregateFunctions()
registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory); registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionAnalysisOfVariance(factory); registerAggregateFunctionAnalysisOfVariance(factory);
registerAggregateFunctionKolmogorovSmirnovTest(factory);
registerWindowFunctions(factory); registerWindowFunctions(factory);
} }
View File
@ -86,7 +86,12 @@ public:
DataTypePtr getResultType() const override DataTypePtr getResultType() const override
{ {
return getExpression()->getResultType(); return result_type;
}
void resolve(DataTypePtr lambda_type)
{
result_type = std::move(lambda_type);
} }
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
@ -102,6 +107,7 @@ protected:
private: private:
Names argument_names; Names argument_names;
DataTypePtr result_type;
static constexpr size_t arguments_child_index = 0; static constexpr size_t arguments_child_index = 0;
static constexpr size_t expression_child_index = 1; static constexpr size_t expression_child_index = 1;
View File
@ -5085,8 +5085,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str(); arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str();
} }
argument_types[function_lambda_argument_index] = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve->getResultType()); auto lambda_resolved_type = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve_typed.getExpression()->getResultType());
argument_columns[function_lambda_argument_index].type = argument_types[function_lambda_argument_index]; lambda_to_resolve_typed.resolve(lambda_resolved_type);
argument_types[function_lambda_argument_index] = lambda_resolved_type;
argument_columns[function_lambda_argument_index].type = lambda_resolved_type;
function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve); function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve);
} }
View File
@ -115,13 +115,23 @@ private:
for (size_t i = 0; i < expected_argument_types_size; ++i) for (size_t i = 0; i < expected_argument_types_size; ++i)
{ {
// Skip lambdas
if (WhichDataType(expected_argument_types[i]).isFunction())
continue;
const auto & expected_argument_type = expected_argument_types[i]; const auto & expected_argument_type = expected_argument_types[i];
const auto & actual_argument_type = actual_argument_columns[i].type; const auto & actual_argument_type = actual_argument_columns[i].type;
if (!expected_argument_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} expected argument {} type is not set after running {} pass",
function->toAST()->formatForErrorMessage(),
i + 1,
pass_name);
if (!actual_argument_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Function {} actual argument {} type is not set after running {} pass",
function->toAST()->formatForErrorMessage(),
i + 1,
pass_name);
if (!expected_argument_type->equals(*actual_argument_type)) if (!expected_argument_type->equals(*actual_argument_type))
{ {
/// Aggregate functions remove low cardinality for their argument types /// Aggregate functions remove low cardinality for their argument types

View File
namespace DB namespace DB
{ {
BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_) : file_infos(plain_backup_) BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_)
: log(&Poco::Logger::get("BackupCoordinationLocal")), file_infos(plain_backup_)
{ {
} }
@ -35,7 +36,7 @@ Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milli
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums) void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
{ {
std::lock_guard lock{replicated_tables_mutex}; std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums); replicated_tables.addPartNames({table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums});
} }
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@ -48,7 +49,7 @@ Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_sha
void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations) void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations)
{ {
std::lock_guard lock{replicated_tables_mutex}; std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addMutations(table_shared_id, table_name_for_logs, replica_name, mutations); replicated_tables.addMutations({table_shared_id, table_name_for_logs, replica_name, mutations});
} }
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@ -61,7 +62,7 @@ std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getRepli
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path) void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path)
{ {
std::lock_guard lock{replicated_tables_mutex}; std::lock_guard lock{replicated_tables_mutex};
replicated_tables.addDataPath(table_shared_id, data_path); replicated_tables.addDataPath({table_shared_id, data_path});
} }
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const
@ -74,7 +75,7 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{ {
std::lock_guard lock{replicated_access_mutex}; std::lock_guard lock{replicated_access_mutex};
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path); replicated_access.addFilePath({access_zk_path, access_entity_type, "", file_path});
} }
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
@ -87,7 +88,7 @@ Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & acc
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{ {
std::lock_guard lock{replicated_sql_objects_mutex}; std::lock_guard lock{replicated_sql_objects_mutex};
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path); replicated_sql_objects.addDirectory({loader_zk_path, object_type, "", dir_path});
} }
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
@ -125,7 +126,12 @@ bool BackupCoordinationLocal::startWritingFile(size_t data_file_index)
bool BackupCoordinationLocal::hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const bool BackupCoordinationLocal::hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const
{ {
return (num_active_backups > 1); if (num_active_backups > 1)
{
LOG_WARNING(log, "Found concurrent backups: num_active_backups={}", num_active_backups);
return true;
}
return false;
} }
} }
View File
@ -52,6 +52,8 @@ public:
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override; bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
private: private:
Poco::Logger * const log;
BackupCoordinationReplicatedTables TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables; BackupCoordinationReplicatedTables TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
BackupCoordinationReplicatedAccess TSA_GUARDED_BY(replicated_access_mutex) replicated_access; BackupCoordinationReplicatedAccess TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
BackupCoordinationReplicatedSQLObjects TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects; BackupCoordinationReplicatedSQLObjects TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
View File
@ -1,13 +1,18 @@
#include <Backups/BackupCoordinationRemote.h> #include <Backups/BackupCoordinationRemote.h>
#include <base/hex.h>
#include <Access/Common/AccessEntityType.h> #include <Access/Common/AccessEntityType.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationStage.h>
#include <Common/escapeForFileName.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h> #include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <IO/ReadBufferFromString.h> #include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
#include <Backups/BackupCoordinationStage.h>
namespace DB namespace DB
@ -154,8 +159,7 @@ BackupCoordinationRemote::BackupCoordinationRemote(
const String & current_host_, const String & current_host_,
bool plain_backup_, bool plain_backup_,
bool is_internal_) bool is_internal_)
: get_zookeeper(get_zookeeper_) : root_zookeeper_path(root_zookeeper_path_)
, root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_) , zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, keeper_settings(keeper_settings_) , keeper_settings(keeper_settings_)
, backup_uuid(backup_uuid_) , backup_uuid(backup_uuid_)
@ -164,17 +168,33 @@ BackupCoordinationRemote::BackupCoordinationRemote(
, current_host_index(findCurrentHostIndex(all_hosts, current_host)) , current_host_index(findCurrentHostIndex(all_hosts, current_host))
, plain_backup(plain_backup_) , plain_backup(plain_backup_)
, is_internal(is_internal_) , is_internal(is_internal_)
{ , log(&Poco::Logger::get("BackupCoordinationRemote"))
zookeeper_retries_info = ZooKeeperRetriesInfo( , with_retries(
"BackupCoordinationRemote", log,
&Poco::Logger::get("BackupCoordinationRemote"), get_zookeeper_,
keeper_settings.keeper_max_retries, keeper_settings,
keeper_settings.keeper_retry_initial_backoff_ms, [zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal]
keeper_settings.keeper_retry_max_backoff_ms); (WithRetries::FaultyKeeper & zk)
{
/// Recreate this ephemeral node to signal that we are alive.
if (is_internal)
{
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS)
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
else if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, alive_node_path);
}
})
{
createRootNodes(); createRootNodes();
stage_sync.emplace( stage_sync.emplace(
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination")); zookeeper_path,
with_retries,
log);
} }
BackupCoordinationRemote::~BackupCoordinationRemote() BackupCoordinationRemote::~BackupCoordinationRemote()
@ -190,44 +210,45 @@ BackupCoordinationRemote::~BackupCoordinationRemote()
} }
} }
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{zookeeper_mutex};
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to different [Zoo]Keeper instance
/// so we may read a bit stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void BackupCoordinationRemote::createRootNodes() void BackupCoordinationRemote::createRootNodes()
{ {
auto zk = getZooKeeper(); auto holder = with_retries.createRetriesControlHolder("createRootNodes");
zk->createAncestors(zookeeper_path); holder.retries_ctl.retryLoop(
zk->createIfNotExists(zookeeper_path, ""); [&, &zk = holder.faulty_zookeeper]()
zk->createIfNotExists(zookeeper_path + "/repl_part_names", ""); {
zk->createIfNotExists(zookeeper_path + "/repl_mutations", ""); with_retries.renewZooKeeper(zk);
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zk->createIfNotExists(zookeeper_path + "/repl_access", ""); zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects", "");
zk->createIfNotExists(zookeeper_path + "/file_infos", ""); Coordination::Requests ops;
zk->createIfNotExists(zookeeper_path + "/writing_files", ""); Coordination::Responses responses;
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_part_names", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_mutations", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_data_paths", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/file_infos", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/writing_files", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
});
} }
void BackupCoordinationRemote::removeAllNodes() void BackupCoordinationRemote::removeAllNodes()
{ {
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore. auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
/// holder.retries_ctl.retryLoop(
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query [&, &zk = holder.faulty_zookeeper]()
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination {
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part /// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that). ///
auto zk = getZooKeeper(); /// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
zk->removeRecursive(zookeeper_path); /// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
with_retries.renewZooKeeper(zk);
zk->removeRecursive(zookeeper_path);
});
} }
@ -255,10 +276,11 @@ Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std
void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name) void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name)
{ {
{ {
ZooKeeperRetriesControl retries_ctl(logging_name + "::create", zookeeper_retries_info); auto holder = with_retries.createRetriesControlHolder(logging_name + "::create");
retries_ctl.retryLoop([&] holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{ {
auto zk = getZooKeeper(); with_retries.renewZooKeeper(zk);
zk->createIfNotExists(path, ""); zk->createIfNotExists(path, "");
}); });
} }
@ -279,10 +301,11 @@ void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String &
String part = value.substr(begin, end - begin); String part = value.substr(begin, end - begin);
String part_path = fmt::format("{}/{:06}", path, i); String part_path = fmt::format("{}/{:06}", path, i);
ZooKeeperRetriesControl retries_ctl(logging_name + "::createPart", zookeeper_retries_info); auto holder = with_retries.createRetriesControlHolder(logging_name + "::createPart");
retries_ctl.retryLoop([&] holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{ {
auto zk = getZooKeeper(); with_retries.renewZooKeeper(zk);
zk->createIfNotExists(part_path, part); zk->createIfNotExists(part_path, part);
}); });
} }
@ -293,9 +316,11 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str
Strings part_names; Strings part_names;
{ {
ZooKeeperRetriesControl retries_ctl(logging_name + "::getChildren", zookeeper_retries_info); auto holder = with_retries.createRetriesControlHolder(logging_name + "::getChildren");
retries_ctl.retryLoop([&]{ holder.retries_ctl.retryLoop(
auto zk = getZooKeeper(); [&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
part_names = zk->getChildren(path); part_names = zk->getChildren(path);
std::sort(part_names.begin(), part_names.end()); std::sort(part_names.begin(), part_names.end());
}); });
@@ -306,10 +331,11 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str
{
String part;
String part_path = path + "/" + part_name;
auto holder = with_retries.createRetriesControlHolder(logging_name + "::get");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
part = zk->get(part_path);
});
res += part;
@@ -330,11 +356,16 @@ void BackupCoordinationRemote::addReplicatedPartNames(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
}

auto holder = with_retries.createRetriesControlHolder("addReplicatedPartNames");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zk->createIfNotExists(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs));
});
}
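Note the change from zk->create(..., zkutil::CreateMode::Persistent) to zk->createIfNotExists(...): once a call can be retried, the first attempt may have succeeded on the server even though the client saw a timeout, so a plain create would fail the retry with ZNODEEXISTS. A toy standalone model of why the idempotent variant is retry-safe (std::set stands in for the znode tree; all names are invented):

#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

std::set<std::string> znodes; // toy stand-in for the ZooKeeper tree

void create(const std::string & path)
{
    if (!znodes.insert(path).second)
        throw std::runtime_error("ZNODEEXISTS: " + path); // retry after a lost ack fails
}

void createIfNotExists(const std::string & path)
{
    znodes.insert(path); // an already existing node is fine, so a retry is a no-op
}

int main()
{
    createIfNotExists("/backup/repl_part_names/t1"); // first attempt: ack lost by client
    createIfNotExists("/backup/repl_part_names/t1"); // retry: succeeds silently
    try
    {
        create("/backup/repl_part_names/t1");        // a non-idempotent retry would throw
    }
    catch (const std::runtime_error & e)
    {
        std::cout << "plain create on retry: " << e.what() << "\n";
    }
}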
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@@ -356,11 +387,16 @@ void BackupCoordinationRemote::addReplicatedMutations(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
}

auto holder = with_retries.createRetriesControlHolder("addReplicatedMutations");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zk->createIfNotExists(path, ReplicatedMutations::serialize(mutations, table_name_for_logs));
});
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@@ -380,11 +416,16 @@ void BackupCoordinationRemote::addReplicatedDataPath(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
}

auto holder = with_retries.createRetriesControlHolder("addReplicatedDataPath");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(data_path);
zk->createIfNotExists(path, "");
});
}
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
@@ -400,55 +441,88 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
if (replicated_tables)
return;
std::vector<BackupCoordinationReplicatedTables::PartNamesForTableReplica> part_names_for_replicated_tables;
{
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_part_names");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
part_names_for_replicated_tables.clear();
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
part_names_for_replicated_tables.push_back(
{table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums});
}
}
});
}
std::vector<BackupCoordinationReplicatedTables::MutationsForTableReplica> mutations_for_replicated_tables;
{
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_mutations");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
mutations_for_replicated_tables.clear();
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_mutations";
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
mutations_for_replicated_tables.push_back(
{table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations});
}
}
});
}
std::vector<BackupCoordinationReplicatedTables::DataPathForTableReplica> data_paths_for_replicated_tables;
{
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_data_paths");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
data_paths_for_replicated_tables.clear();
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_data_paths";
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_data_path : zk->getChildren(path2))
{
String data_path = unescapeForFileName(escaped_data_path);
data_paths_for_replicated_tables.push_back({table_shared_id, data_path});
}
}
});
}
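Each retryLoop body above begins by clearing the vector it fills (part_names_for_replicated_tables.clear(); and so on). That is what makes a retried lambda idempotent: a half-filled accumulator from a failed attempt would otherwise produce duplicates. A standalone sketch of the hazard and the fix (everything here is invented for illustration):

#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> out;
    int failures_left = 1;

    auto attempt = [&]
    {
        out.clear();                       // same role as the clear() calls in the diff
        out.push_back("table1/replica1");
        if (failures_left-- > 0)
            throw std::runtime_error("connection loss mid-listing");
        out.push_back("table1/replica2");
    };

    for (;;)                               // minimal retry loop
    {
        try { attempt(); break; }
        catch (const std::runtime_error &) { /* retry */ }
    }

    assert(out.size() == 2);               // without clear() the retry would yield 3 entries
}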
replicated_tables.emplace();
for (auto & part_names : part_names_for_replicated_tables)
replicated_tables->addPartNames(std::move(part_names));
for (auto & mutations : mutations_for_replicated_tables)
replicated_tables->addMutations(std::move(mutations));
for (auto & data_paths : data_paths_for_replicated_tables)
replicated_tables->addDataPath(std::move(data_paths));
}
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
{
@@ -457,13 +531,18 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
}
auto holder = with_retries.createRetriesControlHolder("addReplicatedAccessFilePath");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zk->createIfNotExists(path, "");
path += "/" + current_host;
zk->createIfNotExists(path, file_path);
});
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
@@ -478,25 +557,35 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
if (replicated_access)
return;
std::vector<BackupCoordinationReplicatedAccess::FilePathForAccessEntitry> file_path_for_access_entities;
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedAccess");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
file_path_for_access_entities.clear();
with_retries.renewZooKeeper(zk);

String path = zookeeper_path + "/repl_access";
for (const String & escaped_access_zk_path : zk->getChildren(path))
{
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
String path2 = path + "/" + escaped_access_zk_path;
for (const String & type_str : zk->getChildren(path2))
{
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
String path3 = path2 + "/" + type_str;
for (const String & host_id : zk->getChildren(path3))
{
String file_path = zk->get(path3 + "/" + host_id);
file_path_for_access_entities.push_back({access_zk_path, type, host_id, file_path});
}
}
}
});

replicated_access.emplace();
for (auto & file_path : file_path_for_access_entities)
replicated_access->addFilePath(std::move(file_path));
}
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
@@ -507,21 +596,26 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedSQLObjectsDir() must not be called after preparing");
}
auto holder = with_retries.createRetriesControlHolder("addReplicatedSQLObjectsDir");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_sql_objects/" + escapeForFileName(loader_zk_path);
zk->createIfNotExists(path, "");

path += "/";
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
path += "functions";
break;
}

zk->createIfNotExists(path, "");
path += "/" + current_host;
zk->createIfNotExists(path, dir_path);
});
}
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
@@ -536,27 +630,36 @@ void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
if (replicated_sql_objects)
return;
std::vector<BackupCoordinationReplicatedSQLObjects::DirectoryPathForSQLObject> directories_for_sql_objects;
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedSQLObjects");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
directories_for_sql_objects.clear();
with_retries.renewZooKeeper(zk);

String path = zookeeper_path + "/repl_sql_objects";
for (const String & escaped_loader_zk_path : zk->getChildren(path))
{
String loader_zk_path = unescapeForFileName(escaped_loader_zk_path);
String objects_path = path + "/" + escaped_loader_zk_path;

if (String functions_path = objects_path + "/functions"; zk->exists(functions_path))
{
UserDefinedSQLObjectType object_type = UserDefinedSQLObjectType::Function;
for (const String & host_id : zk->getChildren(functions_path))
{
String dir = zk->get(functions_path + "/" + host_id);
directories_for_sql_objects.push_back({loader_zk_path, object_type, host_id, dir});
}
}
}
});

replicated_sql_objects.emplace();
for (auto & directory : directories_for_sql_objects)
replicated_sql_objects->addDirectory(std::move(directory));
}
void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
{
@@ -594,9 +697,11 @@ void BackupCoordinationRemote::prepareFileInfos() const
Strings hosts_with_file_infos;
{
auto holder = with_retries.createRetriesControlHolder("prepareFileInfos::get_hosts");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
hosts_with_file_infos = zk->getChildren(zookeeper_path + "/file_infos");
});
}
@@ -615,10 +720,11 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
String full_path = zookeeper_path + "/writing_files/" + std::to_string(data_file_index);
String host_index_str = std::to_string(current_host_index);

auto holder = with_retries.createRetriesControlHolder("startWritingFile");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
auto code = zk->tryCreate(full_path, host_index_str, zkutil::CreateMode::Persistent);

if (code == Coordination::Error::ZOK)
@@ -632,51 +738,63 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
return acquired_writing;
}
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const
{
/// If it's internal, concurrency will be checked for the base backup.
if (is_internal)
return false;

std::string backup_stage_path = zookeeper_path + "/stage";
bool result = false;

auto holder = with_retries.createRetriesControlHolder("hasConcurrentBackups");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);

if (!zk->exists(root_zookeeper_path))
zk->createAncestors(root_zookeeper_path);

for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
zk->get(root_zookeeper_path, &stat);
Strings existing_backup_paths = zk->getChildren(root_zookeeper_path);

for (const auto & existing_backup_path : existing_backup_paths)
{
if (startsWith(existing_backup_path, "restore-"))
continue;

String existing_backup_uuid = existing_backup_path;
existing_backup_uuid.erase(0, String("backup-").size());

if (existing_backup_uuid == toString(backup_uuid))
continue;

const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
if (status != Stage::COMPLETED)
{
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
result = true;
return;
}
}

zk->createIfNotExists(backup_stage_path, "");
auto code = zk->trySet(backup_stage_path, Stage::SCHEDULED_TO_START, stat.version);
if (code == Coordination::Error::ZOK)
break;
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
throw zkutil::KeeperException(code, backup_stage_path);
}
});

return result;
}

}
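The loop above is an optimistic compare-and-set: read the stage node together with its Stat, scan for concurrent backups, then trySet with stat.version; ZBADVERSION means another host changed the node in between, so the whole check must be redone. A standalone model of that protocol (the Node struct and error codes are stand-ins, not ZooKeeper's API):

#include <iostream>
#include <string>

enum class Error { ZOK, ZBADVERSION };

struct Node { int version = 0; std::string data; };

// Toy trySet with ZooKeeper's versioned-set semantics.
Error trySet(Node & node, const std::string & data, int expected_version)
{
    if (node.version != expected_version)
        return Error::ZBADVERSION;   // someone else won the race: re-read and retry
    node.data = data;
    ++node.version;
    return Error::ZOK;
}

int main()
{
    Node stage;
    int seen_version = stage.version;                       // like zk->get(root, &stat)

    trySet(stage, "other-host", seen_version);              // a concurrent writer sneaks in

    if (trySet(stage, "SCHEDULED_TO_START", seen_version) == Error::ZBADVERSION)
        std::cout << "ZBADVERSION: re-read and repeat the concurrency check\n";
}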
@@ -6,7 +6,7 @@
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Backups/WithRetries.h>

namespace DB
@@ -19,13 +19,7 @@ constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10;
class BackupCoordinationRemote : public IBackupCoordination
{
public:
using BackupKeeperSettings = WithRetries::KeeperSettings;

BackupCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
@@ -79,7 +73,6 @@ public:
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);

private:
void createRootNodes();
void removeAllNodes();
@@ -94,7 +87,6 @@ private:
void prepareReplicatedSQLObjects() const TSA_REQUIRES(replicated_sql_objects_mutex);
void prepareFileInfos() const TSA_REQUIRES(file_infos_mutex);

const String root_zookeeper_path;
const String zookeeper_path;
const BackupKeeperSettings keeper_settings;
@@ -104,11 +96,12 @@ private:
const size_t current_host_index;
const bool plain_backup;
const bool is_internal;
Poco::Logger * const log;

/// The order of these two fields matters, because stage_sync holds a reference to with_retries object
mutable WithRetries with_retries;
std::optional<BackupCoordinationStageSync> stage_sync;

mutable std::optional<BackupCoordinationReplicatedTables> TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
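The comment about field order is load-bearing: C++ destroys non-static data members in reverse declaration order, so declaring with_retries before stage_sync guarantees the WithRetries object outlives the stage sync that holds a reference to it. A standalone demonstration with simplified stand-in types:

#include <iostream>

struct WithRetriesLike
{
    ~WithRetriesLike() { std::cout << "WithRetries destroyed\n"; }
};

struct StageSyncLike
{
    WithRetriesLike & dep;               // holds a reference, like stage_sync does
    ~StageSyncLike() { std::cout << "StageSync destroyed (dep still alive)\n"; }
};

struct Coordination
{
    WithRetriesLike with_retries;        // declared first => destroyed last
    StageSyncLike stage_sync{with_retries};
};

int main()
{
    Coordination c;                      // prints StageSync first, WithRetries second
}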
@@ -7,8 +7,13 @@ namespace DB
BackupCoordinationReplicatedAccess::BackupCoordinationReplicatedAccess() = default;
BackupCoordinationReplicatedAccess::~BackupCoordinationReplicatedAccess() = default;

void BackupCoordinationReplicatedAccess::addFilePath(FilePathForAccessEntitry && file_path_for_access_entity)
{
const auto & access_zk_path = file_path_for_access_entity.access_zk_path;
const auto & access_entity_type = file_path_for_access_entity.access_entity_type;
const auto & host_id = file_path_for_access_entity.host_id;
const auto & file_path = file_path_for_access_entity.file_path;

auto & ref = file_paths_by_zk_path[std::make_pair(access_zk_path, access_entity_type)];
ref.file_paths.emplace(file_path);
@@ -28,8 +28,16 @@ public:
BackupCoordinationReplicatedAccess();
~BackupCoordinationReplicatedAccess();

struct FilePathForAccessEntitry
{
String access_zk_path;
AccessEntityType access_entity_type;
String host_id;
String file_path;
};

/// Adds a path to access*.txt file keeping access entities of a ReplicatedAccessStorage.
void addFilePath(FilePathForAccessEntitry && file_path_for_access_entity);

/// Returns all paths added by addFilePath() if `host_id` is a host chosen to store access.
Strings getFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const;
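Collapsing four positional parameters into a small struct passed by rvalue reference is what lets prepareReplicatedAccess build a plain vector of these records inside the retry loop and move them out afterwards; it also makes call sites self-describing. A standalone sketch in the same C++20 designated-initializer style the diff already uses for keeper_settings (the types below are simplified stand-ins; note the diff's struct is actually spelled "FilePathForAccessEntitry"):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct FilePathForAccessEntity
{
    std::string access_zk_path;
    std::string access_entity_type;
    std::string host_id;
    std::string file_path;
};

void addFilePath(FilePathForAccessEntity && e)
{
    std::cout << e.access_zk_path << " -> " << e.file_path << "\n";
}

int main()
{
    std::vector<FilePathForAccessEntity> collected;
    collected.push_back({.access_zk_path = "/clickhouse/access",
                         .access_entity_type = "USER",
                         .host_id = "host1",
                         .file_path = "users/alice.sql"});

    for (auto & e : collected)
        addFilePath(std::move(e));       // moved out after the retry loop finishes
}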
@@ -7,8 +7,13 @@ namespace DB
BackupCoordinationReplicatedSQLObjects::BackupCoordinationReplicatedSQLObjects() = default;
BackupCoordinationReplicatedSQLObjects::~BackupCoordinationReplicatedSQLObjects() = default;

void BackupCoordinationReplicatedSQLObjects::addDirectory(DirectoryPathForSQLObject && directory_path_for_sql_object)
{
const auto & loader_zk_path = directory_path_for_sql_object.loader_zk_path;
const auto & object_type = directory_path_for_sql_object.object_type;
const auto & host_id = directory_path_for_sql_object.host_id;
const auto & dir_path = directory_path_for_sql_object.dir_path;

auto & ref = dir_paths_by_zk_path[std::make_pair(loader_zk_path, object_type)];
ref.dir_paths.emplace(dir_path);
@@ -28,8 +28,16 @@ public:
BackupCoordinationReplicatedSQLObjects();
~BackupCoordinationReplicatedSQLObjects();

struct DirectoryPathForSQLObject
{
String loader_zk_path;
UserDefinedSQLObjectType object_type;
String host_id;
String dir_path;
};

/// Adds a path to directory keeping user defined SQL objects.
void addDirectory(DirectoryPathForSQLObject && directory_path_for_sql_object);

/// Returns all added paths to directories if `host_id` is a host chosen to store user-defined SQL objects.
Strings getDirectories(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const;
@@ -149,12 +149,13 @@ private:
BackupCoordinationReplicatedTables::BackupCoordinationReplicatedTables() = default;
BackupCoordinationReplicatedTables::~BackupCoordinationReplicatedTables() = default;

void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica && part_names)
{
const auto & table_shared_id = part_names.table_shared_id;
const auto & table_name_for_logs = part_names.table_name_for_logs;
const auto & replica_name = part_names.replica_name;
const auto & part_names_and_checksums = part_names.part_names_and_checksums;

if (prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after preparing");
@@ -216,12 +217,13 @@ Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_sh
return it2->second;
}

void BackupCoordinationReplicatedTables::addMutations(MutationsForTableReplica && mutations_for_table_replica)
{
const auto & table_shared_id = mutations_for_table_replica.table_shared_id;
const auto & table_name_for_logs = mutations_for_table_replica.table_name_for_logs;
const auto & replica_name = mutations_for_table_replica.replica_name;
const auto & mutations = mutations_for_table_replica.mutations;

if (prepared)
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMutations() must not be called after preparing");
@@ -254,8 +256,11 @@ BackupCoordinationReplicatedTables::getMutations(const String & table_shared_id,
return res;
}

void BackupCoordinationReplicatedTables::addDataPath(DataPathForTableReplica && data_path_for_table_replica)
{
const auto & table_shared_id = data_path_for_table_replica.table_shared_id;
const auto & data_path = data_path_for_table_replica.data_path;

auto & table_info = table_infos[table_shared_id];
table_info.data_paths.emplace(data_path);
}
@@ -38,15 +38,19 @@ public:
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;

struct PartNamesForTableReplica
{
String table_shared_id;
String table_name_for_logs;
String replica_name;
std::vector<PartNameAndChecksum> part_names_and_checksums;
};

/// Adds part names which a specified replica of a replicated table is going to put to the backup.
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
/// getPartNames().
/// Checksums are used only to control that parts under the same names on different replicas are the same.
void addPartNames(PartNamesForTableReplica && part_names);

/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
@@ -55,20 +59,30 @@ public:
using MutationInfo = IBackupCoordination::MutationInfo;

struct MutationsForTableReplica
{
String table_shared_id;
String table_name_for_logs;
String replica_name;
std::vector<MutationInfo> mutations;
};

/// Adds information about mutations of a replicated table.
void addMutations(MutationsForTableReplica && mutations_for_table_replica);

/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
std::vector<MutationInfo> getMutations(const String & table_shared_id, const String & replica_name) const;

struct DataPathForTableReplica
{
String table_shared_id;
String data_path;
};

/// Adds a data path in backup for a replicated table.
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
/// getDataPaths().
void addDataPath(DataPathForTableReplica && data_path_for_table_replica);

/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
Strings getDataPaths(const String & table_shared_id) const;
@@ -1,11 +1,13 @@
#include <Backups/BackupCoordinationStageSync.h>

#include <base/chrono_io.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

namespace DB
@@ -17,9 +19,12 @@ namespace ErrorCodes
}

BackupCoordinationStageSync::BackupCoordinationStageSync(
const String & root_zookeeper_path_,
WithRetries & with_retries_,
Poco::Logger * log_)
: zookeeper_path(root_zookeeper_path_ + "/stage")
, with_retries(with_retries_)
, log(log_)
{
createRootNodes();
@@ -27,32 +32,48 @@ BackupCoordinationStageSync::BackupCoordinationStageSync(
void BackupCoordinationStageSync::createRootNodes()
{
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
});
}
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
{
auto holder = with_retries.createRetriesControlHolder("set");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);

/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException(code, alive_node_path);

zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
});
}
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
{
auto holder = with_retries.createRetriesControlHolder("setError");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);

WriteBufferFromOwnString buf;
writeStringBinary(current_host, buf);
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
});
}
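setError packs the failing host name and a serialized exception into the single /error znode, and readCurrentState (below) unpacks it with readStringBinary/readException. A standalone sketch of the same length-prefixed framing using iostreams, with ClickHouse's buffer helpers replaced by invented encode/decode functions:

#include <iostream>
#include <sstream>
#include <string>

// Invented length-prefixed framing, playing the role of writeStringBinary/readStringBinary.
void writeString(std::ostream & out, const std::string & s)
{
    out << s.size() << '\n' << s;
}

std::string readString(std::istream & in)
{
    size_t n = 0;
    in >> n;
    in.ignore(1);                       // skip the '\n' separator
    std::string s(n, '\0');
    in.read(s.data(), static_cast<std::streamsize>(n));
    return s;
}

int main()
{
    // "setError": one blob holding who failed and why.
    std::ostringstream buf;
    writeString(buf, "host1:9000");
    writeString(buf, "Code: 999. Keeper session expired");

    // "readCurrentState": unpack the host first, then the exception payload.
    std::istringstream in(buf.str());
    std::cout << "Got error from " << readString(in)
              << ": " << readString(in) << "\n";
}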
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
@@ -83,14 +104,24 @@ struct BackupCoordinationStageSync::State
};

BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
{
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};

State state;
if (zk_nodes_set.contains("error"))
{
String errors;
{
auto holder = with_retries.createRetriesControlHolder("readCurrentState");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
errors = zookeeper->get(zookeeper_path + "/error");
});
}
ReadBufferFromOwnString buf{errors};
String host;
readStringBinary(host, buf);
state.error = std::make_pair(host, readException(buf, fmt::format("Got error from {}", host)));
@@ -102,8 +133,38 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
{
UnreadyHostState unready_host_state;
const String started_node_name = "started|" + host;
const String alive_node_name = "alive|" + host;
const String alive_node_path = zookeeper_path + "/" + alive_node_name;
unready_host_state.started = zk_nodes_set.contains(started_node_name);

/// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore.
/// Though we recreate the "alive" node when reconnecting, it might not be enough and a race condition is possible.
/// And everything we can do here is just retry.
/// In the worst case, if we don't manage to see the alive node for a long time, we will just abort the backup.
unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
if (!unready_host_state.alive)
{
LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host);
auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);

if (zookeeper->existsNoFailureInjection(alive_node_path))
{
unready_host_state.alive = true;
return;
}

// Retry with backoff. We also check whether this is the last retry, because we don't want to rethrow an exception.
if (!holder.retries_ctl.isLastRetry())
holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host);
});
}
LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? "alive" : "dead");

state.unready_hosts.emplace(host, unready_host_state);
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
state.host_terminated = host;
@@ -113,51 +174,62 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
if (state.host_terminated || !state.unready_hosts.empty())
return state;

auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
Strings results;

for (const auto & host : all_hosts)
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
state.results = std::move(results);
});

return state;
}
Strings BackupCoordinationStageSync::waitImpl(
const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
{
if (all_hosts.empty())
return {};

/// Wait until all hosts are ready or an error happens or time is out.

bool use_timeout = timeout.has_value();
std::chrono::steady_clock::time_point end_of_timeout;
if (use_timeout)
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);

State state;
for (;;)
{
LOG_INFO(log, "Waiting for the stage {}", stage_to_wait);
/// Set by ZooKeeper when the list of zk nodes has changed.
auto watch = std::make_shared<Poco::Event>();
Strings zk_nodes;
{
auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren");
holder.retries_ctl.retryLoop(
[&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
watch->reset();
/// Get zk nodes and subscribe on their changes.
zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
});
}

/// Read and analyze the current state of zk nodes.
state = readCurrentState(zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
break; /// Error happened or everything is ready.

/// Log that we will wait.
const auto & unready_host = state.unready_hosts.begin()->first;
LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {})", unready_host);

/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
{
@@ -195,6 +267,7 @@ Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const S
unready_host_state.started ? "" : ": Operation didn't start");
}

LOG_TRACE(log, "Everything is Ok. All hosts achieved stage {}", stage_to_wait);
return state.results;
}
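One subtlety in the rewritten loop: the Poco::Event watch is now created and reset inside the retry loop, right before getChildren re-registers it. A ZooKeeper watch fires only once, so every iteration has to re-arm it, and a stale signaled event from a previous attempt must not satisfy the next wait. A standalone sketch of that arm-read-wait cycle using standard primitives instead of Poco::Event (the worker thread simulates a node change):

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

struct OneShotWatch                       // plays the role of Poco::Event
{
    std::mutex m;
    std::condition_variable cv;
    bool fired = false;

    void reset() { std::scoped_lock lk(m); fired = false; }
    void fire()  { { std::scoped_lock lk(m); fired = true; } cv.notify_all(); }
    bool wait_for(std::chrono::milliseconds t)
    {
        std::unique_lock lk(m);
        return cv.wait_for(lk, t, [&] { return fired; });
    }
};

int main()
{
    OneShotWatch watch;
    std::thread zk_server([&] { std::this_thread::sleep_for(std::chrono::milliseconds(50)); watch.fire(); });

    for (int iteration = 0; iteration < 2; ++iteration)
    {
        watch.reset();                    // re-arm before subscribing, as in the diff
        // ... the real code calls getChildren(path, nullptr, watch) here ...
        if (watch.wait_for(std::chrono::milliseconds(200)))
            std::cout << "nodes changed, re-read state\n";
        else
            std::cout << "timed out, check the deadline and loop\n";
    }
    zk_server.join();
}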
@@ -1,7 +1,6 @@
#pragma once

#include <Backups/WithRetries.h>

namespace DB
{
@@ -10,7 +9,10 @@ namespace DB
class BackupCoordinationStageSync
{
public:
BackupCoordinationStageSync(
const String & root_zookeeper_path_,
WithRetries & with_retries_,
Poco::Logger * log_);
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
void set(const String & current_host, const String & new_stage, const String & message);
@@ -27,12 +29,13 @@ private:
void createRootNodes();

struct State;
State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;

Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;

String zookeeper_path;
/// A reference to the field of the parent object - BackupCoordinationRemote or RestoreCoordinationRemote
WithRetries & with_retries;
Poco::Logger * log;
};
@@ -84,6 +84,12 @@ BackupEntriesCollector::BackupEntriesCollector(
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
, log(&Poco::Logger::get("BackupEntriesCollector"))
, global_zookeeper_retries_info(
"BackupEntriesCollector",
log,
context->getSettingsRef().backup_restore_keeper_max_retries,
context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
{
}
@@ -482,7 +488,10 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
try
{
/// The database or table could be replicated - so it may use ZooKeeper. We need to retry.
auto zookeeper_retries_info = global_zookeeper_retries_info;
ZooKeeperRetriesControl retries_ctl("getTablesForBackup", zookeeper_retries_info);
retries_ctl.retryLoop([&](){ db_tables = database->getTablesForBackup(filter_by_table_name, context); });
}
catch (Exception & e)
{
@@ -745,6 +754,7 @@ void BackupEntriesCollector::addPostTask(std::function<void()> task)
/// Runs all the tasks added with addPostCollectingTask().
void BackupEntriesCollector::runPostTasks()
{
LOG_TRACE(log, "Will run {} post tasks", post_tasks.size());
/// Post collecting tasks can add other post collecting tasks, our code is fine with that.
while (!post_tasks.empty())
{
@@ -752,6 +762,7 @@ void BackupEntriesCollector::runPostTasks()
post_tasks.pop();
std::move(task)();
}
LOG_TRACE(log, "All post tasks successfully executed");
}
size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type)
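runPostTasks drains a queue whose tasks may enqueue further tasks, which is why it pops before executing and loops on emptiness rather than iterating a snapshot. A standalone reduction of that pattern:

#include <functional>
#include <iostream>
#include <queue>

int main()
{
    std::queue<std::function<void()>> post_tasks;

    post_tasks.push([&]
    {
        std::cout << "first task, scheduling a follow-up\n";
        post_tasks.push([] { std::cout << "follow-up task\n"; });
    });

    // Same shape as runPostTasks: tasks added during the loop are still executed.
    while (!post_tasks.empty())
    {
        auto task = std::move(post_tasks.front());
        post_tasks.pop();                // pop before running, so the task may push
        std::move(task)();
    }
    std::cout << "all post tasks executed\n";
}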
@@ -6,6 +6,7 @@
#include <Parsers/ASTBackupQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/MergeTree/ZooKeeperRetries.h>
#include <filesystem>
#include <queue>
@@ -96,6 +97,9 @@ private:
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Poco::Logger * log;
/// Unfortunately collecting information for a backup may involve ZooKeeper,
/// so we need to be able to retry...
ZooKeeperRetriesInfo global_zookeeper_retries_info;

Strings all_hosts;
DDLRenamingMap renaming_map;
@@ -69,7 +69,7 @@ namespace
S3::CredentialsConfiguration
{
settings.auth_settings.use_environment_credentials.value_or(
context->getConfigRef().getBool("s3.use_environment_credentials", true)),
settings.auth_settings.use_insecure_imds_request.value_or(
context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
settings.auth_settings.expiration_window_seconds.value_or(
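The one-character change above flips only the fallback: a per-backup auth setting still wins when present, the server config key s3.use_environment_credentials wins next, and only when both are absent does the new default true apply. A standalone sketch of that std::optional::value_or layering:

#include <iostream>
#include <optional>

int main()
{
    // Stand-in for context->getConfigRef().getBool("s3.use_environment_credentials", true):
    bool config_or_default = true;                     // the new fallback default from the diff

    std::optional<bool> per_backup_setting;            // auth setting not set for this backup
    std::cout << std::boolalpha
              << per_backup_setting.value_or(config_or_default) << "\n"; // true

    per_backup_setting = false;                        // an explicit setting still wins
    std::cout << per_backup_setting.value_or(config_or_default) << "\n"; // false
}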
@@ -58,10 +58,13 @@ namespace
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_restore_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_restore_batch_size_for_keeper_multiread,
.keeper_fault_injection_probability = context->getSettingsRef().backup_restore_keeper_fault_injection_probability,
.keeper_fault_injection_seed = context->getSettingsRef().backup_restore_keeper_fault_injection_seed,
.keeper_value_max_size = context->getSettingsRef().backup_restore_keeper_value_max_size,
};

auto all_hosts = BackupSettings::Util::filterHostIDs(
@@ -92,10 +95,27 @@ namespace
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };

RestoreCoordinationRemote::RestoreKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_restore_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_restore_batch_size_for_keeper_multiread,
.keeper_fault_injection_probability = context->getSettingsRef().backup_restore_keeper_fault_injection_probability,
.keeper_fault_injection_seed = context->getSettingsRef().backup_restore_keeper_fault_injection_seed
};

auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);

return std::make_shared<RestoreCoordinationRemote>(
get_zookeeper,
root_zk_path,
keeper_settings,
toString(*restore_settings.restore_uuid),
all_hosts,
restore_settings.host_id,
restore_settings.internal);
} }
else else
{ {
@ -660,7 +680,9 @@ void BackupsWorker::doRestore(
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster); restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores))) if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'"); throw Exception(
ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
"Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
/// Do RESTORE. /// Do RESTORE.
if (on_cluster) if (on_cluster)

View File

@ -1,10 +1,14 @@
#include <Backups/RestoreCoordinationLocal.h> #include <Backups/RestoreCoordinationLocal.h>
#include <Common/logger_useful.h>
namespace DB namespace DB
{ {
RestoreCoordinationLocal::RestoreCoordinationLocal() = default; RestoreCoordinationLocal::RestoreCoordinationLocal() : log(&Poco::Logger::get("RestoreCoordinationLocal"))
{
}
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStage(const String &, const String &) void RestoreCoordinationLocal::setStage(const String &, const String &)
@ -49,7 +53,12 @@ bool RestoreCoordinationLocal::acquireReplicatedSQLObjects(const String &, UserD
bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const
{ {
return (num_active_restores > 1); if (num_active_restores > 1)
{
LOG_WARNING(log, "Found concurrent backups: num_active_restores={}", num_active_restores);
return true;
}
return false;
} }
} }

View File

@ -42,6 +42,8 @@ public:
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override; bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
private: private:
Poco::Logger * const log;
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases; std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables; std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
mutable std::mutex mutex; mutable std::mutex mutex;

View File

@ -1,9 +1,10 @@
#include <Backups/BackupCoordinationRemote.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/RestoreCoordinationRemote.h> #include <Backups/RestoreCoordinationRemote.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h> #include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h> #include <Common/escapeForFileName.h>
#include <Backups/BackupCoordinationStage.h> #include "Backups/BackupCoordinationStageSync.h"
#include <Backups/BackupCoordinationRemote.h>
namespace DB namespace DB
{ {
@ -13,23 +14,47 @@ namespace Stage = BackupCoordinationStage;
RestoreCoordinationRemote::RestoreCoordinationRemote( RestoreCoordinationRemote::RestoreCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_, zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_, const String & root_zookeeper_path_,
const RestoreKeeperSettings & keeper_settings_,
const String & restore_uuid_, const String & restore_uuid_,
const Strings & all_hosts_, const Strings & all_hosts_,
const String & current_host_, const String & current_host_,
bool is_internal_) bool is_internal_)
: get_zookeeper(get_zookeeper_) : get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_) , root_zookeeper_path(root_zookeeper_path_)
, keeper_settings(keeper_settings_)
, restore_uuid(restore_uuid_) , restore_uuid(restore_uuid_)
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_) , zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
, all_hosts(all_hosts_) , all_hosts(all_hosts_)
, current_host(current_host_) , current_host(current_host_)
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host)) , current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_) , is_internal(is_internal_)
, log(&Poco::Logger::get("RestoreCoordinationRemote"))
, with_retries(
log,
get_zookeeper_,
keeper_settings,
[zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal]
(WithRetries::FaultyKeeper & zk)
{
/// Recreate this ephemeral node to signal that we are alive.
if (is_internal)
{
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host;
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code == Coordination::Error::ZNODEEXISTS)
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
else if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, alive_node_path);
}
})
{ {
createRootNodes(); createRootNodes();
stage_sync.emplace( stage_sync.emplace(
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination")); zookeeper_path,
with_retries,
log);
} }
RestoreCoordinationRemote::~RestoreCoordinationRemote() RestoreCoordinationRemote::~RestoreCoordinationRemote()
@ -45,31 +70,25 @@ RestoreCoordinationRemote::~RestoreCoordinationRemote()
} }
} }
zkutil::ZooKeeperPtr RestoreCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to different [Zoo]Keeper instance
/// so we may read a bit stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void RestoreCoordinationRemote::createRootNodes() void RestoreCoordinationRemote::createRootNodes()
{ {
auto zk = getZooKeeper(); auto holder = with_retries.createRetriesControlHolder("createRootNodes");
zk->createAncestors(zookeeper_path); holder.retries_ctl.retryLoop(
zk->createIfNotExists(zookeeper_path, ""); [&, &zk = holder.faulty_zookeeper]()
zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", ""); {
zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", ""); with_retries.renewZooKeeper(zk);
zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", ""); zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects_acquired", "");
}
Coordination::Requests ops;
Coordination::Responses responses;
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_databases_tables_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_tables_data_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access_storages_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects_acquired", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
});
}
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message) void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
{ {
@ -91,66 +110,121 @@ Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait, st
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout); return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
} }
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{ {
auto zk = getZooKeeper(); bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireCreatingTableInReplicatedDatabase");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path); String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zk->createIfNotExists(path, ""); zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name); path += "/" + escapeForFileName(table_name);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent); auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path); throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK); if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
} }
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path) bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{ {
auto zk = getZooKeeper(); bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataIntoReplicatedTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path); String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent); auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path); throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK); if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
} }
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path) bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{ {
auto zk = getZooKeeper(); bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedAccessStorage");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path); String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent); auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path); throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK); if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
} }
bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type)
{ {
auto zk = getZooKeeper(); bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedSQLObjects");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path); String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path);
zk->createIfNotExists(path, ""); zk->createIfNotExists(path, "");
path += "/"; path += "/";
switch (object_type) switch (object_type)
{ {
case UserDefinedSQLObjectType::Function: case UserDefinedSQLObjectType::Function:
path += "functions"; path += "functions";
break; break;
} }
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent); auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path); throw zkutil::KeeperException(code, path);
return (code == Coordination::Error::ZOK); if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
} }
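All four acquire methods above share one idempotency pattern: the znode payload records which host created it, so a retry whose earlier create actually succeeded (but was reported as a fault) still recognizes the node as its own. A condensed sketch of that pattern; `tryAcquireNode` is an illustrative name, not part of this commit:

#include <Backups/WithRetries.h>

namespace DB
{

/// Condensed form of the acquire* methods above.
bool tryAcquireNode(WithRetries & with_retries, const String & path, size_t current_host_index)
{
    bool result = false;
    auto holder = with_retries.createRetriesControlHolder("tryAcquireNode");
    holder.retries_ctl.retryLoop(
        [&, &zk = holder.faulty_zookeeper]()
        {
            with_retries.renewZooKeeper(zk);
            auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
            if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
                throw zkutil::KeeperException(code, path);
            /// ZNODEEXISTS can mean "another host acquired it" or "our previous attempt
            /// succeeded right before an injected fault" - the payload disambiguates.
            result = (code == Coordination::Error::ZOK) || (zk->get(path) == toString(current_host_index));
        });
    return result;
}

}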
void RestoreCoordinationRemote::removeAllNodes() void RestoreCoordinationRemote::removeAllNodes()
@ -162,8 +236,13 @@ void RestoreCoordinationRemote::removeAllNodes()
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part /// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
/// of their restore work before that. /// of their restore work before that.
auto zk = getZooKeeper(); auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
zk->removeRecursive(zookeeper_path); holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
zk->removeRecursive(zookeeper_path);
});
} }
bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const
@ -172,43 +251,54 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
if (is_internal) if (is_internal)
return false; return false;
auto zk = getZooKeeper(); bool result = false;
std::string path = zookeeper_path +"/stage"; std::string path = zookeeper_path +"/stage";
if (! zk->exists(root_zookeeper_path)) auto holder = with_retries.createRetriesControlHolder("hasConcurrentRestores");
zk->createAncestors(root_zookeeper_path); holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
zk->get(root_zookeeper_path, &stat);
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
for (const auto & existing_restore_path : existing_restore_paths)
{ {
if (startsWith(existing_restore_path, "backup-")) with_retries.renewZooKeeper(zk);
continue;
String existing_restore_uuid = existing_restore_path; if (! zk->exists(root_zookeeper_path))
existing_restore_uuid.erase(0, String("restore-").size()); zk->createAncestors(root_zookeeper_path);
if (existing_restore_uuid == toString(restore_uuid)) for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
continue; {
Coordination::Stat stat;
zk->get(root_zookeeper_path, &stat);
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
for (const auto & existing_restore_path : existing_restore_paths)
{
if (startsWith(existing_restore_path, "backup-"))
continue;
String existing_restore_uuid = existing_restore_path;
existing_restore_uuid.erase(0, String("restore-").size());
const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage"); if (existing_restore_uuid == toString(restore_uuid))
if (status != Stage::COMPLETED) continue;
return true;
}
zk->createIfNotExists(path, ""); const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version); if (status != Stage::COMPLETED)
if (code == Coordination::Error::ZOK) {
break; LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1); result = true;
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt) return;
throw zkutil::KeeperException(code, path); }
} }
return false;
zk->createIfNotExists(path, "");
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version);
if (code == Coordination::Error::ZOK)
break;
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
throw zkutil::KeeperException(code, path);
}
});
return result;
} }
} }
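The attempt loop above is optimistic concurrency control: the root's version is read before scanning for concurrent restores, the stage marker is then written conditionally, and ZBADVERSION sends the loop back for another attempt. Reduced to a single node, the general version-checked write idiom looks like this (an illustrative sketch, not part of this commit):

#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>

namespace DB
{

/// Compare-and-set on a single znode using the version from a prior read.
bool casSet(Coordination::ZooKeeperWithFaultInjection::Ptr zk, const String & path, const String & new_value)
{
    Coordination::Stat stat;
    zk->get(path, &stat);                  /// remember the node's current version
    auto code = zk->trySet(path, new_value, stat.version);
    if (code == Coordination::Error::ZOK)
        return true;                       /// no concurrent writer got in between
    if (code == Coordination::Error::ZBADVERSION)
        return false;                      /// somebody raced with us; caller may re-read and retry
    throw zkutil::KeeperException(code, path);
}

}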

View File

@ -2,6 +2,7 @@
#include <Backups/IRestoreCoordination.h> #include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStageSync.h> #include <Backups/BackupCoordinationStageSync.h>
#include <Backups/WithRetries.h>
namespace DB namespace DB
@ -11,9 +12,12 @@ namespace DB
class RestoreCoordinationRemote : public IRestoreCoordination class RestoreCoordinationRemote : public IRestoreCoordination
{ {
public: public:
using RestoreKeeperSettings = WithRetries::KeeperSettings;
RestoreCoordinationRemote( RestoreCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_, zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_, const String & root_zookeeper_path_,
const RestoreKeeperSettings & keeper_settings_,
const String & restore_uuid_, const String & restore_uuid_,
const Strings & all_hosts_, const Strings & all_hosts_,
const String & current_host_, const String & current_host_,
@ -45,25 +49,26 @@ public:
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override; bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
private: private:
zkutil::ZooKeeperPtr getZooKeeper() const;
void createRootNodes(); void createRootNodes();
void removeAllNodes(); void removeAllNodes();
class ReplicatedDatabasesMetadataSync; class ReplicatedDatabasesMetadataSync;
/// get_zookeeper will provide a zookeeper client without any fault injection
const zkutil::GetZooKeeper get_zookeeper; const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path; const String root_zookeeper_path;
const RestoreKeeperSettings keeper_settings;
const String restore_uuid; const String restore_uuid;
const String zookeeper_path; const String zookeeper_path;
const Strings all_hosts; const Strings all_hosts;
const String current_host; const String current_host;
const size_t current_host_index; const size_t current_host_index;
const bool is_internal; const bool is_internal;
Poco::Logger * const log;
mutable WithRetries with_retries;
std::optional<BackupCoordinationStageSync> stage_sync; std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex; mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
}; };
} }

View File

@ -0,0 +1,61 @@
#include <mutex>
#include <Backups/WithRetries.h>
namespace DB
{
WithRetries::WithRetries(Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_)
: log(log_)
, get_zookeeper(get_zookeeper_)
, settings(settings_)
, callback(callback_)
, global_zookeeper_retries_info(
log->name(),
log,
settings.keeper_max_retries,
settings.keeper_retry_initial_backoff_ms,
settings.keeper_retry_max_backoff_ms)
{}
WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name)
: info(parent->global_zookeeper_retries_info)
, retries_ctl(name, info)
, faulty_zookeeper(parent->getFaultyZooKeeper())
{}
WithRetries::RetriesControlHolder WithRetries::createRetriesControlHolder(const String & name)
{
return RetriesControlHolder(this, name);
}
void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
{
std::lock_guard lock(zookeeper_mutex);
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
my_faulty_zookeeper->setKeeper(zookeeper);
callback(my_faulty_zookeeper);
}
}
WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
{
/// We need to create a new instance of ZooKeeperWithFaultInjection each time we copy a pointer to the ZooKeeper client into it.
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer, and there could be a race condition
/// when the same object is used from multiple threads.
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.keeper_fault_injection_probability,
settings.keeper_fault_injection_seed,
zookeeper,
log->name(),
log);
return faulty_zookeeper;
}
}

src/Backups/WithRetries.h Normal file
View File

@ -0,0 +1,79 @@
#pragma once
#include <Storages/MergeTree/ZooKeeperRetries.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>
namespace DB
{
/// In backups every request to [Zoo]Keeper should be retryable
/// and this tiny class encapsulates all the machinery to make that possible -
/// a [Zoo]Keeper client which injects faults with configurable probability
/// and a retries controller which performs retries with growing backoff.
class WithRetries
{
public:
using FaultyKeeper = Coordination::ZooKeeperWithFaultInjection::Ptr;
using RenewerCallback = std::function<void(FaultyKeeper &)>;
struct KeeperSettings
{
UInt64 keeper_max_retries{0};
UInt64 keeper_retry_initial_backoff_ms{0};
UInt64 keeper_retry_max_backoff_ms{0};
UInt64 batch_size_for_keeper_multiread{10000};
Float64 keeper_fault_injection_probability{0};
UInt64 keeper_fault_injection_seed{42};
UInt64 keeper_value_max_size{1048576};
};
/// For simplicity a separate ZooKeeperRetriesInfo and a faulty [Zoo]Keeper client
/// are stored in one place.
/// This helps to avoid writing too much boilerplate each time we need to
/// execute some operation (a set of requests) over [Zoo]Keeper with retries.
/// Why is ZooKeeperRetriesInfo separate for each operation?
/// The reason is that a backup usually takes a long time to finish, and it makes no sense
/// to limit the overall number of retries (for example, to 1000) for the whole backup
/// or to let the backoff keep growing across it.
class RetriesControlHolder
{
public:
ZooKeeperRetriesInfo info;
ZooKeeperRetriesControl retries_ctl;
FaultyKeeper faulty_zookeeper;
private:
friend class WithRetries;
RetriesControlHolder(const WithRetries * parent, const String & name);
};
RetriesControlHolder createRetriesControlHolder(const String & name);
WithRetries(Poco::Logger * log, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings, RenewerCallback callback);
/// Used to re-establish new connection inside a retry loop.
void renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const;
private:
/// This will provide a special wrapper which is useful for testing
FaultyKeeper getFaultyZooKeeper() const;
Poco::Logger * log;
zkutil::GetZooKeeper get_zookeeper;
KeeperSettings settings;
/// This callback is called each time a new [Zoo]Keeper session is created.
/// In backups it is primarily used to re-create an ephemeral node to signal the coordinator
/// that the host is alive and able to continue writing the backup.
/// The coordinator (or initiator) of the backup also retries when it doesn't find an ephemeral node
/// for a particular host.
/// Again, this scheme is not ideal. False positives are still possible, but in the worst case
/// they only lead to a failed backup which might have succeeded
/// with a few more retries.
RenewerCallback callback;
ZooKeeperRetriesInfo global_zookeeper_retries_info;
/// This is needed only to protect the zookeeper object
mutable std::mutex zookeeper_mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
};
}
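A minimal usage sketch of this machinery, mirroring how RestoreCoordinationRemote::createRootNodes uses it above; the function name and path are illustrative, not part of this commit:

#include <Backups/WithRetries.h>

namespace DB
{

/// Each operation gets its own RetriesControlHolder, so its retry budget and backoff start fresh.
void doSomethingInKeeper(WithRetries & with_retries, const String & path)
{
    auto holder = with_retries.createRetriesControlHolder("doSomethingInKeeper");
    holder.retries_ctl.retryLoop(
        [&, &zk = holder.faulty_zookeeper]()
        {
            /// Re-establishes the session if it expired and re-runs the RenewerCallback.
            with_retries.renewZooKeeper(zk);
            /// Any request here may fail due to an injected fault or a real Keeper error
            /// and is then retried with growing backoff, up to keeper_max_retries times.
            zk->createIfNotExists(path, "");
        });
}

}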

View File

@ -608,7 +608,8 @@ if (ENABLE_TESTS)
dbms dbms
clickhouse_common_config clickhouse_common_config
clickhouse_common_zookeeper clickhouse_common_zookeeper
string_utils) string_utils
hilite_comparator)
if (TARGET ch_contrib::simdjson) if (TARGET ch_contrib::simdjson)
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson) target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)

View File

@ -24,7 +24,7 @@ std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, si
Memory<> compressed(max_dest_size); Memory<> compressed(max_dest_size);
auto compressed_size = LZ4_compress_default( int compressed_size = LZ4_compress_default(
reinterpret_cast<const char *>(data), reinterpret_cast<const char *>(data),
compressed.data(), compressed.data(),
static_cast<int>(data_size), static_cast<int>(data_size),

View File

@ -1,11 +1,14 @@
#pragma once #pragma once
#include "ZooKeeper.h"
#include <functional> #include <functional>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>
namespace zkutil namespace zkutil
{ {
using GetZooKeeper = std::function<ZooKeeperPtr()>; using GetZooKeeper = std::function<ZooKeeperPtr()>;
using GetZooKeeperWithFaultInjection = std::function<Coordination::ZooKeeperWithFaultInjection::Ptr()>;
} }

View File

@ -146,4 +146,3 @@ private:
}; };
} }

View File

@ -29,6 +29,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
} }
} }
@ -1340,4 +1342,29 @@ String getSequentialNodeName(const String & prefix, UInt64 number)
return name; return name;
} }
void validateZooKeeperConfig(const Poco::Util::AbstractConfiguration & config)
{
if (config.has("zookeeper") && config.has("keeper"))
throw DB::Exception(DB::ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "Both ZooKeeper and Keeper are specified");
}
bool hasZooKeeperConfig(const Poco::Util::AbstractConfiguration & config)
{
return config.has("zookeeper") || config.has("keeper") || (config.has("keeper_server") && config.getBool("keeper_server.use_cluster", true));
}
String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config)
{
if (config.has("zookeeper"))
return "zookeeper";
if (config.has("keeper"))
return "keeper";
if (config.has("keeper_server") && config.getBool("keeper_server.use_cluster", true))
return "keeper_server";
throw DB::Exception(DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no ZooKeeper configuration in server config");
}
} }
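Taken together, these helpers give a single resolution order for the client configuration: an explicit <zookeeper> or <keeper> section (never both) wins, otherwise an embedded keeper_server section is used when keeper_server.use_cluster is enabled. A hedged sketch of the intended call sequence at startup; the surrounding function is illustrative:

#include <Poco/Util/AbstractConfiguration.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperArgs.h>

void initKeeperClient(const Poco::Util::AbstractConfiguration & config)
{
    /// Throws EXCESSIVE_ELEMENT_IN_CONFIG if both <zookeeper> and <keeper> are present.
    zkutil::validateZooKeeperConfig(config);
    if (!zkutil::hasZooKeeperConfig(config))
        return; /// this server runs without any [Zoo]Keeper
    /// Returns "zookeeper", "keeper" or "keeper_server"; ZooKeeperArgs picks the matching parser.
    const auto config_name = zkutil::getZooKeeperConfigName(config);
    zkutil::ZooKeeperArgs args(config, config_name);
}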

View File

@ -669,4 +669,10 @@ String extractZooKeeperPath(const String & path, bool check_starts_with_slash, P
String getSequentialNodeName(const String & prefix, UInt64 number); String getSequentialNodeName(const String & prefix, UInt64 number);
void validateZooKeeperConfig(const Poco::Util::AbstractConfiguration & config);
bool hasZooKeeperConfig(const Poco::Util::AbstractConfiguration & config);
String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config);
} }

View File

@ -18,6 +18,116 @@ namespace zkutil
{ {
ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const String & config_name) ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, const String & config_name)
{
if (config_name == "keeper_server")
initFromKeeperServerSection(config);
else
initFromKeeperSection(config, config_name);
if (!chroot.empty())
{
if (chroot.front() != '/')
throw KeeperException(
Coordination::Error::ZBADARGUMENTS,
"Root path in config file should start with '/', but got {}", chroot);
if (chroot.back() == '/')
chroot.pop_back();
}
if (session_timeout_ms < 0 || operation_timeout_ms < 0 || connection_timeout_ms < 0)
throw KeeperException("Timeout cannot be negative", Coordination::Error::ZBADARGUMENTS);
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
}
}
ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string)
{
splitInto<','>(hosts, hosts_string);
}
void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfiguration & config)
{
static constexpr std::string_view config_name = "keeper_server";
if (auto key = std::string{config_name} + ".tcp_port_secure";
config.has(key))
{
auto tcp_port_secure = config.getString(key);
if (tcp_port_secure.empty())
throw KeeperException("Empty tcp_port_secure in config file", Coordination::Error::ZBADARGUMENTS);
}
bool secure{false};
std::string tcp_port;
if (auto tcp_port_secure_key = std::string{config_name} + ".tcp_port_secure";
config.has(tcp_port_secure_key))
{
secure = true;
tcp_port = config.getString(tcp_port_secure_key);
}
else if (auto tcp_port_key = std::string{config_name} + ".tcp_port";
config.has(tcp_port_key))
{
tcp_port = config.getString(tcp_port_key);
}
if (tcp_port.empty())
throw KeeperException("No tcp_port or tcp_port_secure in config file", Coordination::Error::ZBADARGUMENTS);
if (auto coordination_key = std::string{config_name} + ".coordination_settings";
config.has(coordination_key))
{
if (auto operation_timeout_key = coordination_key + ".operation_timeout_ms";
config.has(operation_timeout_key))
operation_timeout_ms = config.getInt(operation_timeout_key);
if (auto session_timeout_key = coordination_key + ".session_timeout_ms";
config.has(session_timeout_key))
session_timeout_ms = config.getInt(session_timeout_key);
}
Poco::Util::AbstractConfiguration::Keys keys;
std::string raft_configuration_key = std::string{config_name} + ".raft_configuration";
config.keys(raft_configuration_key, keys);
for (const auto & key : keys)
{
if (startsWith(key, "server"))
hosts.push_back(
(secure ? "secure://" : "") + config.getString(raft_configuration_key + "." + key + ".hostname") + ":" + tcp_port);
}
static constexpr std::array load_balancing_keys
{
".zookeeper_load_balancing",
".keeper_load_balancing"
};
for (const auto * load_balancing_key : load_balancing_keys)
{
if (auto load_balancing_config = std::string{config_name} + load_balancing_key;
config.has(load_balancing_config))
{
String load_balancing_str = config.getString(load_balancing_config);
/// Use magic_enum to avoid a dependency on dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
break;
}
}
}
void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguration & config, const std::string & config_name)
{ {
Poco::Util::AbstractConfiguration::Keys keys; Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_name, keys); config.keys(config_name, keys);
@ -84,7 +194,7 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
{ {
implementation = config.getString(config_name + "." + key); implementation = config.getString(config_name + "." + key);
} }
else if (key == "zookeeper_load_balancing") else if (key == "zookeeper_load_balancing" || key == "keeper_load_balancing")
{ {
String load_balancing_str = config.getString(config_name + "." + key); String load_balancing_str = config.getString(config_name + "." + key);
/// Use magic_enum to avoid a dependency on dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) /// Use magic_enum to avoid a dependency on dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
@ -96,33 +206,6 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
else else
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
} }
if (!chroot.empty())
{
if (chroot.front() != '/')
throw KeeperException(
Coordination::Error::ZBADARGUMENTS,
"Root path in config file should start with '/', but got {}", chroot);
if (chroot.back() == '/')
chroot.pop_back();
}
if (session_timeout_ms < 0 || operation_timeout_ms < 0 || connection_timeout_ms < 0)
throw KeeperException("Timeout cannot be negative", Coordination::Error::ZBADARGUMENTS);
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
}
}
ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string)
{
splitInto<','>(hosts, hosts_string);
} }
} }

View File

@ -37,6 +37,10 @@ struct ZooKeeperArgs
UInt64 recv_sleep_ms = 0; UInt64 recv_sleep_ms = 0;
DB::GetPriorityForLoadBalancing get_priority_load_balancing; DB::GetPriorityForLoadBalancing get_priority_load_balancing;
private:
void initFromKeeperServerSection(const Poco::Util::AbstractConfiguration & config);
void initFromKeeperSection(const Poco::Util::AbstractConfiguration & config, const std::string & config_name);
}; };
} }

View File

@ -114,6 +114,7 @@ public:
void setKeeper(zk::Ptr const & keeper_) { keeper = keeper_; } void setKeeper(zk::Ptr const & keeper_) { keeper = keeper_; }
bool isNull() const { return keeper.get() == nullptr; } bool isNull() const { return keeper.get() == nullptr; }
bool expired() { return keeper->expired(); }
/// ///
/// mirror ZooKeeper interface /// mirror ZooKeeper interface
@ -232,6 +233,11 @@ public:
return access("exists", path, [&]() { return keeper->exists(path, stat, watch); }); return access("exists", path, [&]() { return keeper->exists(path, stat, watch); });
} }
bool existsNoFailureInjection(const std::string & path, Coordination::Stat * stat = nullptr, const zkutil::EventPtr & watch = nullptr)
{
return access<false, false, false>("exists", path, [&]() { return keeper->exists(path, stat, watch); });
}
zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths) zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths)
{ {
return access("exists", !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); }); return access("exists", !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); });
@ -239,19 +245,30 @@ public:
std::string create(const std::string & path, const std::string & data, int32_t mode) std::string create(const std::string & path, const std::string & data, int32_t mode)
{ {
auto path_created = access( std::string path_created;
"create", auto code = tryCreate(path, data, mode, path_created);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, path);
return path_created;
}
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created)
{
auto error = access(
"tryCreate",
path, path,
[&]() { return keeper->create(path, data, mode); }, [&]() { return keeper->tryCreate(path, data, mode, path_created); },
[&](std::string const & result_path) [&](Coordination::Error &)
{ {
try try
{ {
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral) if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
{ {
keeper->remove(result_path); keeper->remove(path);
if (unlikely(logger)) if (unlikely(logger))
LOG_TRACE(logger, "ZooKeeperWithFaultInjection cleanup: seed={} func={} path={}", seed, "create", result_path); LOG_TRACE(logger, "ZooKeeperWithFaultInjection cleanup: seed={} func={} path={}", seed, "tryCreate", path);
} }
} }
catch (const zkutil::KeeperException & e) catch (const zkutil::KeeperException & e)
@ -261,8 +278,8 @@ public:
logger, logger,
"ZooKeeperWithFaultInjection cleanup FAILED: seed={} func={} path={} code={} message={} ", "ZooKeeperWithFaultInjection cleanup FAILED: seed={} func={} path={} code={} message={} ",
seed, seed,
"create", "tryCreate",
result_path, path,
e.code, e.code,
e.message()); e.message());
} }
@ -272,10 +289,27 @@ public:
if (unlikely(fault_policy)) if (unlikely(fault_policy))
{ {
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral) if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
ephemeral_nodes.push_back(path_created); ephemeral_nodes.push_back(path);
} }
return path_created; return error;
}
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode)
{
String path_created;
return tryCreate(path, data, mode, path_created);
}
void createIfNotExists(const std::string & path, const std::string & data)
{
std::string path_created;
auto code = tryCreate(path, data, zkutil::CreateMode::Persistent, path_created);
if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
return;
throw zkutil::KeeperException(code, path);
} }
Coordination::Responses multi(const Coordination::Requests & requests) Coordination::Responses multi(const Coordination::Requests & requests)
@ -306,6 +340,27 @@ public:
return access("tryRemove", path, [&]() { return keeper->tryRemove(path, version); }); return access("tryRemove", path, [&]() { return keeper->tryRemove(path, version); });
} }
void removeRecursive(const std::string & path)
{
return access("removeRecursive", path, [&]() { return keeper->removeRecursive(path); });
}
std::string sync(const std::string & path)
{
return access("sync", path, [&]() { return keeper->sync(path); });
}
Coordination::Error trySet(const std::string & path, const std::string & data, int32_t version = -1, Coordination::Stat * stat = nullptr)
{
return access("trySet", path, [&]() { return keeper->trySet(path, data, version, stat); });
}
void handleEphemeralNodeExistenceNoFailureInjection(const std::string & path, const std::string & fast_delete_if_equal_value)
{
return access<false, false, false>("handleEphemeralNodeExistence", path, [&]() { return keeper->handleEphemeralNodeExistence(path, fast_delete_if_equal_value); });
}
void cleanupEphemeralNodes() void cleanupEphemeralNodes()
{ {
for (const auto & path : ephemeral_nodes) for (const auto & path : ephemeral_nodes)

View File

@ -105,7 +105,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
std::move(headers), std::move(headers),
S3::CredentialsConfiguration S3::CredentialsConfiguration
{ {
auth_settings.use_environment_credentials.value_or(false), auth_settings.use_environment_credentials.value_or(true),
auth_settings.use_insecure_imds_request.value_or(false), auth_settings.use_insecure_imds_request.value_or(false),
auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS),
auth_settings.no_sign_request.value_or(false), auth_settings.no_sign_request.value_or(false),

View File

@ -30,7 +30,7 @@
#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1 #define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1
#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 1 #define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 2
#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453 #define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453
#define DBMS_MERGE_TREE_PART_INFO_VERSION 1 #define DBMS_MERGE_TREE_PART_INFO_VERSION 1

View File

@ -415,11 +415,13 @@ class IColumn;
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\ M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\ M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
\ \
M(UInt64, backup_keeper_max_retries, 20, "Max retries for keeper operations during backup", 0) \ M(UInt64, backup_restore_keeper_max_retries, 20, "Max retries for keeper operations during backup or restore", 0) \
M(UInt64, backup_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup", 0) \ M(UInt64, backup_restore_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup or restore", 0) \
M(UInt64, backup_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup", 0) \ M(UInt64, backup_restore_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup or restore", 0) \
M(UInt64, backup_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \ M(Float, backup_restore_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during backup or restore. Valid value is in interval [0.0f, 1.0f]", 0) \
M(UInt64, backup_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup", 0) \ M(UInt64, backup_restore_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \
\ \
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \ M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
@ -558,6 +560,8 @@ class IColumn;
M(Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query cache", 0) \ M(Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false, "Store results of queries with non-deterministic functions (e.g. rand(), now()) in the query cache", 0) \
M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \ M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \
M(Milliseconds, query_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query cache.", 0) \ M(Milliseconds, query_cache_min_query_duration, 0, "Minimum time in milliseconds for a query to run for its result to be stored in the query cache.", 0) \
M(Bool, query_cache_compress_entries, true, "Compress cache entries.", 0) \
M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \
M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \ M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
\ \
@ -722,6 +726,9 @@ class IColumn;
M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \ M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \ M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function to return complex type, such as: struct, array, map.", 0) \
// End of COMMON_SETTINGS // End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -946,7 +953,6 @@ class IColumn;
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
M(Bool, check_dictionary_primary_key, true, "Check primary key type for simple dictionary is native unsigned integer", 0) \ M(Bool, check_dictionary_primary_key, true, "Check primary key type for simple dictionary is native unsigned integer", 0) \
// End of FORMAT_FACTORY_SETTINGS // End of FORMAT_FACTORY_SETTINGS
// Please add settings non-related to formats into the COMMON_SETTINGS above. // Please add settings non-related to formats into the COMMON_SETTINGS above.

View File

@ -34,6 +34,7 @@
#include <Parsers/parseQuery.h> #include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h> #include <Parsers/ParserCreateQuery.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Storages/StorageKeeperMap.h>
namespace DB namespace DB
{ {
@ -1390,6 +1391,13 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
/// Some ALTERs are not replicated on database level /// Some ALTERs are not replicated on database level
if (const auto * alter = query_ptr->as<const ASTAlterQuery>()) if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
{ {
auto table_id = query_context->resolveStorageID(*alter, Context::ResolveOrdinary);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, query_context);
/// We never replicate KeeperMap operations because it doesn't make sense.
if (auto * keeper_map = table->as<StorageKeeperMap>())
return false;
return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter(); return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter();
} }

View File

@ -154,7 +154,7 @@ std::unique_ptr<S3::Client> getClient(
{}, {},
S3::CredentialsConfiguration S3::CredentialsConfiguration
{ {
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)), config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)),
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)), config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)),
config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false)) config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false))

View File

@ -9,6 +9,7 @@
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/IDataType.h> #include <DataTypes/IDataType.h>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/join.hpp> #include <boost/algorithm/string/join.hpp>
@ -264,23 +265,25 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d
return false; return false;
} }
if (!tuple_data_type->haveExplicitNames()) bool have_explicit_names = tuple_data_type->haveExplicitNames();
const auto & nested_names = tuple_data_type->getElementNames();
for (uint32_t i = 0; i != nested_names.size(); ++i)
{ {
error_message += "Only named Tuple can be converted to CapnProto Struct"; if (have_explicit_names)
return false;
}
for (const auto & name : tuple_data_type->getElementNames())
{
KJ_IF_MAYBE(field, struct_schema.findFieldByName(name))
{ {
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message, name)) KJ_IF_MAYBE (field, struct_schema.findFieldByName(nested_names[i]))
{
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
return false;
}
else
{
error_message += "CapnProto struct doesn't contain a field with name " + nested_names[i];
return false; return false;
}
} }
else else if (!checkCapnProtoType(struct_schema.getFields()[i].getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
{
error_message += "CapnProto struct doesn't contain a field with name " + name;
return false; return false;
}
} }
return true; return true;
@ -307,41 +310,129 @@ static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & d
return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name); return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name);
} }
static bool checkMapType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
{
/// We output/input the Map type as the following CapnProto schema:
///
/// struct Map {
/// struct Entry {
/// key @0: Key;
/// value @1: Value;
/// }
/// entries @0 :List(Entry);
/// }
if (!capnp_type.isStruct())
return false;
auto struct_schema = capnp_type.asStruct();
if (checkIfStructContainsUnnamedUnion(struct_schema))
{
error_message += "CapnProto struct contains unnamed union";
return false;
}
if (struct_schema.getFields().size() != 1)
{
error_message += "CapnProto struct that represents Map type can contain only one field";
return false;
}
const auto & field_type = struct_schema.getFields()[0].getType();
if (!field_type.isList())
{
error_message += "Field of CapnProto struct that represents Map is not a list";
return false;
}
auto list_element_type = field_type.asList().getElementType();
if (!list_element_type.isStruct())
{
error_message += "Field of CapnProto struct that represents Map is not a list of structs";
return false;
}
auto key_value_struct = list_element_type.asStruct();
if (checkIfStructContainsUnnamedUnion(key_value_struct))
{
error_message += "CapnProto struct contains unnamed union";
return false;
}
if (key_value_struct.getFields().size() != 2)
{
error_message += "Key-value structure for Map struct should have exactly 2 fields";
return false;
}
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
DataTypes types = {map_type.getKeyType(), map_type.getValueType()};
Names names = {"key", "value"};
for (size_t i = 0; i != types.size(); ++i)
{
KJ_IF_MAYBE(field, key_value_struct.findFieldByName(names[i]))
{
if (!checkCapnProtoType(field->getType(), types[i], mode, error_message, names[i]))
return false;
}
else
{
error_message += R"(Key-value structure for Map struct should have exactly 2 fields with names "key" and "value")";
return false;
}
}
return true;
}
static bool isCapnInteger(const capnp::Type & capnp_type)
{
return capnp_type.isInt8() || capnp_type.isUInt8() || capnp_type.isInt16() || capnp_type.isUInt16() || capnp_type.isInt32()
|| capnp_type.isUInt32() || capnp_type.isInt64() || capnp_type.isUInt64();
}
static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name) static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name)
{ {
switch (data_type->getTypeId()) switch (data_type->getTypeId())
{ {
case TypeIndex::UInt8: case TypeIndex::UInt8:
return capnp_type.isBool() || capnp_type.isUInt8(); return capnp_type.isBool() || isCapnInteger(capnp_type);
case TypeIndex::Date: [[fallthrough]]; case TypeIndex::Int8: [[fallthrough]];
case TypeIndex::UInt16: case TypeIndex::Int16: [[fallthrough]];
case TypeIndex::UInt16: [[fallthrough]];
case TypeIndex::Int32: [[fallthrough]];
case TypeIndex::UInt32: [[fallthrough]];
case TypeIndex::Int64: [[fallthrough]];
case TypeIndex::UInt64:
/// Allow integer conversions during input/output.
return isCapnInteger(capnp_type);
case TypeIndex::Date:
return capnp_type.isUInt16(); return capnp_type.isUInt16();
case TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::DateTime: [[fallthrough]];
case TypeIndex::IPv4: [[fallthrough]]; case TypeIndex::IPv4:
case TypeIndex::UInt32:
return capnp_type.isUInt32(); return capnp_type.isUInt32();
case TypeIndex::UInt64:
return capnp_type.isUInt64();
case TypeIndex::Int8:
return capnp_type.isInt8();
case TypeIndex::Int16:
return capnp_type.isInt16();
case TypeIndex::Date32: [[fallthrough]]; case TypeIndex::Date32: [[fallthrough]];
case TypeIndex::Decimal32: [[fallthrough]]; case TypeIndex::Decimal32:
case TypeIndex::Int32: return capnp_type.isInt32() || capnp_type.isUInt32();
return capnp_type.isInt32();
case TypeIndex::DateTime64: [[fallthrough]]; case TypeIndex::DateTime64: [[fallthrough]];
case TypeIndex::Decimal64: [[fallthrough]]; case TypeIndex::Decimal64:
case TypeIndex::Int64: return capnp_type.isInt64() || capnp_type.isUInt64();
return capnp_type.isInt64(); case TypeIndex::Float32:[[fallthrough]];
case TypeIndex::Float32:
return capnp_type.isFloat32();
case TypeIndex::Float64: case TypeIndex::Float64:
return capnp_type.isFloat64(); /// Allow converting between Float32 and Float64
return capnp_type.isFloat32() || capnp_type.isFloat64();
case TypeIndex::Enum8: case TypeIndex::Enum8:
return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message); return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message);
case TypeIndex::Enum16: case TypeIndex::Enum16:
return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message); return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message);
case TypeIndex::Int128: [[fallthrough]];
case TypeIndex::UInt128: [[fallthrough]];
case TypeIndex::Int256: [[fallthrough]];
case TypeIndex::UInt256: [[fallthrough]];
case TypeIndex::Decimal128: [[fallthrough]];
case TypeIndex::Decimal256:
return capnp_type.isData();
case TypeIndex::Tuple: case TypeIndex::Tuple:
return checkTupleType(capnp_type, data_type, mode, error_message); return checkTupleType(capnp_type, data_type, mode, error_message);
case TypeIndex::Nullable: case TypeIndex::Nullable:
@ -359,6 +450,8 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr
case TypeIndex::IPv6: [[fallthrough]]; case TypeIndex::IPv6: [[fallthrough]];
case TypeIndex::String: case TypeIndex::String:
return capnp_type.isText() || capnp_type.isData(); return capnp_type.isText() || capnp_type.isData();
case TypeIndex::Map:
return checkMapType(capnp_type, data_type, mode, error_message);
default: default:
return false; return false;
} }
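The rewritten switch above collapses eight per-width integer cases into one fallthrough group guarded by a single shared predicate. Below is a self-contained sketch of the same pattern; the enum values and names are invented for illustration and are not ClickHouse or Cap'n Proto identifiers.

#include <cassert>

enum class ColumnType { UInt8, Int32, UInt64, Date, Float32 };
enum class WireType { Bool, Int8, UInt16, Int32, UInt64, Float32, Float64 };

static bool isWireInteger(WireType t)
{
    return t == WireType::Int8 || t == WireType::UInt16 || t == WireType::Int32 || t == WireType::UInt64;
}

static bool isCompatible(ColumnType column, WireType wire)
{
    switch (column)
    {
        case ColumnType::UInt8:
            return wire == WireType::Bool || isWireInteger(wire); /// Bool is still accepted for UInt8
        case ColumnType::Int32: [[fallthrough]];
        case ColumnType::UInt64:
            return isWireInteger(wire);                           /// any integer width converts
        case ColumnType::Date:
            return wire == WireType::UInt16;                      /// dates stay exact
        case ColumnType::Float32:
            return wire == WireType::Float32 || wire == WireType::Float64;
    }
    return false;
}

int main()
{
    assert(isCompatible(ColumnType::UInt64, WireType::Int8));  /// the widened rule
    assert(!isCompatible(ColumnType::Date, WireType::UInt64)); /// non-integer columns remain strict
}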

View File

@@ -3,9 +3,11 @@
 #include <sstream>
 #include <type_traits>
 #include <Columns/ColumnConst.h>
+#include <Columns/ColumnNullable.h>
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>
 #include <Core/Settings.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Common/JSONParsers/DummyJSONParser.h>
@@ -40,7 +42,7 @@ public:
 class Executor
 {
 public:
-    static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth)
+    static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, const ContextPtr & context)
     {
         MutableColumnPtr to{result_type->createColumn()};
         to->reserve(input_rows_count);
@@ -115,7 +117,6 @@ public:
         /// Parse JSON for every row
         Impl<JSONParser> impl;
         for (const auto i : collections::range(0, input_rows_count))
         {
             std::string_view json{
@@ -125,7 +126,7 @@ public:
             bool added_to_column = false;
             if (document_ok)
             {
-                added_to_column = impl.insertResultToColumn(*to, document, res);
+                added_to_column = impl.insertResultToColumn(*to, document, res, context);
             }
             if (!added_to_column)
             {
@@ -154,7 +155,7 @@ public:
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        return Impl<DummyJSONParser>::getReturnType(Name::name, arguments);
+        return Impl<DummyJSONParser>::getReturnType(Name::name, arguments, getContext());
     }

     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@@ -167,9 +168,9 @@ public:
         unsigned parse_depth = static_cast<unsigned>(getContext()->getSettingsRef().max_parser_depth);
 #if USE_SIMDJSON
         if (getContext()->getSettingsRef().allow_simdjson)
-            return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
+            return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
 #endif
-        return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
+        return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
     }
 };
@@ -194,11 +195,11 @@ class JSONExistsImpl
 public:
     using Element = typename JSONParser::Element;

-    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
+    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared<DataTypeUInt8>(); }

     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
     {
         GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
@@ -233,11 +234,22 @@ class JSONValueImpl
 public:
     using Element = typename JSONParser::Element;

-    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }
+    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr & context)
+    {
+        if (context->getSettingsRef().function_json_value_return_type_allow_nullable)
+        {
+            DataTypePtr string_type = std::make_shared<DataTypeString>();
+            return std::make_shared<DataTypeNullable>(string_type);
+        }
+        else
+        {
+            return std::make_shared<DataTypeString>();
+        }
+    }

     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr & context)
     {
         GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
@@ -247,7 +259,11 @@ public:
         {
             if (status == VisitorStatus::Ok)
             {
-                if (!(current_element.isArray() || current_element.isObject()))
+                if (context->getSettingsRef().function_json_value_return_type_allow_complex)
+                {
+                    break;
+                }
+                else if (!(current_element.isArray() || current_element.isObject()))
                 {
                     break;
                 }
@@ -267,9 +283,19 @@ public:
         std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
         out << current_element.getElement();
         auto output_str = out.str();
-        ColumnString & col_str = assert_cast<ColumnString &>(dest);
-        ColumnString::Chars & data = col_str.getChars();
-        ColumnString::Offsets & offsets = col_str.getOffsets();
+        ColumnString * col_str;
+        if (isColumnNullable(dest))
+        {
+            ColumnNullable & col_null = assert_cast<ColumnNullable &>(dest);
+            col_null.getNullMapData().push_back(0);
+            col_str = assert_cast<ColumnString *>(&col_null.getNestedColumn());
+        }
+        else
+        {
+            col_str = assert_cast<ColumnString *>(&dest);
+        }
+        ColumnString::Chars & data = col_str->getChars();
+        ColumnString::Offsets & offsets = col_str->getOffsets();

         if (current_element.isString())
         {
@@ -280,7 +306,7 @@ public:
         }
         else
         {
-            col_str.insertData(output_str.data(), output_str.size());
+            col_str->insertData(output_str.data(), output_str.size());
         }
         return true;
     }
@@ -296,11 +322,11 @@ class JSONQueryImpl
 public:
     using Element = typename JSONParser::Element;

-    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }
+    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared<DataTypeString>(); }

     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
     {
         GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
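Since JSON_VALUE can now return Nullable(String), every successful write has to update two structures in lockstep: the null map and the nested string column. A compilable miniature of that invariant, with invented types (the real ColumnNullable API differs):

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical miniature of a nullable string column: a values vector plus a
// parallel null map, where 0 means "value present" and 1 means NULL.
struct NullableStringColumn
{
    std::vector<std::string> values;
    std::vector<uint8_t> null_map;

    void insertValue(std::string v)
    {
        values.push_back(std::move(v)); /// write into the nested column...
        null_map.push_back(0);          /// ...and mark the row as non-NULL
    }

    void insertNull()
    {
        values.emplace_back(); /// nested column still needs a placeholder row
        null_map.push_back(1);
    }
};

int main()
{
    NullableStringColumn col;
    col.insertValue("found");
    col.insertNull(); /// e.g. JSON_VALUE matched nothing for this row
    assert(col.null_map[0] == 0 && col.null_map[1] == 1);
    assert(col.values.size() == col.null_map.size()); /// invariant both code paths must keep
}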

View File

@@ -20,6 +20,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int UNSUPPORTED_METHOD;
+    extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
 }

 void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast)
@@ -132,6 +133,12 @@ ASTPtr UserDefinedSQLFunctionVisitor::tryToReplaceFunction(const ASTFunction & f
     if (!user_defined_function)
         return nullptr;

+    /// All UDFs are not parametric for now.
+    if (function.parameters)
+    {
+        throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function.name);
+    }
+
     const auto & function_arguments_list = function.children.at(0)->as<ASTExpressionList>();
     auto & function_arguments = function_arguments_list->children;
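The same check is added for ordinary functions in the ActionsMatcher hunk further below; in both places the rule is simply that a non-null parameters list on a non-parametric function is an error. A standalone sketch against a hypothetical AST node type:

#include <cassert>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical AST node: `name(args)` has only `arguments`; the parametric
// call form `name(params)(args)` additionally carries a non-null parameters list.
struct FunctionCall
{
    std::string name;
    std::shared_ptr<std::vector<std::string>> parameters; /// null for plain calls
    std::vector<std::string> arguments;
};

static void validateNonParametric(const FunctionCall & call)
{
    if (call.parameters)
        throw std::runtime_error("Function " + call.name + " is not parametric");
}

int main()
{
    FunctionCall plain{"myUdf", nullptr, {"x"}};
    validateNonParametric(plain); /// fine

    FunctionCall parametric{"myUdf", std::make_shared<std::vector<std::string>>(std::vector<std::string>{"p"}), {"x"}};
    try { validateNonParametric(parametric); assert(false); }
    catch (const std::runtime_error &) { /* expected */ }
}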

View File

@@ -1405,7 +1405,7 @@ REGISTER_FUNCTION(FormatDateTime)
     factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name);

     factory.registerFunction<FunctionFromUnixTimestamp>();
-    factory.registerAlias("FROM_UNIXTIME", "fromUnixTimestamp");
+    factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name);

     factory.registerFunction<FunctionFormatDateTimeInJodaSyntax>();
     factory.registerFunction<FunctionFromUnixTimestampInJodaSyntax>();

View File

@@ -174,23 +174,31 @@ public:
             getName(),
             arguments.size());

-        const auto * keys_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
-        if (!keys_type)
+        /// The first argument should always be Array,
+        /// because the key type cannot be the nested type of Map, which is Tuple.
+        DataTypePtr key_type;
+        if (const auto * keys_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()))
+            key_type = keys_type->getNestedType();
+        else
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an Array", getName());

-        const auto * values_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());
-        if (!values_type)
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be an Array", getName());
+        DataTypePtr value_type;
+        if (const auto * value_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get()))
+            value_type = value_array_type->getNestedType();
+        else if (const auto * value_map_type = checkAndGetDataType<DataTypeMap>(arguments[1].get()))
+            value_type = std::make_shared<DataTypeTuple>(value_map_type->getKeyValueTypes());
+        else
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be Array or Map", getName());

-        DataTypes key_value_types{keys_type->getNestedType(), values_type->getNestedType()};
+        DataTypes key_value_types{key_type, value_type};
         return std::make_shared<DataTypeMap>(key_value_types);
     }

     ColumnPtr executeImpl(
         const ColumnsWithTypeAndName & arguments, const DataTypePtr & /* result_type */, size_t /* input_rows_count */) const override
     {
-        ColumnPtr holder_keys;
         bool is_keys_const = isColumnConst(*arguments[0].column);
+        ColumnPtr holder_keys;
         const ColumnArray * col_keys;
         if (is_keys_const)
         {
@@ -202,24 +210,26 @@ public:
             col_keys = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
         }

-        ColumnPtr holder_values;
-        bool is_values_const = isColumnConst(*arguments[1].column);
-        const ColumnArray * col_values;
-        if (is_values_const)
-        {
-            holder_values = arguments[1].column->convertToFullColumnIfConst();
-            col_values = checkAndGetColumn<ColumnArray>(holder_values.get());
-        }
-        else
-        {
-            col_values = checkAndGetColumn<ColumnArray>(arguments[1].column.get());
-        }
-
-        if (!col_keys || !col_values)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be array", getName());
+        if (!col_keys)
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The first argument of function {} must be Array", getName());
+
+        bool is_values_const = isColumnConst(*arguments[1].column);
+        ColumnPtr holder_values;
+        if (is_values_const)
+            holder_values = arguments[1].column->convertToFullColumnIfConst();
+        else
+            holder_values = arguments[1].column;
+
+        const ColumnArray * col_values;
+        if (const auto * col_values_array = checkAndGetColumn<ColumnArray>(holder_values.get()))
+            col_values = col_values_array;
+        else if (const auto * col_values_map = checkAndGetColumn<ColumnMap>(holder_values.get()))
+            col_values = &col_values_map->getNestedColumn();
+        else
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The second argument of function {} must be Array or Map", getName());

         if (!col_keys->hasEqualOffsets(*col_values))
-            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName());
+            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Two arguments for function {} must have equal sizes", getName());

         const auto & data_keys = col_keys->getDataPtr();
         const auto & data_values = col_values->getDataPtr();
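With the second argument now accepted as either Array or Map, the remaining structural requirement is equal sizes (equal offsets, in column terms) between keys and values. A simplified, compilable analogue over flat std::vector, which stands in for the real column-and-offsets machinery:

#include <cassert>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Toy version of the same contract: keys come as an array, values come as an
// array of the same length; the sizes must match before zipping into a map.
template <typename K, typename V>
std::map<K, V> mapFromArrays(const std::vector<K> & keys, const std::vector<V> & values)
{
    if (keys.size() != values.size())
        throw std::invalid_argument("Two arguments for mapFromArrays must have equal sizes");

    std::map<K, V> result;
    for (size_t i = 0; i != keys.size(); ++i)
        result.emplace(keys[i], values[i]);
    return result;
}

int main()
{
    auto m = mapFromArrays<std::string, int>({"a", "b"}, {1, 2});
    assert(m.at("b") == 2);

    try { mapFromArrays<std::string, int>({"a"}, {1, 2}); assert(false); }
    catch (const std::invalid_argument &) { /* size mismatch rejected */ }
}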

View File

@@ -1,4 +1,5 @@
 #include <IO/CascadeWriteBuffer.h>
+#include <IO/MemoryReadWriteBuffer.h>
 #include <Common/Exception.h>

 namespace DB
@@ -35,9 +36,9 @@ void CascadeWriteBuffer::nextImpl()
         curr_buffer->position() = position();
         curr_buffer->next();
     }
-    catch (const Exception & e)
+    catch (const MemoryWriteBuffer::CurrentBufferExhausted &)
     {
-        if (curr_buffer_num < num_sources && e.code() == ErrorCodes::CURRENT_WRITE_BUFFER_IS_EXHAUSTED)
+        if (curr_buffer_num < num_sources)
         {
             /// TODO: protocol should require set(position(), 0) before Exception
@@ -46,7 +47,7 @@ void CascadeWriteBuffer::nextImpl()
             curr_buffer = setNextBuffer();
         }
         else
-            throw;
+            throw Exception(ErrorCodes::CURRENT_WRITE_BUFFER_IS_EXHAUSTED, "MemoryWriteBuffer limit is exhausted");
     }

     set(curr_buffer->position(), curr_buffer->buffer().end() - curr_buffer->position());

View File

@@ -16,7 +16,7 @@ namespace ErrorCodes
  * (lazy_sources contains not the pointers themselves, but their delayed constructors)
  *
  * Firstly, CascadeWriteBuffer redirects data to the first buffer of the sequence.
- * If the current WriteBuffer cannot receive data anymore, it throws the special exception CURRENT_WRITE_BUFFER_IS_EXHAUSTED in the nextImpl() body,
+ * If the current WriteBuffer cannot receive data anymore, it throws the special exception MemoryWriteBuffer::CurrentBufferExhausted in the nextImpl() body,
  * and CascadeWriteBuffer prepares the next buffer and continues redirecting data to it.
  * If there are no buffers anymore, CascadeWriteBuffer throws an exception.
  *
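The buffer-switching protocol described above is easier to see in isolation: a dedicated exception type means the cascade catches exactly the "buffer full" condition, rather than matching an error code on a generic exception. A self-contained sketch with invented names:

#include <cassert>
#include <exception>
#include <string>
#include <vector>

/// Typed exhaustion signal, mirroring the idea of a dedicated exception class
/// instead of an error code on a generic Exception.
struct BufferExhausted : std::exception
{
    const char * what() const noexcept override { return "buffer limit is exhausted"; }
};

struct BoundedBuffer
{
    size_t capacity;
    std::string data;

    void write(char c)
    {
        if (data.size() == capacity)
            throw BufferExhausted{};
        data.push_back(c);
    }
};

/// Cascade: write to the current buffer; on exhaustion fall through to the next one.
struct CascadeBuffer
{
    std::vector<BoundedBuffer> sources;
    size_t current = 0;

    void write(char c)
    {
        while (true)
        {
            try
            {
                sources.at(current).write(c); /// at() throws if no buffers remain
                return;
            }
            catch (const BufferExhausted &)
            {
                ++current; /// switch to the next buffer and retry the same byte
            }
        }
    }
};

int main()
{
    CascadeBuffer cascade{{{2, {}}, {8, {}}}, 0};
    for (char c : std::string("overflow"))
        cascade.write(c);
    assert(cascade.sources[0].data == "ov");
    assert(cascade.sources[1].data == "erflow");
}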

View File

@@ -5,12 +5,6 @@
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int CURRENT_WRITE_BUFFER_IS_EXHAUSTED;
-}
-
 class ReadBufferFromMemoryWriteBuffer : public ReadBuffer, boost::noncopyable, private Allocator<false>
 {
 public:
@@ -118,7 +112,7 @@ void MemoryWriteBuffer::addChunk()
         if (0 == next_chunk_size)
         {
             set(position(), 0);
-            throw Exception(ErrorCodes::CURRENT_WRITE_BUFFER_IS_EXHAUSTED, "MemoryWriteBuffer limit is exhausted");
+            throw MemoryWriteBuffer::CurrentBufferExhausted();
         }
     }

View File

@@ -16,6 +16,12 @@ namespace DB
 class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost::noncopyable, private Allocator<false>
 {
 public:
+    /// Special exception to throw when the current WriteBuffer cannot receive data
+    class CurrentBufferExhausted : public std::exception
+    {
+    public:
+        const char * what() const noexcept override { return "MemoryWriteBuffer limit is exhausted"; }
+    };

     /// Use max_total_size_ = 0 for unlimited storage
     explicit MemoryWriteBuffer(

View File

@@ -198,7 +198,7 @@ TEST(MemoryWriteBuffer, WriteAndReread)
         if (s > 1)
         {
             MemoryWriteBuffer buf(s - 1);
-            EXPECT_THROW(buf.write(data.data(), data.size()), DB::Exception);
+            EXPECT_THROW(buf.write(data.data(), data.size()), MemoryWriteBuffer::CurrentBufferExhausted);
         }
     }

View File

@@ -2497,4 +2497,30 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG(
     return result_dag;
 }

+FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr & actions_)
+    : actions(actions_)
+{
+    for (const auto * node : actions->getOutputs())
+        index.emplace(node->result_name, node);
+}
+
+const ActionsDAG::Node * FindOriginalNodeForOutputName::find(const String & output_name)
+{
+    const auto it = index.find(output_name);
+    if (it == index.end())
+        return nullptr;
+
+    /// find the original (non-alias) node it refers to
+    const ActionsDAG::Node * node = it->second;
+    while (node && node->type == ActionsDAG::ActionType::ALIAS)
+    {
+        chassert(!node->children.empty());
+        node = node->children.front();
+    }
+    if (node && node->type != ActionsDAG::ActionType::INPUT)
+        return nullptr;
+
+    return node;
+}
+
 }
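FindOriginalNodeForOutputName indexes the DAG outputs by name once, then find() chases ALIAS edges until it reaches the underlying node, returning it only if that node is an INPUT. A compact standalone model of that traversal (the node layout here is invented):

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

enum class NodeType { Input, Alias, Function };

struct Node
{
    NodeType type;
    std::string result_name;
    std::vector<const Node *> children;
};

class FindOriginalNode
{
public:
    explicit FindOriginalNode(const std::vector<const Node *> & outputs)
    {
        for (const auto * node : outputs)
            index.emplace(node->result_name, node); /// build the name -> output index once
    }

    const Node * find(const std::string & output_name) const
    {
        auto it = index.find(output_name);
        if (it == index.end())
            return nullptr;

        const Node * node = it->second;
        while (node && node->type == NodeType::Alias)
            node = node->children.front(); /// follow the alias chain downwards

        return (node && node->type == NodeType::Input) ? node : nullptr;
    }

private:
    std::unordered_map<std::string, const Node *> index;
};

int main()
{
    Node input{NodeType::Input, "x", {}};
    Node alias{NodeType::Alias, "y", {&input}};
    Node computed{NodeType::Function, "z", {&input}};

    FindOriginalNode finder({&alias, &computed});
    assert(finder.find("y") == &input);  /// alias resolves to the original input column
    assert(finder.find("z") == nullptr); /// computed outputs have no original column
}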

View File

@@ -402,6 +402,19 @@ private:
     static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs);
 };

+class FindOriginalNodeForOutputName
+{
+    using NameToNodeIndex = std::unordered_map<std::string_view, const ActionsDAG::Node *>;
+
+public:
+    explicit FindOriginalNodeForOutputName(const ActionsDAGPtr & actions);
+    const ActionsDAG::Node * find(const String & output_name);
+
+private:
+    ActionsDAGPtr actions;
+    NameToNodeIndex index;
+};
+
 /// This is an ugly way to bypass impossibility to forward declare ActionDAG::Node.
 struct ActionDAGNodes
 {

View File

@@ -75,6 +75,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
     extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
+    extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
 }

 static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
@@ -1109,6 +1110,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
         }
     }

+    /// Normal functions are not parametric for now.
+    if (node.parameters)
+    {
+        throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", node.name);
+    }
+
     Names argument_names;
     DataTypes argument_types;
     bool arguments_present = true;

View File

@@ -122,12 +122,15 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
 QueryCache::Key::Key(
     ASTPtr ast_,
-    Block header_, const std::optional<String> & username_,
-    std::chrono::time_point<std::chrono::system_clock> expires_at_)
+    Block header_,
+    const std::optional<String> & username_,
+    std::chrono::time_point<std::chrono::system_clock> expires_at_,
+    bool is_compressed_)
     : ast(removeQueryCacheSettings(ast_))
     , header(header_)
     , username(username_)
     , expires_at(expires_at_)
+    , is_compressed(is_compressed_)
 {
 }
@@ -153,7 +156,7 @@ size_t QueryCache::KeyHasher::operator()(const Key & key) const
     return res;
 }

-size_t QueryCache::QueryResultWeight::operator()(const QueryResult & chunks) const
+size_t QueryCache::QueryResultWeight::operator()(const Chunks & chunks) const
 {
     size_t res = 0;
     for (const auto & chunk : chunks)
@@ -168,12 +171,16 @@ bool QueryCache::IsStale::operator()(const Key & key) const
 QueryCache::Writer::Writer(Cache & cache_, const Key & key_,
     size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
-    std::chrono::milliseconds min_query_runtime_)
+    std::chrono::milliseconds min_query_runtime_,
+    bool squash_partial_results_,
+    size_t max_block_size_)
     : cache(cache_)
     , key(key_)
     , max_entry_size_in_bytes(max_entry_size_in_bytes_)
     , max_entry_size_in_rows(max_entry_size_in_rows_)
     , min_query_runtime(min_query_runtime_)
+    , squash_partial_results(squash_partial_results_)
+    , max_block_size(max_block_size_)
 {
     if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
     {
@@ -211,6 +218,8 @@ void QueryCache::Writer::finalizeWrite()
     std::lock_guard lock(mutex);

+    chassert(!was_finalized);
+
     if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
     {
         LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
@@ -224,7 +233,67 @@ void QueryCache::Writer::finalizeWrite()
         return;
     }

+    if (squash_partial_results)
+    {
+        // Squash partial result chunks to chunks of size 'max_block_size' each. This costs some performance but provides a more natural
+        // compression of neither too small nor too big blocks. Also, it will look like 'max_block_size' is respected when the query result is
+        // served later on from the query cache.
+
+        Chunks squashed_chunks;
+        size_t rows_remaining_in_squashed = 0; /// how many further rows can the last squashed chunk consume until it reaches max_block_size
+
+        for (const auto & chunk : *query_result)
+        {
+            const size_t rows_chunk = chunk.getNumRows();
+            size_t rows_chunk_processed = 0;
+            if (rows_chunk == 0)
+                continue;
+
+            while (true)
+            {
+                if (rows_remaining_in_squashed == 0)
+                {
+                    Chunk empty_chunk = Chunk(chunk.cloneEmptyColumns(), 0);
+                    squashed_chunks.push_back(std::move(empty_chunk));
+                    rows_remaining_in_squashed = max_block_size;
+                }
+
+                const size_t rows_to_append = std::min(rows_chunk - rows_chunk_processed, rows_remaining_in_squashed);
+                squashed_chunks.back().append(chunk, rows_chunk_processed, rows_to_append);
+                rows_chunk_processed += rows_to_append;
+                rows_remaining_in_squashed -= rows_to_append;
+
+                if (rows_chunk_processed == rows_chunk)
+                    break;
+            }
+        }
+
+        *query_result = std::move(squashed_chunks);
+    }
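One detail worth pausing on: rows_remaining_in_squashed tracks how many more rows the last squashed chunk can take, so it must shrink after every append, otherwise the first output chunk would grow without bound. A compilable miniature of the squashing loop, with Chunk reduced to std::vector<int>:

#include <algorithm>
#include <cassert>
#include <vector>

using Chunk = std::vector<int>;
using Chunks = std::vector<Chunk>;

// Repack arbitrarily sized chunks into chunks of at most max_block_size rows.
Chunks squash(const Chunks & input, size_t max_block_size)
{
    Chunks squashed;
    size_t rows_remaining_in_squashed = 0;
    for (const auto & chunk : input)
    {
        size_t processed = 0;
        while (processed < chunk.size()) /// empty chunks are skipped naturally
        {
            if (rows_remaining_in_squashed == 0)
            {
                squashed.emplace_back(); /// open a fresh output chunk
                rows_remaining_in_squashed = max_block_size;
            }
            const size_t to_append = std::min(chunk.size() - processed, rows_remaining_in_squashed);
            squashed.back().insert(squashed.back().end(),
                                   chunk.begin() + processed, chunk.begin() + processed + to_append);
            processed += to_append;
            rows_remaining_in_squashed -= to_append; /// capacity shrinks, never grows
        }
    }
    return squashed;
}

int main()
{
    Chunks in = {{1, 2, 3, 4, 5}, {6}, {}, {7, 8, 9}};
    Chunks out = squash(in, 4);
    assert(out.size() == 3);
    assert((out[0] == Chunk{1, 2, 3, 4}));
    assert((out[1] == Chunk{5, 6, 7, 8}));
    assert((out[2] == Chunk{9}));
}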
+    if (key.is_compressed)
+    {
+        Chunks compressed_chunks;
+        const Chunks & decompressed_chunks = *query_result;
+        for (const auto & decompressed_chunk : decompressed_chunks)
+        {
+            const Columns & decompressed_columns = decompressed_chunk.getColumns();
+            Columns compressed_columns;
+            for (const auto & decompressed_column : decompressed_columns)
+            {
+                auto compressed_column = decompressed_column->compress();
+                compressed_columns.push_back(compressed_column);
+            }
+            Chunk compressed_chunk(compressed_columns, decompressed_chunk.getNumRows());
+            compressed_chunks.push_back(std::move(compressed_chunk));
+        }
+        *query_result = std::move(compressed_chunks);
+    }
+
     cache.set(key, query_result);
+
+    was_finalized = true;
 }
 QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guard<std::mutex> &)
@@ -249,7 +318,28 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
         return;
     }

-    pipe = Pipe(std::make_shared<SourceFromChunks>(entry->key.header, entry->mapped));
+    if (!entry->key.is_compressed)
+        pipe = Pipe(std::make_shared<SourceFromChunks>(entry->key.header, entry->mapped));
+    else
+    {
+        auto decompressed_chunks = std::make_shared<Chunks>();
+        const Chunks & compressed_chunks = *entry->mapped;
+        for (const auto & compressed_chunk : compressed_chunks)
+        {
+            const Columns & compressed_chunk_columns = compressed_chunk.getColumns();
+            Columns decompressed_columns;
+            for (const auto & compressed_column : compressed_chunk_columns)
+            {
+                auto column = compressed_column->decompress();
+                decompressed_columns.push_back(column);
+            }
+            Chunk decompressed_chunk(decompressed_columns, compressed_chunk.getNumRows());
+            decompressed_chunks->push_back(std::move(decompressed_chunk));
+        }
+        pipe = Pipe(std::make_shared<SourceFromChunks>(entry->key.header, decompressed_chunks));
+    }

     LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
 }
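Compression is paid once at write time, while every cache hit pays for decompression: the trade is CPU on reads for resident memory. A toy round-trip where run-length encoding stands in for the real column codecs (the real compress()/decompress() are IColumn methods; everything below is illustrative):

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using Column = std::vector<int64_t>;
using CompressedColumn = std::vector<std::pair<int64_t, size_t>>; /// (value, run length)

CompressedColumn compress(const Column & col)
{
    CompressedColumn out;
    for (int64_t v : col)
    {
        if (!out.empty() && out.back().first == v)
            ++out.back().second; /// extend the current run
        else
            out.emplace_back(v, 1);
    }
    return out;
}

Column decompress(const CompressedColumn & col)
{
    Column out;
    for (const auto & [value, run] : col)
        out.insert(out.end(), run, value);
    return out;
}

int main()
{
    Column original = {7, 7, 7, 1, 1, 9};
    CompressedColumn stored = compress(original); /// done once, at write time
    assert(stored.size() == 3);                   /// smaller resident footprint
    Column served = decompress(stored);           /// done on every cache hit
    assert(served == original);
}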
@@ -277,10 +367,10 @@ QueryCache::Reader QueryCache::createReader(const Key & key)
     return Reader(cache, key, lock);
 }

-QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime)
+QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size)
 {
     std::lock_guard lock(mutex);
-    return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime);
+    return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size);
 }

 void QueryCache::reset()
@@ -308,7 +398,7 @@ std::vector<QueryCache::Cache::KeyMapped> QueryCache::dump() const
 }

 QueryCache::QueryCache()
-    : cache(std::make_unique<TTLCachePolicy<Key, QueryResult, KeyHasher, QueryResultWeight, IsStale>>())
+    : cache(std::make_unique<TTLCachePolicy<Key, Chunks, KeyHasher, QueryResultWeight, IsStale>>())
 {
 }

View File

@@ -50,16 +50,19 @@ public:
         /// When does the entry expire?
         const std::chrono::time_point<std::chrono::system_clock> expires_at;

+        /// Is the entry compressed?
+        const bool is_compressed;
+
         Key(ASTPtr ast_,
-            Block header_, const std::optional<String> & username_,
-            std::chrono::time_point<std::chrono::system_clock> expires_at_);
+            Block header_,
+            const std::optional<String> & username_,
+            std::chrono::time_point<std::chrono::system_clock> expires_at_,
+            bool is_compressed);

         bool operator==(const Key & other) const;
         String queryStringFromAst() const;
     };

-    using QueryResult = Chunks;
-
 private:
     struct KeyHasher
     {
@@ -68,7 +71,7 @@ private:
     struct QueryResultWeight
     {
-        size_t operator()(const QueryResult & chunks) const;
+        size_t operator()(const Chunks & chunks) const;
     };

     struct IsStale
@@ -77,7 +80,7 @@ private:
     };

     /// query --> query result
-    using Cache = CacheBase<Key, QueryResult, KeyHasher, QueryResultWeight>;
+    using Cache = CacheBase<Key, Chunks, KeyHasher, QueryResultWeight>;

     /// query --> query execution count
     using TimesExecuted = std::unordered_map<Key, size_t, KeyHasher>;
@@ -109,12 +112,17 @@ public:
         const size_t max_entry_size_in_rows;
         const std::chrono::time_point<std::chrono::system_clock> query_start_time = std::chrono::system_clock::now(); /// Writer construction and finalizeWrite() coincide with query start/end
         const std::chrono::milliseconds min_query_runtime;
-        std::shared_ptr<QueryResult> query_result TSA_GUARDED_BY(mutex) = std::make_shared<QueryResult>();
+        const bool squash_partial_results;
+        const size_t max_block_size;
+        std::shared_ptr<Chunks> query_result TSA_GUARDED_BY(mutex) = std::make_shared<Chunks>();
         std::atomic<bool> skip_insert = false;
+        bool was_finalized = false;

         Writer(Cache & cache_, const Key & key_,
             size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_,
-            std::chrono::milliseconds min_query_runtime_);
+            std::chrono::milliseconds min_query_runtime_,
+            bool squash_partial_results_,
+            size_t max_block_size_);

         friend class QueryCache; /// for createWriter()
     };
@@ -136,7 +144,7 @@ public:
     void updateConfiguration(const Poco::Util::AbstractConfiguration & config);

     Reader createReader(const Key & key);
-    Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime);
+    Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size);

     void reset();

View File

@@ -275,7 +275,9 @@ void executeQueryWithParallelReplicas(
     auto shard_info = not_optimized_cluster->getShardsInfo().front();

     const auto & settings = context->getSettingsRef();
-    auto all_replicas_count = std::min(static_cast<size_t>(settings.max_parallel_replicas), shard_info.all_addresses.size());
+    ClusterPtr new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings);
+    auto all_replicas_count = std::min(static_cast<size_t>(settings.max_parallel_replicas), new_cluster->getShardCount());

     auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(all_replicas_count);
     auto remote_plan = std::make_unique<QueryPlan>();
     auto plans = std::vector<QueryPlanPtr>();
@@ -287,35 +289,13 @@ void executeQueryWithParallelReplicas(
     /// to then tell it about the reading method we chose.
     query_info.coordinator = coordinator;

-    UUID parallel_group_id = UUIDHelpers::generateV4();
-
-    plans.emplace_back(createLocalPlan(
-        query_ast,
-        stream_factory.header,
-        context,
-        stream_factory.processed_stage,
-        shard_info.shard_num,
-        /*shard_count*/1,
-        0,
-        all_replicas_count,
-        coordinator,
-        parallel_group_id));
-
-    if (!shard_info.hasRemoteConnections())
-    {
-        if (!plans.front())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "An empty plan was generated to read from local shard and there is no remote connections. This is a bug");
-        query_plan = std::move(*plans.front());
-        return;
-    }
-
     auto new_context = Context::createCopy(context);
     auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{};
     auto external_tables = new_context->getExternalTables();

     auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
         query_ast,
-        std::move(shard_info),
+        new_cluster,
         coordinator,
         stream_factory.header,
         stream_factory.processed_stage,
@@ -326,8 +306,7 @@ void executeQueryWithParallelReplicas(
         std::move(scalars),
         std::move(external_tables),
         &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"),
-        query_info.storage_limits,
-        parallel_group_id);
+        query_info.storage_limits);

     remote_plan->addStep(std::move(read_from_remote));
     remote_plan->addInterpreterContext(context);

View File

@@ -2362,7 +2362,7 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const
     const auto & config = shared->zookeeper_config ? *shared->zookeeper_config : getConfigRef();
     if (!shared->zookeeper)
-        shared->zookeeper = std::make_shared<zkutil::ZooKeeper>(config, "zookeeper", getZooKeeperLog());
+        shared->zookeeper = std::make_shared<zkutil::ZooKeeper>(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog());
     else if (shared->zookeeper->expired())
     {
         Stopwatch watch;
@@ -2401,8 +2401,9 @@ bool Context::tryCheckClientConnectionToMyKeeperCluster() const
 {
     try
     {
+        const auto config_name = zkutil::getZooKeeperConfigName(getConfigRef());
         /// If our server is part of main Keeper cluster
-        if (checkZooKeeperConfigIsLocal(getConfigRef(), "zookeeper"))
+        if (config_name == "keeper_server" || checkZooKeeperConfigIsLocal(getConfigRef(), config_name))
         {
             LOG_DEBUG(shared->log, "Keeper server is participant of the main zookeeper cluster, will try to connect to it");
             getZooKeeper();
@@ -2608,7 +2609,7 @@ void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const
     bool server_started = isServerCompletelyStarted();
     std::lock_guard lock(shared->zookeeper_mutex);
     shared->zookeeper_config = config;
-    reloadZooKeeperIfChangedImpl(config, "zookeeper", shared->zookeeper, getZooKeeperLog(), server_started);
+    reloadZooKeeperIfChangedImpl(config, zkutil::getZooKeeperConfigName(*config), shared->zookeeper, getZooKeeperLog(), server_started);
 }

 void Context::reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr & config)
@@ -2633,7 +2634,7 @@ void Context::reloadAuxiliaryZooKeepersConfigIfChanged(const ConfigurationPtr &
 bool Context::hasZooKeeper() const
 {
-    return getConfigRef().has("zookeeper");
+    return zkutil::hasZooKeeperConfig(getConfigRef());
 }

 bool Context::hasAuxiliaryZooKeeper(const String & name) const
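Both zkutil::getZooKeeperConfigName() and zkutil::hasZooKeeperConfig() used above resolve which config section to read instead of hard-coding "zookeeper". A guess at the shape of that lookup, with a plain std::set standing in for the server configuration; the actual precedence rules live in zkutil and may differ:

#include <cassert>
#include <set>
#include <stdexcept>
#include <string>

// Assumed resolution order: prefer an explicit <zookeeper> section, fall back
// to the embedded Keeper's <keeper_server> section, and fail only if neither exists.
std::string getZooKeeperConfigName(const std::set<std::string> & config_keys)
{
    if (config_keys.count("zookeeper"))
        return "zookeeper";
    if (config_keys.count("keeper_server"))
        return "keeper_server";
    throw std::runtime_error("There is no ZooKeeper configuration in the server config");
}

bool hasZooKeeperConfig(const std::set<std::string> & config_keys)
{
    return config_keys.count("zookeeper") || config_keys.count("keeper_server");
}

int main()
{
    assert(getZooKeeperConfigName({"zookeeper", "logger"}) == "zookeeper");
    assert(getZooKeeperConfigName({"keeper_server"}) == "keeper_server"); /// embedded Keeper only
    assert(!hasZooKeeperConfig({"logger"}));
}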

View File

@@ -18,8 +18,6 @@
 #include <IO/IResourceManager.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/IAST_fwd.h>
-#include <Processors/ResizeProcessor.h>
-#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>
 #include <Server/HTTP/HTTPContext.h>
 #include <Storages/ColumnsDescription.h>
 #include <Storages/IStorage_fwd.h>

View File

@@ -20,6 +20,7 @@
 #include <Storages/LiveView/StorageLiveView.h>
 #include <Storages/MutationCommands.h>
 #include <Storages/PartitionCommands.h>
+#include <Storages/StorageKeeperMap.h>
 #include <Common/typeid_cast.h>

 #include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
@@ -39,6 +40,8 @@ namespace ErrorCodes
     extern const int INCORRECT_QUERY;
     extern const int NOT_IMPLEMENTED;
     extern const int TABLE_IS_READ_ONLY;
+    extern const int BAD_ARGUMENTS;
+    extern const int UNKNOWN_TABLE;
 }
@@ -72,16 +75,21 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
     if (!UserDefinedSQLFunctionFactory::instance().empty())
         UserDefinedSQLFunctionVisitor::visit(query_ptr);

+    auto table_id = getContext()->resolveStorageID(alter, Context::ResolveOrdinary);
+    query_ptr->as<ASTAlterQuery &>().setDatabase(table_id.database_name);
+    StoragePtr table = DatabaseCatalog::instance().tryGetTable(table_id, getContext());
+
     if (!alter.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
     {
+        if (table && table->as<StorageKeeperMap>())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Mutations with ON CLUSTER are not allowed for KeeperMap tables");
+
         DDLQueryOnClusterParams params;
         params.access_to_check = getRequiredAccess();
         return executeDDLQueryOnCluster(query_ptr, getContext(), params);
     }

     getContext()->checkAccess(getRequiredAccess());
-
-    auto table_id = getContext()->resolveStorageID(alter, Context::ResolveOrdinary);
-    query_ptr->as<ASTAlterQuery &>().setDatabase(table_id.database_name);

     DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
     if (database->shouldReplicateQuery(getContext(), query_ptr))
@@ -91,7 +99,9 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter)
         return database->tryEnqueueReplicatedDDL(query_ptr, getContext());
     }

-    StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
+    if (!table)
+        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Could not find table: {}", table_id.table_name);
+
     checkStorageSupportsTransactionsIfNeeded(table, getContext());
     if (table->isStaticStorage())
         throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");

View File

@@ -550,6 +550,12 @@ void MutationsInterpreter::prepare(bool dry_run)
     if (source.hasLightweightDeleteMask())
         all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});

+    if (return_all_columns)
+    {
+        for (const auto & column : source.getStorage()->getVirtuals())
+            all_columns.push_back(column);
+    }
+
     NameSet updated_columns;
     bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly();
@@ -906,6 +912,8 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
 {
     auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context);
     auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
+    if (return_all_columns)
+        options.withVirtuals();
     auto all_columns = storage_snapshot->getColumns(options);

     /// Add _row_exists column if it is present in the part
@@ -1256,6 +1264,7 @@ void MutationsInterpreter::validate()
     }

     QueryPlan plan;
     initQueryPlan(stages.front(), plan);
     auto pipeline = addStreamsForLaterStages(stages, plan);
 }

View File

@@ -726,7 +726,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                 QueryCache::Key key(
                     ast, res.pipeline.getHeader(),
                     std::make_optional<String>(context->getUserName()),
-                    std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl));
+                    /*dummy value for expires_at*/ std::chrono::system_clock::from_time_t(1),
+                    /*dummy value for is_compressed*/ true);
                 QueryCache::Reader reader = query_cache->createReader(key);
                 if (reader.hasCacheEntryForKey())
                 {
@@ -748,13 +749,18 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                 QueryCache::Key key(
                     ast, res.pipeline.getHeader(),
                     settings.query_cache_share_between_users ? std::nullopt : std::make_optional<String>(context->getUserName()),
-                    std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl));
+                    std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),
+                    settings.query_cache_compress_entries);
                 const size_t num_query_runs = query_cache->recordQueryRun(key);
                 if (num_query_runs > settings.query_cache_min_query_runs)
                 {
-                    auto stream_in_query_cache_transform = std::make_shared<StreamInQueryCacheTransform>(res.pipeline.getHeader(), query_cache, key,
-                        std::chrono::milliseconds(context->getSettings().query_cache_min_query_duration.totalMilliseconds()));
+                    auto stream_in_query_cache_transform =
+                        std::make_shared<StreamInQueryCacheTransform>(
+                            res.pipeline.getHeader(), query_cache, key,
+                            std::chrono::milliseconds(context->getSettings().query_cache_min_query_duration.totalMilliseconds()),
+                            context->getSettings().query_cache_squash_partial_results,
+                            context->getSettings().max_block_size);
                     res.pipeline.streamIntoQueryCache(stream_in_query_cache_transform);
                 }
             }
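The "/*dummy value*/" arguments work because lookup compares only the fields that participate in key equality; expires_at and is_compressed are metadata that is read back from the stored key on a hit. A small sketch of that two-phase-key idea over std::map, with all names invented:

#include <cassert>
#include <map>
#include <string>
#include <tuple>

struct Key
{
    std::string query;
    std::string user;
    long expires_at;    /// metadata: ignored by the ordering below
    bool is_compressed; /// metadata: ignored by the ordering below

    bool operator<(const Key & other) const
    {
        return std::tie(query, user) < std::tie(other.query, other.user);
    }
};

int main()
{
    std::map<Key, int> cache;
    cache[Key{"SELECT 1", "alice", /*expires_at*/ 12345, /*is_compressed*/ true}] = 42;

    Key probe{"SELECT 1", "alice", /*dummy*/ 0, /*dummy*/ false};
    auto it = cache.find(probe); /// dummies do not affect the lookup
    assert(it != cache.end());
    assert(it->first.is_compressed); /// real metadata comes from the stored key
}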

View File

@@ -611,8 +611,16 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState
         FormatStateStacked frame_nested = frame;
         frame_nested.need_parens = false;

-        frame_nested.expression_list_always_start_on_new_line = true;
-        static_cast<ASTExpressionList *>(command_list)->formatImplMultiline(settings, state, frame_nested);
+        if (settings.one_line)
+        {
+            frame_nested.expression_list_prepend_whitespace = true;
+            command_list->formatImpl(settings, state, frame_nested);
+        }
+        else
+        {
+            frame_nested.expression_list_always_start_on_new_line = true;
+            command_list->as<ASTExpressionList &>().formatImplMultiline(settings, state, frame_nested);
+        }
     }
 }

View File

@@ -440,10 +440,10 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
     if (select)
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS"
-                      << (comment ? "(" : "")
-                      << settings.nl_or_ws << (settings.hilite ? hilite_none : "");
+                      << settings.nl_or_ws
+                      << (comment ? "(" : "") << (settings.hilite ? hilite_none : "");
         select->formatImpl(settings, state, frame);
-        settings.ostr << (comment ? ")" : "");
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << (comment ? ")" : "") << (settings.hilite ? hilite_none : "");
     }

     if (comment)

View File

@@ -161,8 +161,9 @@ void ASTDictionary::formatImpl(const FormatSettings & settings, FormatState & st
     if (source)
     {
-        settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SOURCE("
-                      << (settings.hilite ? hilite_none : "");
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SOURCE"
+                      << (settings.hilite ? hilite_none : "");
+        settings.ostr << "(";
         source->formatImpl(settings, state, frame);
         settings.ostr << ")";
     }

View File

@@ -56,16 +56,16 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin
     }

     if (hierarchical)
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL";
+        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL" << (settings.hilite ? hilite_none : "");

     if (bidirectional)
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL";
+        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL" << (settings.hilite ? hilite_none : "");

     if (injective)
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "INJECTIVE";
+        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "INJECTIVE" << (settings.hilite ? hilite_none : "");

     if (is_object_id)
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "IS_OBJECT_ID";
+        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "IS_OBJECT_ID" << (settings.hilite ? hilite_none : "");
     }
 }

View File

@@ -692,12 +692,15 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
         {
             std::string nl_or_nothing = settings.one_line ? "" : "\n";
             std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
-            settings.ostr << (settings.hilite ? hilite_function : "") << name << "(" << nl_or_nothing;
+            settings.ostr << (settings.hilite ? hilite_function : "") << name << (settings.hilite ? hilite_none : "");
+            settings.ostr << (settings.hilite ? hilite_function : "") << "(" << (settings.hilite ? hilite_none : "");
+            settings.ostr << nl_or_nothing;
             FormatStateStacked frame_nested = frame;
             frame_nested.need_parens = false;
             ++frame_nested.indent;
             query->formatImpl(settings, state, frame_nested);
-            settings.ostr << nl_or_nothing << indent_str << ")";
+            settings.ostr << nl_or_nothing << indent_str;
+            settings.ostr << (settings.hilite ? hilite_function : "") << ")" << (settings.hilite ? hilite_none : "");
             return;
         }

View File

@@ -29,11 +29,13 @@ void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatS
             break;
     }

+    settings.ostr << (settings.hilite ? hilite_none : "");
+
     formatOnCluster(settings);

     if (where_expression)
     {
-        settings.ostr << " WHERE " << (settings.hilite ? hilite_none : "");
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
         where_expression->formatImpl(settings, state, frame);
     }

View File

@@ -20,7 +20,9 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta
     /// This is needed because the query can become extraordinarily large after substitution of aliases.
     if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second)
     {
+        settings.ostr << (settings.hilite ? IAST::hilite_identifier : "");
         settings.writeIdentifier(alias);
+        settings.ostr << (settings.hilite ? IAST::hilite_none : "");
     }
     else
     {

View File

@@ -18,7 +18,9 @@ void ASTWithElement::formatImpl(const FormatSettings & settings, FormatState & s
     {
         std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');

+        settings.ostr << (settings.hilite ? hilite_alias : "");
         settings.writeIdentifier(name);
+        settings.ostr << (settings.hilite ? hilite_none : "");
         settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << (settings.hilite ? hilite_none : "");
         settings.ostr << settings.nl_or_ws << indent_str;
         dynamic_cast<const ASTWithAlias &>(*subquery).formatImplWithoutAlias(settings, state, frame);
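These formatting fixes all share one root cause: a highlight escape opened with hilite_keyword or hilite_identifier and never closed with hilite_none. One possible hardening, sketched as an RAII guard with made-up escape codes; this is not a pattern the ClickHouse formatter currently uses:

#include <iostream>
#include <string>

// The guard emits the opening code on construction and the reset on scope
// exit, so a close can never be forgotten, even on early return.
struct HiliteScope
{
    std::ostream & ostr;
    bool enabled;

    HiliteScope(std::ostream & ostr_, bool enabled_, const std::string & code)
        : ostr(ostr_), enabled(enabled_)
    {
        if (enabled)
            ostr << code; /// e.g. hilite_keyword
    }

    ~HiliteScope()
    {
        if (enabled)
            ostr << "\033[0m"; /// hilite_none, always restored
    }
};

int main()
{
    bool hilite = true;
    {
        HiliteScope scope(std::cout, hilite, "\033[1m");
        std::cout << "HIERARCHICAL";
    } /// reset emitted here
    std::cout << " rest of the query\n";
}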

View File

@@ -22,3 +22,7 @@ endif()
 if (ENABLE_FUZZING)
     add_subdirectory(fuzzers)
 endif()
+
+if (ENABLE_TESTS)
+    add_subdirectory(HiliteComparator)
+endif ()

Some files were not shown because too many files have changed in this diff.