mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 18:12:02 +00:00
Merge branch 'master' into fix-protobuf-abort
This commit is contained in:
commit
96a3307bda
81
base/glibc-compatibility/musl/expf.c
Normal file
81
base/glibc-compatibility/musl/expf.c
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
|
||||||
|
/*
|
||||||
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libm.h"
|
||||||
|
|
||||||
|
/* Rounding bias used when converting x/ln2 to an integer: +0.5 for x >= 0, -0.5 for x < 0. */
static const float half[2] = {0.5,-0.5};

/* ln(2) split into a high part (trailing bits zero) and a low correction, plus 1/ln(2). */
static const float ln2hi  = 6.9314575195e-1f;  /* 0x3f317200 */
static const float ln2lo  = 1.4286067653e-6f;  /* 0x35bfbe8e */
static const float invln2 = 1.4426950216e+0f;  /* 0x3fb8aa3b */

/*
 * Polynomial coefficients.
 * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
 * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
 */
static const float P1 =  1.6666625440e-1f; /*  0xaaaa8f.0p-26 */
static const float P2 = -2.7667332906e-3f; /* -0xb55215.0p-32 */
|
||||||
|
|
||||||
|
float expf(float x)
|
||||||
|
{
|
||||||
|
float_t hi, lo, c, xx, y;
|
||||||
|
int k, sign;
|
||||||
|
uint32_t hx;
|
||||||
|
|
||||||
|
GET_FLOAT_WORD(hx, x);
|
||||||
|
sign = hx >> 31; /* sign bit of x */
|
||||||
|
hx &= 0x7fffffff; /* high word of |x| */
|
||||||
|
|
||||||
|
/* special cases */
|
||||||
|
if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */
|
||||||
|
if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */
|
||||||
|
/* overflow */
|
||||||
|
x *= 0x1p127f;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
if (sign) {
|
||||||
|
/* underflow */
|
||||||
|
FORCE_EVAL(-0x1p-149f/x);
|
||||||
|
if (hx >= 0x42cff1b5) /* x <= -103.972084f */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* argument reduction */
|
||||||
|
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
|
||||||
|
if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */
|
||||||
|
k = invln2*x + half[sign];
|
||||||
|
else
|
||||||
|
k = 1 - sign - sign;
|
||||||
|
hi = x - k*ln2hi; /* k*ln2hi is exact here */
|
||||||
|
lo = k*ln2lo;
|
||||||
|
x = hi - lo;
|
||||||
|
} else if (hx > 0x39000000) { /* |x| > 2**-14 */
|
||||||
|
k = 0;
|
||||||
|
hi = x;
|
||||||
|
lo = 0;
|
||||||
|
} else {
|
||||||
|
/* raise inexact */
|
||||||
|
FORCE_EVAL(0x1p127f + x);
|
||||||
|
return 1 + x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x is now in primary range */
|
||||||
|
xx = x*x;
|
||||||
|
c = x - xx*(P1+xx*P2);
|
||||||
|
y = 1 + (x*c/(2-c) - lo + hi);
|
||||||
|
if (k == 0)
|
||||||
|
return y;
|
||||||
|
return scalbnf(y, k);
|
||||||
|
}
|
31
base/glibc-compatibility/musl/scalbnf.c
Normal file
31
base/glibc-compatibility/musl/scalbnf.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
 * Return x * 2**n for single precision.
 *
 * The scale factor 2**n is built directly from its exponent bits, so n
 * is first brought into the normal exponent range [-126, 127] by
 * pre-multiplying y in at most two steps; anything still out of range is
 * clamped, by which point y has already overflowed or underflowed.
 *
 * When scaling down, each step multiplies by 2**-102 (2**-126 * 2**24)
 * rather than 2**-126. This keeps the intermediate value out of the
 * subnormal range so that only the final multiplication rounds; scaling
 * by 2**-126 directly could round twice and return a wrong subnormal.
 */
float scalbnf(float x, int n)
{
    union {float f; uint32_t i;} u;
    float_t y = x;

    if (n > 127) {
        y *= 0x1p127f;
        n -= 127;
        if (n > 127) {
            y *= 0x1p127f;
            n -= 127;
            if (n > 127)
                n = 127;
        }
    } else if (n < -126) {
        /* leave 24 bits of headroom to avoid double rounding in the subnormal range */
        y *= 0x1p-126f * 0x1p24f;
        n += 126 - 24;
        if (n < -126) {
            y *= 0x1p-126f * 0x1p24f;
            n += 126 - 24;
            if (n < -126)
                n = -126;
        }
    }
    /* assemble 2**n from its biased exponent field */
    u.i = (uint32_t)(0x7f+n)<<23;
    x = y * u.f;
    return x;
}
|
2
contrib/arrow
vendored
2
contrib/arrow
vendored
@ -1 +1 @@
|
|||||||
Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666
|
Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
|
@ -202,6 +202,7 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/builder.cc"
|
"${LIBRARY_DIR}/builder.cc"
|
||||||
"${LIBRARY_DIR}/buffer.cc"
|
"${LIBRARY_DIR}/buffer.cc"
|
||||||
"${LIBRARY_DIR}/chunked_array.cc"
|
"${LIBRARY_DIR}/chunked_array.cc"
|
||||||
|
"${LIBRARY_DIR}/chunk_resolver.cc"
|
||||||
"${LIBRARY_DIR}/compare.cc"
|
"${LIBRARY_DIR}/compare.cc"
|
||||||
"${LIBRARY_DIR}/config.cc"
|
"${LIBRARY_DIR}/config.cc"
|
||||||
"${LIBRARY_DIR}/datum.cc"
|
"${LIBRARY_DIR}/datum.cc"
|
||||||
@ -268,6 +269,10 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/util/uri.cc"
|
"${LIBRARY_DIR}/util/uri.cc"
|
||||||
"${LIBRARY_DIR}/util/utf8.cc"
|
"${LIBRARY_DIR}/util/utf8.cc"
|
||||||
"${LIBRARY_DIR}/util/value_parsing.cc"
|
"${LIBRARY_DIR}/util/value_parsing.cc"
|
||||||
|
"${LIBRARY_DIR}/util/byte_size.cc"
|
||||||
|
"${LIBRARY_DIR}/util/debug.cc"
|
||||||
|
"${LIBRARY_DIR}/util/tracing.cc"
|
||||||
|
"${LIBRARY_DIR}/util/atfork_internal.cc"
|
||||||
"${LIBRARY_DIR}/vendored/base64.cpp"
|
"${LIBRARY_DIR}/vendored/base64.cpp"
|
||||||
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
|
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
|
||||||
|
|
||||||
@ -301,9 +306,11 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/compute/exec/source_node.cc"
|
"${LIBRARY_DIR}/compute/exec/source_node.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
|
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
|
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/exec/partition_util.cc"
|
||||||
"${LIBRARY_DIR}/compute/function.cc"
|
"${LIBRARY_DIR}/compute/function.cc"
|
||||||
"${LIBRARY_DIR}/compute/function_internal.cc"
|
"${LIBRARY_DIR}/compute/function_internal.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernel.cc"
|
"${LIBRARY_DIR}/compute/kernel.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/light_array.cc"
|
||||||
"${LIBRARY_DIR}/compute/registry.cc"
|
"${LIBRARY_DIR}/compute/registry.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
|
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
|
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
|
||||||
@ -317,21 +324,28 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
|
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
|
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
|
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
|
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
|
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
|
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
|
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
|
||||||
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
|
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
|
||||||
@ -340,13 +354,15 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/compute/exec/union_node.cc"
|
"${LIBRARY_DIR}/compute/exec/union_node.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
|
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/key_map.cc"
|
"${LIBRARY_DIR}/compute/exec/key_map.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
|
|
||||||
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
|
|
||||||
"${LIBRARY_DIR}/compute/exec/util.cc"
|
"${LIBRARY_DIR}/compute/exec/util.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
|
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
|
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
|
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
|
||||||
"${LIBRARY_DIR}/compute/exec/task_util.cc"
|
"${LIBRARY_DIR}/compute/exec/task_util.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/row/grouper.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
|
||||||
|
"${LIBRARY_DIR}/compute/row/row_internal.cc"
|
||||||
|
|
||||||
"${LIBRARY_DIR}/ipc/dictionary.cc"
|
"${LIBRARY_DIR}/ipc/dictionary.cc"
|
||||||
"${LIBRARY_DIR}/ipc/feather.cc"
|
"${LIBRARY_DIR}/ipc/feather.cc"
|
||||||
@ -357,7 +373,8 @@ set(ARROW_SRCS
|
|||||||
"${LIBRARY_DIR}/ipc/writer.cc"
|
"${LIBRARY_DIR}/ipc/writer.cc"
|
||||||
|
|
||||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
|
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
|
||||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
|
"${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
|
||||||
|
"${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_definitions(-DARROW_WITH_LZ4)
|
add_definitions(-DARROW_WITH_LZ4)
|
||||||
|
@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema).
|
|||||||
|
|
||||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||||
|
|
||||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||||
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
|
|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------|
|
||||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
|
||||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
|
||||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||||
| `ENUM` | [Enum(8\ |16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||||
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
||||||
|
| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
|
||||||
|
| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
|
||||||
|
| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |
|
||||||
|
|
||||||
|
Integer types can be converted into each other during input/output.
|
||||||
|
|
||||||
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
|
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
|
||||||
|
|
||||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested.
|
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||||
|
|
||||||
### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
|
### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
|
||||||
|
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
---
|
||||||
|
slug: /en/sql-reference/aggregate-functions/reference/quantileApprox
|
||||||
|
sidebar_position: 204
|
||||||
|
---
|
||||||
|
|
||||||
|
# quantileApprox
|
||||||
|
|
||||||
|
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.
|
||||||
|
|
||||||
|
`quantileApprox` differs from other quantile functions in ClickHouse in that it lets the user control the accuracy of the approximate quantile result.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
quantileApprox(accuracy, level)(expr)
|
||||||
|
```
|
||||||
|
|
||||||
|
Alias: `medianApprox`.
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy.
|
||||||
|
|
||||||
|
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
|
||||||
|
|
||||||
|
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
|
||||||
|
|
||||||
|
|
||||||
|
**Returned value**
|
||||||
|
|
||||||
|
- Quantile of the specified level and accuracy.
|
||||||
|
|
||||||
|
|
||||||
|
Type:
|
||||||
|
|
||||||
|
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
|
||||||
|
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
|
||||||
|
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT quantileApprox(1, 0.25)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantileApprox(1, 0.25)(plus(number, 1))─┐
|
||||||
|
│ 1 │
|
||||||
|
└──────────────────────────────────────────┘
|
||||||
|
|
||||||
|
SELECT quantileApprox(10, 0.25)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantileApprox(10, 0.25)(plus(number, 1))─┐
|
||||||
|
│ 156 │
|
||||||
|
└───────────────────────────────────────────┘
|
||||||
|
|
||||||
|
SELECT quantileApprox(100, 0.25)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantileApprox(100, 0.25)(plus(number, 1))─┐
|
||||||
|
│ 251 │
|
||||||
|
└────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
SELECT quantileApprox(1000, 0.25)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐
|
||||||
|
│ 249 │
|
||||||
|
└─────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
**See Also**
|
||||||
|
|
||||||
|
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
|
||||||
|
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
|
@ -114,3 +114,59 @@ Result:
|
|||||||
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
|
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
|
||||||
└─────────────────────────────────────────────────────────────────────┘
|
└─────────────────────────────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## quantilesApprox
|
||||||
|
|
||||||
|
`quantilesApprox` works similarly to `quantileApprox`, but calculates quantiles at several levels simultaneously and returns them as an array.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
quantilesApprox(accuracy, level1, level2, ...)(expr)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Returned value**
|
||||||
|
|
||||||
|
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
|
||||||
|
|
||||||
|
Type of array values:
|
||||||
|
|
||||||
|
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
|
||||||
|
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
|
||||||
|
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||||
|
│ [1,1,1] │
|
||||||
|
└──────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||||
|
│ [156,413,659] │
|
||||||
|
└───────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
|
||||||
|
SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||||
|
│ [251,498,741] │
|
||||||
|
└────────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1)
|
||||||
|
FROM numbers(1000)
|
||||||
|
|
||||||
|
┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||||
|
│ [249,499,749] │
|
||||||
|
└─────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
@ -114,11 +114,11 @@ This will also create system tables even if message queue is empty.
|
|||||||
|
|
||||||
## RELOAD CONFIG
|
## RELOAD CONFIG
|
||||||
|
|
||||||
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper.
|
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper; it only reloads `USER` configuration that is stored in `users.xml`. To reload all `USER` configuration, use `SYSTEM RELOAD USERS`.
|
||||||
|
|
||||||
## RELOAD USERS
|
## RELOAD USERS
|
||||||
|
|
||||||
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage.
|
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage.
|
||||||
|
|
||||||
## SHUTDOWN
|
## SHUTDOWN
|
||||||
|
|
||||||
@ -224,6 +224,14 @@ Clears freezed backup with the specified name from all the disks. See more about
|
|||||||
SYSTEM UNFREEZE WITH NAME <backup_name>
|
SYSTEM UNFREEZE WITH NAME <backup_name>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### WAIT LOADING PARTS
|
||||||
|
|
||||||
|
Waits until all asynchronously loading data parts of a table (outdated data parts) are loaded.
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
|
||||||
|
```
|
||||||
|
|
||||||
## Managing ReplicatedMergeTree Tables
|
## Managing ReplicatedMergeTree Tables
|
||||||
|
|
||||||
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.
|
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) tables.
|
||||||
|
@ -0,0 +1,36 @@
|
|||||||
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||||
|
#include <AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h>
|
||||||
|
#include <AggregateFunctions/FactoryHelpers.h>
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
struct Settings;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest(
|
||||||
|
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||||
|
{
|
||||||
|
assertBinary(name, argument_types);
|
||||||
|
|
||||||
|
if (!isNumber(argument_types[0]) || !isNumber(argument_types[1]))
|
||||||
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} only supports numerical types", name);
|
||||||
|
|
||||||
|
return std::make_shared<AggregateFunctionKolmogorovSmirnov>(argument_types, parameters);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory)
|
||||||
|
{
|
||||||
|
factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
323
src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h
Normal file
323
src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
|
#include <AggregateFunctions/StatCommon.h>
|
||||||
|
#include <Columns/ColumnVector.h>
|
||||||
|
#include <Columns/ColumnTuple.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
#include <Common/assert_cast.h>
|
||||||
|
#include <Common/ArenaAllocator.h>
|
||||||
|
#include <Common/PODArray_fwd.h>
|
||||||
|
#include <base/types.h>
|
||||||
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
struct Settings;
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
|
||||||
|
{
|
||||||
|
enum class Alternative
|
||||||
|
{
|
||||||
|
TwoSided,
|
||||||
|
Less,
|
||||||
|
Greater
|
||||||
|
};
|
||||||
|
|
||||||
|
std::pair<Float64, Float64> getResult(Alternative alternative, String method)
|
||||||
|
{
|
||||||
|
::sort(x.begin(), x.end());
|
||||||
|
::sort(y.begin(), y.end());
|
||||||
|
|
||||||
|
Float64 max_s = std::numeric_limits<Float64>::min();
|
||||||
|
Float64 min_s = std::numeric_limits<Float64>::max();
|
||||||
|
Float64 now_s = 0;
|
||||||
|
UInt64 pos_x = 0;
|
||||||
|
UInt64 pos_y = 0;
|
||||||
|
UInt64 n1 = x.size();
|
||||||
|
UInt64 n2 = y.size();
|
||||||
|
|
||||||
|
const Float64 n1_d = 1. / n1;
|
||||||
|
const Float64 n2_d = 1. / n2;
|
||||||
|
const Float64 tol = 1e-7;
|
||||||
|
|
||||||
|
// reference: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test
|
||||||
|
while (pos_x < x.size() && pos_y < y.size())
|
||||||
|
{
|
||||||
|
if (likely(fabs(x[pos_x] - y[pos_y]) >= tol))
|
||||||
|
{
|
||||||
|
if (x[pos_x] < y[pos_y])
|
||||||
|
{
|
||||||
|
now_s += n1_d;
|
||||||
|
++pos_x;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
now_s -= n2_d;
|
||||||
|
++pos_y;
|
||||||
|
}
|
||||||
|
max_s = std::max(max_s, now_s);
|
||||||
|
min_s = std::min(min_s, now_s);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
now_s += n1_d;
|
||||||
|
++pos_x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
|
||||||
|
min_s = std::min(min_s, now_s);
|
||||||
|
max_s = std::max(max_s, now_s);
|
||||||
|
|
||||||
|
Float64 d = 0;
|
||||||
|
if (alternative == Alternative::TwoSided)
|
||||||
|
d = std::max(std::abs(max_s), std::abs(min_s));
|
||||||
|
else if (alternative == Alternative::Less)
|
||||||
|
d = -min_s;
|
||||||
|
else if (alternative == Alternative::Greater)
|
||||||
|
d = max_s;
|
||||||
|
|
||||||
|
UInt64 g = std::__gcd(n1, n2);
|
||||||
|
UInt64 nx_g = n1 / g;
|
||||||
|
UInt64 ny_g = n2 / g;
|
||||||
|
|
||||||
|
if (method == "auto")
|
||||||
|
method = std::max(n1, n2) <= 10000 ? "exact" : "asymp";
|
||||||
|
else if (method == "exact" && nx_g >= std::numeric_limits<Int32>::max() / ny_g)
|
||||||
|
method = "asymp";
|
||||||
|
|
||||||
|
Float64 p_value = std::numeric_limits<Float64>::infinity();
|
||||||
|
|
||||||
|
if (method == "exact")
|
||||||
|
{
|
||||||
|
/* reference:
|
||||||
|
* Gunar Schröer and Dietrich Trenkler
|
||||||
|
* Exact and Randomization Distributions of Kolmogorov-Smirnov, Tests for Two or Three Samples
|
||||||
|
*
|
||||||
|
* and
|
||||||
|
*
|
||||||
|
* Thomas Viehmann
|
||||||
|
* Numerically more stable computation of the p-values for the two-sample Kolmogorov-Smirnov test
|
||||||
|
*/
|
||||||
|
if (n2 > n1)
|
||||||
|
std::swap(n1, n2);
|
||||||
|
|
||||||
|
const Float64 f_n1 = static_cast<Float64>(n1);
|
||||||
|
const Float64 f_n2 = static_cast<Float64>(n2);
|
||||||
|
const Float64 k_d = (0.5 + floor(d * f_n2 * f_n1 - tol)) / (f_n2 * f_n1);
|
||||||
|
PaddedPODArray<Float64> c(n1 + 1);
|
||||||
|
|
||||||
|
auto check = alternative == Alternative::TwoSided ?
|
||||||
|
[](const Float64 & q, const Float64 & r, const Float64 & s) { return fabs(r - s) >= q; }
|
||||||
|
: [](const Float64 & q, const Float64 & r, const Float64 & s) { return r - s >= q; };
|
||||||
|
|
||||||
|
c[0] = 0;
|
||||||
|
for (UInt64 j = 1; j <= n1; j++)
|
||||||
|
if (check(k_d, 0., j / f_n1))
|
||||||
|
c[j] = 1.;
|
||||||
|
else
|
||||||
|
c[j] = c[j - 1];
|
||||||
|
|
||||||
|
for (UInt64 i = 1; i <= n2; i++)
|
||||||
|
{
|
||||||
|
if (check(k_d, i / f_n2, 0.))
|
||||||
|
c[0] = 1.;
|
||||||
|
for (UInt64 j = 1; j <= n1; j++)
|
||||||
|
if (check(k_d, i / f_n2, j / f_n1))
|
||||||
|
c[j] = 1.;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Float64 v = i / static_cast<Float64>(i + j);
|
||||||
|
Float64 w = j / static_cast<Float64>(i + j);
|
||||||
|
c[j] = v * c[j] + w * c[j - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p_value = c[n1];
|
||||||
|
}
|
||||||
|
else if (method == "asymp")
|
||||||
|
{
|
||||||
|
Float64 n = std::min(n1, n2);
|
||||||
|
Float64 m = std::max(n1, n2);
|
||||||
|
Float64 p = sqrt((n * m) / (n + m)) * d;
|
||||||
|
|
||||||
|
if (alternative == Alternative::TwoSided)
|
||||||
|
{
|
||||||
|
/* reference:
|
||||||
|
* J.DURBIN
|
||||||
|
* Distribution theory for tests based on the sample distribution function
|
||||||
|
*/
|
||||||
|
Float64 new_val, old_val, s, w, z;
|
||||||
|
UInt64 k_max = static_cast<UInt64>(sqrt(2 - log(tol)));
|
||||||
|
|
||||||
|
if (p < 1)
|
||||||
|
{
|
||||||
|
z = - (M_PI_2 * M_PI_4) / (p * p);
|
||||||
|
w = log(p);
|
||||||
|
s = 0;
|
||||||
|
for (UInt64 k = 1; k < k_max; k += 2)
|
||||||
|
s += exp(k * k * z - w);
|
||||||
|
p = s / 0.398942280401432677939946059934;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
z = -2 * p * p;
|
||||||
|
s = -1;
|
||||||
|
UInt64 k = 1;
|
||||||
|
old_val = 0;
|
||||||
|
new_val = 1;
|
||||||
|
while (fabs(old_val - new_val) > tol)
|
||||||
|
{
|
||||||
|
old_val = new_val;
|
||||||
|
new_val += 2 * s * exp(z * k * k);
|
||||||
|
s *= -1;
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
p = new_val;
|
||||||
|
}
|
||||||
|
p_value = 1 - p;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* reference:
|
||||||
|
* J. L. HODGES, Jr
|
||||||
|
* The significance probability of the Smirnov two-sample test
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Use Hodges' suggested approximation Eqn 5.3
|
||||||
|
// Requires m to be the larger of (n1, n2)
|
||||||
|
Float64 expt = -2 * p * p - 2 * p * (m + 2 * n) / sqrt(m * n * (m + n)) / 3.0;
|
||||||
|
p_value = exp(expt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {d, p_value};
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class AggregateFunctionKolmogorovSmirnov final:
|
||||||
|
public IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov>
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
using Alternative = typename KolmogorovSmirnov::Alternative;
|
||||||
|
Alternative alternative = Alternative::TwoSided;
|
||||||
|
String method = "auto";
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit AggregateFunctionKolmogorovSmirnov(const DataTypes & arguments, const Array & params)
|
||||||
|
: IAggregateFunctionDataHelper<KolmogorovSmirnov, AggregateFunctionKolmogorovSmirnov> ({arguments}, {}, createResultType())
|
||||||
|
{
|
||||||
|
if (params.size() > 2)
|
||||||
|
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require two parameter or less", getName());
|
||||||
|
|
||||||
|
if (params.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (params[0].getType() != Field::Types::String)
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName());
|
||||||
|
|
||||||
|
const auto & param = params[0].get<String>();
|
||||||
|
if (param == "two-sided")
|
||||||
|
alternative = Alternative::TwoSided;
|
||||||
|
else if (param == "less")
|
||||||
|
alternative = Alternative::Less;
|
||||||
|
else if (param == "greater")
|
||||||
|
alternative = Alternative::Greater;
|
||||||
|
else
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown parameter in aggregate function {}. "
|
||||||
|
"It must be one of: 'two-sided', 'less', 'greater'", getName());
|
||||||
|
|
||||||
|
if (params.size() != 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (params[1].getType() != Field::Types::String)
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName());
|
||||||
|
|
||||||
|
method = params[1].get<String>();
|
||||||
|
if (method != "auto" && method != "exact" && method != "asymp")
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. "
|
||||||
|
"It must be one of: 'auto', 'exact', 'asymp'", getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override
|
||||||
|
{
|
||||||
|
return "kolmogorovSmirnovTest";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool allocatesMemoryInArena() const override { return true; }
|
||||||
|
|
||||||
|
static DataTypePtr createResultType()
|
||||||
|
{
|
||||||
|
DataTypes types
|
||||||
|
{
|
||||||
|
std::make_shared<DataTypeNumber<Float64>>(),
|
||||||
|
std::make_shared<DataTypeNumber<Float64>>(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Strings names
|
||||||
|
{
|
||||||
|
"d_statistic",
|
||||||
|
"p_value"
|
||||||
|
};
|
||||||
|
|
||||||
|
return std::make_shared<DataTypeTuple>(
|
||||||
|
std::move(types),
|
||||||
|
std::move(names)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||||
|
{
|
||||||
|
Float64 value = columns[0]->getFloat64(row_num);
|
||||||
|
UInt8 is_second = columns[1]->getUInt(row_num);
|
||||||
|
if (is_second)
|
||||||
|
this->data(place).addY(value, arena);
|
||||||
|
else
|
||||||
|
this->data(place).addX(value, arena);
|
||||||
|
}
|
||||||
|
|
||||||
|
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||||
|
{
|
||||||
|
this->data(place).merge(this->data(rhs), arena);
|
||||||
|
}
|
||||||
|
|
||||||
|
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||||
|
{
|
||||||
|
this->data(place).write(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||||
|
{
|
||||||
|
this->data(place).read(buf, arena);
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||||
|
{
|
||||||
|
if (!this->data(place).size_x || !this->data(place).size_y)
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName());
|
||||||
|
|
||||||
|
auto [d_statistic, p_value] = this->data(place).getResult(alternative, method);
|
||||||
|
|
||||||
|
/// Because p-value is a probability.
|
||||||
|
p_value = std::min(1.0, std::max(0.0, p_value));
|
||||||
|
|
||||||
|
auto & column_tuple = assert_cast<ColumnTuple &>(to);
|
||||||
|
auto & column_stat = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(0));
|
||||||
|
auto & column_value = assert_cast<ColumnVector<Float64> &>(column_tuple.getColumn(1));
|
||||||
|
|
||||||
|
column_stat.getData().push_back(d_statistic);
|
||||||
|
column_value.getData().push_back(p_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -26,9 +26,11 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename> class QuantileTiming;
|
template <typename> class QuantileTiming;
|
||||||
|
template <typename> class QuantileApprox;
|
||||||
|
|
||||||
|
|
||||||
/** Generic aggregate function for calculation of quantiles.
|
/** Generic aggregate function for calculation of quantiles.
|
||||||
@ -60,6 +62,7 @@ private:
|
|||||||
using ColVecType = ColumnVectorOrDecimal<Value>;
|
using ColVecType = ColumnVectorOrDecimal<Value>;
|
||||||
|
|
||||||
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
|
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
|
||||||
|
static constexpr bool is_quantile_approx = std::is_same_v<Data, QuantileApprox<Value>>;
|
||||||
static_assert(!is_decimal<Value> || !returns_float);
|
static_assert(!is_decimal<Value> || !returns_float);
|
||||||
|
|
||||||
QuantileLevels<Float64> levels;
|
QuantileLevels<Float64> levels;
|
||||||
@ -67,22 +70,57 @@ private:
|
|||||||
/// Used when there are single level to get.
|
/// Used when there are single level to get.
|
||||||
Float64 level = 0.5;
|
Float64 level = 0.5;
|
||||||
|
|
||||||
|
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
|
||||||
|
ssize_t accuracy = 10000;
|
||||||
|
|
||||||
DataTypePtr & argument_type;
|
DataTypePtr & argument_type;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
|
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
|
||||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
|
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
|
||||||
argument_types_, params, createResultType(argument_types_))
|
argument_types_, params, createResultType(argument_types_))
|
||||||
, levels(params, returns_many)
|
, levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many)
|
||||||
, level(levels.levels[0])
|
, level(levels.levels[0])
|
||||||
, argument_type(this->argument_types[0])
|
, argument_type(this->argument_types[0])
|
||||||
{
|
{
|
||||||
if (!returns_many && levels.size() > 1)
|
if (!returns_many && levels.size() > 1)
|
||||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName());
|
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
|
||||||
|
|
||||||
|
if constexpr (is_quantile_approx)
|
||||||
|
{
|
||||||
|
if (params.empty())
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());
|
||||||
|
|
||||||
|
const auto & accuracy_field = params[0];
|
||||||
|
if (!isInt64OrUInt64FieldType(accuracy_field.getType()))
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
|
||||||
|
|
||||||
|
if (accuracy_field.getType() == Field::Types::Int64)
|
||||||
|
accuracy = accuracy_field.get<Int64>();
|
||||||
|
else
|
||||||
|
accuracy = accuracy_field.get<UInt64>();
|
||||||
|
|
||||||
|
if (accuracy <= 0)
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::BAD_ARGUMENTS,
|
||||||
|
"Aggregate function {} requires accuracy parameter with positive value but is {}",
|
||||||
|
getName(),
|
||||||
|
accuracy);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String getName() const override { return Name::name; }
|
String getName() const override { return Name::name; }
|
||||||
|
|
||||||
|
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||||
|
{
|
||||||
|
if constexpr (is_quantile_approx)
|
||||||
|
new (place) Data(accuracy);
|
||||||
|
else
|
||||||
|
new (place) Data;
|
||||||
|
}
|
||||||
|
|
||||||
static DataTypePtr createResultType(const DataTypes & argument_types_)
|
static DataTypePtr createResultType(const DataTypes & argument_types_)
|
||||||
{
|
{
|
||||||
DataTypePtr res;
|
DataTypePtr res;
|
||||||
@ -257,4 +295,7 @@ struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16";
|
|||||||
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
|
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
|
||||||
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
|
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
|
||||||
|
|
||||||
|
struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; };
|
||||||
|
struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; };
|
||||||
|
|
||||||
}
|
}
|
||||||
|
71
src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp
Normal file
71
src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||||
|
#include <AggregateFunctions/QuantileApprox.h>
|
||||||
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||||
|
#include <AggregateFunctions/Helpers.h>
|
||||||
|
#include <DataTypes/DataTypeDate.h>
|
||||||
|
#include <DataTypes/DataTypeDateTime.h>
|
||||||
|
#include <Core/Field.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename Value, bool _> using FuncQuantileApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantileApprox, false, void, false>;
|
||||||
|
template <typename Value, bool _> using FuncQuantilesApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantilesApprox, false, void, true>;
|
||||||
|
|
||||||
|
template <template <typename, bool> class Function>
|
||||||
|
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||||
|
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||||
|
{
|
||||||
|
/// Second argument type check doesn't depend on the type of the first one.
|
||||||
|
Function<void, true>::assertSecondArg(argument_types);
|
||||||
|
|
||||||
|
const DataTypePtr & argument_type = argument_types[0];
|
||||||
|
WhichDataType which(argument_type);
|
||||||
|
|
||||||
|
#define DISPATCH(TYPE) \
|
||||||
|
if (which.idx == TypeIndex::TYPE) \
|
||||||
|
return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||||
|
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||||
|
#undef DISPATCH
|
||||||
|
|
||||||
|
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||||
|
|
||||||
|
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||||
|
|
||||||
|
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||||
|
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||||
|
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||||
|
argument_type->getName(), name);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers quantileApprox, quantilesApprox and the medianApprox alias in the factory.
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory & factory)
{
    factory.registerFunction(NameQuantileApprox::name, createAggregateFunctionQuantile<FuncQuantileApprox>);

    /// An aggregate function that returns an array cannot return NULL on an empty set.
    AggregateFunctionProperties array_result_properties = { .returns_default_when_only_null = true };
    factory.registerFunction(
        NameQuantilesApprox::name,
        {createAggregateFunctionQuantile<FuncQuantilesApprox>, array_result_properties});

    /// 'medianApprox' is an alias for 'quantileApprox'
    factory.registerAlias("medianApprox", NameQuantileApprox::name);
}
|
||||||
|
|
||||||
|
}
|
477
src/AggregateFunctions/QuantileApprox.h
Normal file
477
src/AggregateFunctions/QuantileApprox.h
Normal file
@ -0,0 +1,477 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <base/sort.h>
|
||||||
|
#include <Common/RadixSort.h>
|
||||||
|
#include <IO/WriteBuffer.h>
|
||||||
|
#include <IO/ReadBuffer.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class ApproxSampler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
struct Stats
|
||||||
|
{
|
||||||
|
T value; // the sampled value
|
||||||
|
Int64 g; // the minimum rank jump from the previous value's minimum rank
|
||||||
|
Int64 delta; // the maximum span of the rank
|
||||||
|
|
||||||
|
Stats() = default;
|
||||||
|
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct QueryResult
|
||||||
|
{
|
||||||
|
size_t index;
|
||||||
|
Int64 rank;
|
||||||
|
T value;
|
||||||
|
|
||||||
|
QueryResult(size_t index_, Int64 rank_, T value_) : index(index_), rank(rank_), value(value_) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
ApproxSampler() = default;
|
||||||
|
|
||||||
|
explicit ApproxSampler(
|
||||||
|
double relative_error_,
|
||||||
|
size_t compress_threshold_ = default_compress_threshold,
|
||||||
|
size_t count_ = 0,
|
||||||
|
bool compressed_ = false)
|
||||||
|
: relative_error(relative_error_)
|
||||||
|
, compress_threshold(compress_threshold_)
|
||||||
|
, count(count_)
|
||||||
|
, compressed(compressed_)
|
||||||
|
{
|
||||||
|
sampled.reserve(compress_threshold);
|
||||||
|
backup_sampled.reserve(compress_threshold);
|
||||||
|
|
||||||
|
head_sampled.reserve(default_head_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCompressed() const { return compressed; }
|
||||||
|
void setCompressed() { compressed = true; }
|
||||||
|
|
||||||
|
void insert(T x)
|
||||||
|
{
|
||||||
|
head_sampled.push_back(x);
|
||||||
|
compressed = false;
|
||||||
|
if (head_sampled.size() >= default_head_size)
|
||||||
|
{
|
||||||
|
withHeadBufferInserted();
|
||||||
|
if (sampled.size() >= compress_threshold)
|
||||||
|
compress();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void query(const Float64 * percentiles, const size_t * indices, size_t size, T * result) const
|
||||||
|
{
|
||||||
|
if (!head_sampled.empty())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot operate on an uncompressed summary, call compress() first");
|
||||||
|
|
||||||
|
if (sampled.empty())
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
result[i] = T();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Int64 current_max = std::numeric_limits<Int64>::min();
|
||||||
|
for (const auto & stats : sampled)
|
||||||
|
current_max = std::max(stats.delta + stats.g, current_max);
|
||||||
|
Int64 target_error = current_max/2;
|
||||||
|
|
||||||
|
size_t index= 0;
|
||||||
|
auto min_rank = sampled[0].g;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
double percentile = percentiles[indices[i]];
|
||||||
|
if (percentile <= relative_error)
|
||||||
|
{
|
||||||
|
result[indices[i]] = sampled.front().value;
|
||||||
|
}
|
||||||
|
else if (percentile >= 1 - relative_error)
|
||||||
|
{
|
||||||
|
result[indices[i]] = sampled.back().value;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QueryResult res = findApproxQuantile(index, min_rank, target_error, percentile);
|
||||||
|
index = res.index;
|
||||||
|
min_rank = res.rank;
|
||||||
|
result[indices[i]] = res.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void compress()
|
||||||
|
{
|
||||||
|
if (compressed)
|
||||||
|
return;
|
||||||
|
|
||||||
|
withHeadBufferInserted();
|
||||||
|
|
||||||
|
doCompress(2 * relative_error * count);
|
||||||
|
compressed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void merge(const ApproxSampler & other)
|
||||||
|
{
|
||||||
|
if (other.count == 0)
|
||||||
|
return;
|
||||||
|
else if (count == 0)
|
||||||
|
{
|
||||||
|
compress_threshold = other.compress_threshold;
|
||||||
|
relative_error = other.relative_error;
|
||||||
|
count = other.count;
|
||||||
|
compressed = other.compressed;
|
||||||
|
|
||||||
|
sampled.resize(other.sampled.size());
|
||||||
|
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Merge the two buffers.
|
||||||
|
// The GK algorithm is a bit unclear about it, but we need to adjust the statistics during the
|
||||||
|
// merging. The main idea is that samples that come from one side will suffer from the lack of
|
||||||
|
// precision of the other.
|
||||||
|
// As a concrete example, take two QuantileSummaries whose samples (value, g, delta) are:
|
||||||
|
// `a = [(0, 1, 0), (20, 99, 0)]` and `b = [(10, 1, 0), (30, 49, 0)]`
|
||||||
|
// This means `a` has 100 values, whose minimum is 0 and maximum is 20,
|
||||||
|
// while `b` has 50 values, between 10 and 30.
|
||||||
|
// The resulting samples of the merge will be:
|
||||||
|
// a+b = [(0, 1, 0), (10, 1, ??), (20, 99, ??), (30, 49, 0)]
|
||||||
|
// The values of `g` do not change, as they represent the minimum number of values between two
|
||||||
|
// consecutive samples. The values of `delta` should be adjusted, however.
|
||||||
|
// Take the case of the sample `10` from `b`. In the original stream, it could have appeared
|
||||||
|
// right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`.
|
||||||
|
// In the GK algorithm's style of working in terms of maximum bounds, one can observe that the
|
||||||
|
// maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) =
|
||||||
|
// floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is
|
||||||
|
// `floor(2 * eps_b * n_b)`.
|
||||||
|
// Only samples that interleave the other side are affected. That means that samples from
|
||||||
|
// one side that are lesser (or greater) than all samples from the other side are just copied
|
||||||
|
// unmodified.
|
||||||
|
// If the merging instances have different `relativeError`, the resulting instance will carry
|
||||||
|
// the largest one: `eps_ab = max(eps_a, eps_b)`.
|
||||||
|
// The main invariant of the GK algorithm is kept:
|
||||||
|
// `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
|
||||||
|
// `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
|
||||||
|
// Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])`
|
||||||
|
compress();
|
||||||
|
|
||||||
|
backup_sampled.clear();
|
||||||
|
backup_sampled.reserve(sampled.size() + other.sampled.size());
|
||||||
|
double merged_relative_error = std::max(relative_error, other.relative_error);
|
||||||
|
size_t merged_count = count + other.count;
|
||||||
|
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
|
||||||
|
Int64 additional_other_delta = static_cast<Int64>(std::floor(2 * relative_error * count));
|
||||||
|
|
||||||
|
// Do a merge of two sorted lists until one of the lists is fully consumed
|
||||||
|
size_t self_idx = 0;
|
||||||
|
size_t other_idx = 0;
|
||||||
|
while (self_idx < sampled.size() && other_idx < other.sampled.size())
|
||||||
|
{
|
||||||
|
const Stats & self_sample = sampled[self_idx];
|
||||||
|
const Stats & other_sample = other.sampled[other_idx];
|
||||||
|
|
||||||
|
// Detect next sample
|
||||||
|
Stats next_sample;
|
||||||
|
Int64 additional_delta = 0;
|
||||||
|
if (self_sample.value < other_sample.value)
|
||||||
|
{
|
||||||
|
++self_idx;
|
||||||
|
next_sample = self_sample;
|
||||||
|
additional_delta = other_idx > 0 ? additional_self_delta : 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++other_idx;
|
||||||
|
next_sample = other_sample;
|
||||||
|
additional_delta = self_idx > 0 ? additional_other_delta : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert it
|
||||||
|
next_sample.delta += additional_delta;
|
||||||
|
backup_sampled.emplace_back(std::move(next_sample));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the remaining samples from the other list
|
||||||
|
// (by construction, at most one `while` loop will run)
|
||||||
|
while (self_idx < sampled.size())
|
||||||
|
{
|
||||||
|
backup_sampled.emplace_back(sampled[self_idx]);
|
||||||
|
++self_idx;
|
||||||
|
}
|
||||||
|
while (other_idx < other.sampled.size())
|
||||||
|
{
|
||||||
|
backup_sampled.emplace_back(other.sampled[other_idx]);
|
||||||
|
++other_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::swap(sampled, backup_sampled);
|
||||||
|
relative_error = merged_relative_error;
|
||||||
|
count = merged_count;
|
||||||
|
compress_threshold = other.compress_threshold;
|
||||||
|
|
||||||
|
doCompress(2 * merged_relative_error * merged_count);
|
||||||
|
compressed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(WriteBuffer & buf) const
|
||||||
|
{
|
||||||
|
writeIntBinary<size_t>(compress_threshold, buf);
|
||||||
|
writeFloatBinary<double>(relative_error, buf);
|
||||||
|
writeIntBinary<size_t>(count, buf);
|
||||||
|
writeIntBinary<size_t>(sampled.size(), buf);
|
||||||
|
|
||||||
|
for (const auto & stats : sampled)
|
||||||
|
{
|
||||||
|
writeFloatBinary<T>(stats.value, buf);
|
||||||
|
writeIntBinary<Int64>(stats.g, buf);
|
||||||
|
writeIntBinary<Int64>(stats.delta, buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void read(ReadBuffer & buf)
|
||||||
|
{
|
||||||
|
readIntBinary<size_t>(compress_threshold, buf);
|
||||||
|
readFloatBinary<double>(relative_error, buf);
|
||||||
|
readIntBinary<size_t>(count, buf);
|
||||||
|
|
||||||
|
size_t sampled_len = 0;
|
||||||
|
readIntBinary<size_t>(sampled_len, buf);
|
||||||
|
sampled.resize(sampled_len);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sampled_len; ++i)
|
||||||
|
{
|
||||||
|
auto stats = sampled[i];
|
||||||
|
readFloatBinary<T>(stats.value, buf);
|
||||||
|
readIntBinary<Int64>(stats.g, buf);
|
||||||
|
readIntBinary<Int64>(stats.delta, buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
QueryResult findApproxQuantile(size_t index, Int64 min_rank_at_index, double target_error, double percentile) const
|
||||||
|
{
|
||||||
|
Stats curr_sample = sampled[index];
|
||||||
|
Int64 rank = static_cast<Int64>(std::ceil(percentile * count));
|
||||||
|
size_t i = index;
|
||||||
|
Int64 min_rank = min_rank_at_index;
|
||||||
|
while (i < sampled.size() - 1)
|
||||||
|
{
|
||||||
|
Int64 max_rank = min_rank + curr_sample.delta;
|
||||||
|
if (max_rank - target_error <= rank && rank <= min_rank + target_error)
|
||||||
|
return {i, min_rank, curr_sample.value};
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++i;
|
||||||
|
curr_sample = sampled[i];
|
||||||
|
min_rank += curr_sample.g;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {sampled.size()-1, 0, sampled.back().value};
|
||||||
|
}
|
||||||
|
|
||||||
|
void withHeadBufferInserted()
|
||||||
|
{
|
||||||
|
if (head_sampled.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
|
||||||
|
if (use_radix_sort)
|
||||||
|
RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
|
||||||
|
else
|
||||||
|
::sort(head_sampled.begin(), head_sampled.end());
|
||||||
|
|
||||||
|
backup_sampled.clear();
|
||||||
|
backup_sampled.reserve(sampled.size() + head_sampled.size());
|
||||||
|
|
||||||
|
size_t sample_idx = 0;
|
||||||
|
size_t ops_idx = 0;
|
||||||
|
size_t current_count = count;
|
||||||
|
for (; ops_idx < head_sampled.size(); ++ops_idx)
|
||||||
|
{
|
||||||
|
T current_sample = head_sampled[ops_idx];
|
||||||
|
|
||||||
|
// Add all the samples before the next observation.
|
||||||
|
while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
|
||||||
|
{
|
||||||
|
backup_sampled.emplace_back(sampled[sample_idx]);
|
||||||
|
++sample_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If it is the first one to insert, of if it is the last one
|
||||||
|
++current_count;
|
||||||
|
Int64 delta;
|
||||||
|
if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
|
||||||
|
delta = 0;
|
||||||
|
else
|
||||||
|
delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));
|
||||||
|
|
||||||
|
backup_sampled.emplace_back(current_sample, 1, delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add all the remaining existing samples
|
||||||
|
for (; sample_idx < sampled.size(); ++sample_idx)
|
||||||
|
backup_sampled.emplace_back(sampled[sample_idx]);
|
||||||
|
|
||||||
|
std::swap(sampled, backup_sampled);
|
||||||
|
head_sampled.clear();
|
||||||
|
count = current_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void doCompress(double merge_threshold)
|
||||||
|
{
|
||||||
|
if (sampled.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
backup_sampled.clear();
|
||||||
|
// Start for the last element, which is always part of the set.
|
||||||
|
// The head contains the current new head, that may be merged with the current element.
|
||||||
|
Stats head = sampled.back();
|
||||||
|
ssize_t i = sampled.size() - 2;
|
||||||
|
|
||||||
|
// Do not compress the last element
|
||||||
|
while (i >= 1)
|
||||||
|
{
|
||||||
|
// The current sample:
|
||||||
|
const auto & sample1 = sampled[i];
|
||||||
|
// Do we need to compress?
|
||||||
|
if (sample1.g + head.g + head.delta < merge_threshold)
|
||||||
|
{
|
||||||
|
// Do not insert yet, just merge the current element into the head.
|
||||||
|
head.g += sample1.g;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Prepend the current head, and keep the current sample as target for merging.
|
||||||
|
backup_sampled.push_back(head);
|
||||||
|
head = sample1;
|
||||||
|
}
|
||||||
|
--i;
|
||||||
|
}
|
||||||
|
|
||||||
|
backup_sampled.push_back(head);
|
||||||
|
// If necessary, add the minimum element:
|
||||||
|
auto curr_head = sampled.front();
|
||||||
|
|
||||||
|
// don't add the minimum element if `currentSamples` has only one element (both `currHead` and
|
||||||
|
// `head` point to the same element)
|
||||||
|
if (curr_head.value <= head.value && sampled.size() > 1)
|
||||||
|
backup_sampled.emplace_back(sampled.front());
|
||||||
|
|
||||||
|
std::reverse(backup_sampled.begin(), backup_sampled.end());
|
||||||
|
std::swap(sampled, backup_sampled);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// NOTE(review): presumably the allowed rank error; QuantileApprox constructs the sampler with 1.0 / accuracy - confirm against the constructor.
double relative_error;
|
||||||
|
/// NOTE(review): presumably the sample count that triggers a compression - confirm against the insert path.
size_t compress_threshold;
|
||||||
|
/// Overwritten with the new running count once buffered head values have been merged into `sampled`.
size_t count = 0;
|
||||||
|
/// Compression state flag. It has no default initializer here, unlike `count`.
/// NOTE(review): confirm that every constructor sets it before isCompressed() can be called.
bool compressed;
|
||||||
|
|
||||||
|
/// The current summary; doCompress() rebuilds it.
PaddedPODArray<Stats> sampled;
|
||||||
|
/// Scratch array: the next summary is assembled here and then swapped with `sampled`.
PaddedPODArray<Stats> backup_sampled;
|
||||||
|
|
||||||
|
/// Values of type T that are cleared once they have been merged into `sampled`.
/// NOTE(review): presumably a buffer of newly inserted values - confirm against insert().
PaddedPODArray<T> head_sampled;
|
||||||
|
|
||||||
|
/// NOTE(review): presumably the default for `compress_threshold` - confirm against the constructor.
static constexpr size_t default_compress_threshold = 10000;
|
||||||
|
/// NOTE(review): presumably the capacity of `head_sampled` before it is flushed - confirm against insert().
static constexpr size_t default_head_size = 50000;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
class QuantileApprox
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
using Data = ApproxSampler<Value>;
|
||||||
|
mutable Data data;
|
||||||
|
|
||||||
|
public:
|
||||||
|
QuantileApprox() = default;
|
||||||
|
|
||||||
|
explicit QuantileApprox(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
|
||||||
|
|
||||||
|
void add(const Value & x)
|
||||||
|
{
|
||||||
|
data.insert(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Weight>
|
||||||
|
void add(const Value &, const Weight &)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for GKSampler");
|
||||||
|
}
|
||||||
|
|
||||||
|
void merge(const QuantileApprox & rhs)
|
||||||
|
{
|
||||||
|
if (!data.isCompressed())
|
||||||
|
data.compress();
|
||||||
|
|
||||||
|
data.merge(rhs.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void serialize(WriteBuffer & buf) const
|
||||||
|
{
|
||||||
|
/// Always compress before serialization
|
||||||
|
if (!data.isCompressed())
|
||||||
|
data.compress();
|
||||||
|
|
||||||
|
data.write(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deserialize(ReadBuffer & buf)
|
||||||
|
{
|
||||||
|
data.read(buf);
|
||||||
|
|
||||||
|
data.setCompressed();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the value of the `level` quantile. The level must be between 0 and 1.
|
||||||
|
Value get(Float64 level)
|
||||||
|
{
|
||||||
|
if (!data.isCompressed())
|
||||||
|
data.compress();
|
||||||
|
|
||||||
|
Value res;
|
||||||
|
size_t indice = 0;
|
||||||
|
data.query(&level, &indice, 1, &res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
|
||||||
|
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
|
||||||
|
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
|
||||||
|
{
|
||||||
|
if (!data.isCompressed())
|
||||||
|
data.compress();
|
||||||
|
|
||||||
|
data.query(levels, indices, size, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
Float64 getFloat64(Float64 /*level*/)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat64 is not implemented for GKSampler");
|
||||||
|
}
|
||||||
|
|
||||||
|
void getManyFloat(const Float64 * /*levels*/, const size_t * /*indices*/, size_t /*size*/, Float64 * /*result*/)
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for GKSampler");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -32,6 +32,7 @@ void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
|
|||||||
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
|
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
|
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
|
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
|
||||||
|
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
|
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
|
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionRate(AggregateFunctionFactory &);
|
void registerAggregateFunctionRate(AggregateFunctionFactory &);
|
||||||
@ -79,6 +80,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory
|
|||||||
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
|
void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &);
|
||||||
|
void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory);
|
||||||
|
|
||||||
class AggregateFunctionCombinatorFactory;
|
class AggregateFunctionCombinatorFactory;
|
||||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||||
@ -123,6 +125,7 @@ void registerAggregateFunctions()
|
|||||||
registerAggregateFunctionsQuantileTDigestWeighted(factory);
|
registerAggregateFunctionsQuantileTDigestWeighted(factory);
|
||||||
registerAggregateFunctionsQuantileBFloat16(factory);
|
registerAggregateFunctionsQuantileBFloat16(factory);
|
||||||
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
|
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
|
||||||
|
registerAggregateFunctionsQuantileApprox(factory);
|
||||||
registerAggregateFunctionsSequenceMatch(factory);
|
registerAggregateFunctionsSequenceMatch(factory);
|
||||||
registerAggregateFunctionWindowFunnel(factory);
|
registerAggregateFunctionWindowFunnel(factory);
|
||||||
registerAggregateFunctionRate(factory);
|
registerAggregateFunctionRate(factory);
|
||||||
@ -170,6 +173,7 @@ void registerAggregateFunctions()
|
|||||||
registerAggregateFunctionExponentialMovingAverage(factory);
|
registerAggregateFunctionExponentialMovingAverage(factory);
|
||||||
registerAggregateFunctionSparkbar(factory);
|
registerAggregateFunctionSparkbar(factory);
|
||||||
registerAggregateFunctionAnalysisOfVariance(factory);
|
registerAggregateFunctionAnalysisOfVariance(factory);
|
||||||
|
registerAggregateFunctionKolmogorovSmirnovTest(factory);
|
||||||
|
|
||||||
registerWindowFunctions(factory);
|
registerWindowFunctions(factory);
|
||||||
}
|
}
|
||||||
|
@ -86,7 +86,12 @@ public:
|
|||||||
|
|
||||||
DataTypePtr getResultType() const override
|
DataTypePtr getResultType() const override
|
||||||
{
|
{
|
||||||
return getExpression()->getResultType();
|
return result_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void resolve(DataTypePtr lambda_type)
|
||||||
|
{
|
||||||
|
result_type = std::move(lambda_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
|
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
|
||||||
@ -102,6 +107,7 @@ protected:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
Names argument_names;
|
Names argument_names;
|
||||||
|
DataTypePtr result_type;
|
||||||
|
|
||||||
static constexpr size_t arguments_child_index = 0;
|
static constexpr size_t arguments_child_index = 0;
|
||||||
static constexpr size_t expression_child_index = 1;
|
static constexpr size_t expression_child_index = 1;
|
||||||
|
@ -5085,8 +5085,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
|||||||
arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str();
|
arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
argument_types[function_lambda_argument_index] = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve->getResultType());
|
auto lambda_resolved_type = std::make_shared<DataTypeFunction>(function_data_type_argument_types, lambda_to_resolve_typed.getExpression()->getResultType());
|
||||||
argument_columns[function_lambda_argument_index].type = argument_types[function_lambda_argument_index];
|
lambda_to_resolve_typed.resolve(lambda_resolved_type);
|
||||||
|
|
||||||
|
argument_types[function_lambda_argument_index] = lambda_resolved_type;
|
||||||
|
argument_columns[function_lambda_argument_index].type = lambda_resolved_type;
|
||||||
function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve);
|
function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,13 +115,23 @@ private:
|
|||||||
|
|
||||||
for (size_t i = 0; i < expected_argument_types_size; ++i)
|
for (size_t i = 0; i < expected_argument_types_size; ++i)
|
||||||
{
|
{
|
||||||
// Skip lambdas
|
|
||||||
if (WhichDataType(expected_argument_types[i]).isFunction())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto & expected_argument_type = expected_argument_types[i];
|
const auto & expected_argument_type = expected_argument_types[i];
|
||||||
const auto & actual_argument_type = actual_argument_columns[i].type;
|
const auto & actual_argument_type = actual_argument_columns[i].type;
|
||||||
|
|
||||||
|
if (!expected_argument_type)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Function {} expected argument {} type is not set after running {} pass",
|
||||||
|
function->toAST()->formatForErrorMessage(),
|
||||||
|
i + 1,
|
||||||
|
pass_name);
|
||||||
|
|
||||||
|
if (!actual_argument_type)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Function {} actual argument {} type is not set after running {} pass",
|
||||||
|
function->toAST()->formatForErrorMessage(),
|
||||||
|
i + 1,
|
||||||
|
pass_name);
|
||||||
|
|
||||||
if (!expected_argument_type->equals(*actual_argument_type))
|
if (!expected_argument_type->equals(*actual_argument_type))
|
||||||
{
|
{
|
||||||
/// Aggregate functions remove low cardinality for their argument types
|
/// Aggregate functions remove low cardinality for their argument types
|
||||||
|
@ -36,7 +36,7 @@ Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milli
|
|||||||
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
||||||
{
|
{
|
||||||
std::lock_guard lock{replicated_tables_mutex};
|
std::lock_guard lock{replicated_tables_mutex};
|
||||||
replicated_tables.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums);
|
replicated_tables.addPartNames({table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
||||||
@ -49,7 +49,7 @@ Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_sha
|
|||||||
void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations)
|
void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations)
|
||||||
{
|
{
|
||||||
std::lock_guard lock{replicated_tables_mutex};
|
std::lock_guard lock{replicated_tables_mutex};
|
||||||
replicated_tables.addMutations(table_shared_id, table_name_for_logs, replica_name, mutations);
|
replicated_tables.addMutations({table_shared_id, table_name_for_logs, replica_name, mutations});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
||||||
@ -62,7 +62,7 @@ std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getRepli
|
|||||||
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path)
|
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path)
|
||||||
{
|
{
|
||||||
std::lock_guard lock{replicated_tables_mutex};
|
std::lock_guard lock{replicated_tables_mutex};
|
||||||
replicated_tables.addDataPath(table_shared_id, data_path);
|
replicated_tables.addDataPath({table_shared_id, data_path});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const
|
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const
|
||||||
@ -75,7 +75,7 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
|
|||||||
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
|
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
|
||||||
{
|
{
|
||||||
std::lock_guard lock{replicated_access_mutex};
|
std::lock_guard lock{replicated_access_mutex};
|
||||||
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path);
|
replicated_access.addFilePath({access_zk_path, access_entity_type, "", file_path});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
|
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
|
||||||
@ -88,7 +88,7 @@ Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & acc
|
|||||||
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
|
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
|
||||||
{
|
{
|
||||||
std::lock_guard lock{replicated_sql_objects_mutex};
|
std::lock_guard lock{replicated_sql_objects_mutex};
|
||||||
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path);
|
replicated_sql_objects.addDirectory({loader_zk_path, object_type, "", dir_path});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
|
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
|
||||||
|
@ -1,13 +1,18 @@
|
|||||||
#include <Backups/BackupCoordinationRemote.h>
|
#include <Backups/BackupCoordinationRemote.h>
|
||||||
|
|
||||||
|
#include <base/hex.h>
|
||||||
|
|
||||||
#include <Access/Common/AccessEntityType.h>
|
#include <Access/Common/AccessEntityType.h>
|
||||||
|
#include <Backups/BackupCoordinationReplicatedAccess.h>
|
||||||
|
#include <Backups/BackupCoordinationStage.h>
|
||||||
|
#include <Common/escapeForFileName.h>
|
||||||
|
#include <Common/ZooKeeper/Common.h>
|
||||||
|
#include <Common/ZooKeeper/KeeperException.h>
|
||||||
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
|
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Common/ZooKeeper/KeeperException.h>
|
|
||||||
#include <Common/escapeForFileName.h>
|
|
||||||
#include <Backups/BackupCoordinationStage.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -154,8 +159,7 @@ BackupCoordinationRemote::BackupCoordinationRemote(
|
|||||||
const String & current_host_,
|
const String & current_host_,
|
||||||
bool plain_backup_,
|
bool plain_backup_,
|
||||||
bool is_internal_)
|
bool is_internal_)
|
||||||
: get_zookeeper(get_zookeeper_)
|
: root_zookeeper_path(root_zookeeper_path_)
|
||||||
, root_zookeeper_path(root_zookeeper_path_)
|
|
||||||
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
|
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
|
||||||
, keeper_settings(keeper_settings_)
|
, keeper_settings(keeper_settings_)
|
||||||
, backup_uuid(backup_uuid_)
|
, backup_uuid(backup_uuid_)
|
||||||
@ -165,17 +169,32 @@ BackupCoordinationRemote::BackupCoordinationRemote(
|
|||||||
, plain_backup(plain_backup_)
|
, plain_backup(plain_backup_)
|
||||||
, is_internal(is_internal_)
|
, is_internal(is_internal_)
|
||||||
, log(&Poco::Logger::get("BackupCoordinationRemote"))
|
, log(&Poco::Logger::get("BackupCoordinationRemote"))
|
||||||
{
|
, with_retries(
|
||||||
zookeeper_retries_info = ZooKeeperRetriesInfo(
|
|
||||||
"BackupCoordinationRemote",
|
|
||||||
log,
|
log,
|
||||||
keeper_settings.keeper_max_retries,
|
get_zookeeper_,
|
||||||
keeper_settings.keeper_retry_initial_backoff_ms,
|
keeper_settings,
|
||||||
keeper_settings.keeper_retry_max_backoff_ms);
|
[zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal]
|
||||||
|
(WithRetries::FaultyKeeper & zk)
|
||||||
|
{
|
||||||
|
/// Recreate this ephemeral node to signal that we are alive.
|
||||||
|
if (is_internal)
|
||||||
|
{
|
||||||
|
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host;
|
||||||
|
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
||||||
|
|
||||||
|
if (code == Coordination::Error::ZNODEEXISTS)
|
||||||
|
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
|
||||||
|
else if (code != Coordination::Error::ZOK)
|
||||||
|
throw zkutil::KeeperException(code, alive_node_path);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
{
|
||||||
createRootNodes();
|
createRootNodes();
|
||||||
|
|
||||||
stage_sync.emplace(
|
stage_sync.emplace(
|
||||||
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, log);
|
zookeeper_path,
|
||||||
|
with_retries,
|
||||||
|
log);
|
||||||
}
|
}
|
||||||
|
|
||||||
BackupCoordinationRemote::~BackupCoordinationRemote()
|
BackupCoordinationRemote::~BackupCoordinationRemote()
|
||||||
@ -191,44 +210,45 @@ BackupCoordinationRemote::~BackupCoordinationRemote()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeper() const
|
|
||||||
{
|
|
||||||
std::lock_guard lock{zookeeper_mutex};
|
|
||||||
if (!zookeeper || zookeeper->expired())
|
|
||||||
{
|
|
||||||
zookeeper = get_zookeeper();
|
|
||||||
|
|
||||||
/// It's possible that we connected to different [Zoo]Keeper instance
|
|
||||||
/// so we may read a bit stale state.
|
|
||||||
zookeeper->sync(zookeeper_path);
|
|
||||||
}
|
|
||||||
return zookeeper;
|
|
||||||
}
|
|
||||||
|
|
||||||
void BackupCoordinationRemote::createRootNodes()
|
void BackupCoordinationRemote::createRootNodes()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
|
||||||
zk->createAncestors(zookeeper_path);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(zookeeper_path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_part_names", "");
|
{
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_mutations", "");
|
with_retries.renewZooKeeper(zk);
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
|
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_access", "");
|
zk->createAncestors(zookeeper_path);
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects", "");
|
|
||||||
zk->createIfNotExists(zookeeper_path + "/file_infos", "");
|
Coordination::Requests ops;
|
||||||
zk->createIfNotExists(zookeeper_path + "/writing_files", "");
|
Coordination::Responses responses;
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_part_names", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_mutations", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_data_paths", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/file_infos", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/writing_files", "", zkutil::CreateMode::Persistent));
|
||||||
|
zk->tryMulti(ops, responses);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationRemote::removeAllNodes()
|
void BackupCoordinationRemote::removeAllNodes()
|
||||||
{
|
{
|
||||||
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
|
auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
|
||||||
///
|
holder.retries_ctl.retryLoop(
|
||||||
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
|
{
|
||||||
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
|
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
|
||||||
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
|
///
|
||||||
auto zk = getZooKeeper();
|
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
|
||||||
zk->removeRecursive(zookeeper_path);
|
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
|
||||||
|
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
|
||||||
|
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
zk->removeRecursive(zookeeper_path);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -256,10 +276,11 @@ Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std
|
|||||||
void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name)
|
void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
ZooKeeperRetriesControl retries_ctl(logging_name + "::create", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder(logging_name + "::create");
|
||||||
retries_ctl.retryLoop([&]
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
with_retries.renewZooKeeper(zk);
|
||||||
zk->createIfNotExists(path, "");
|
zk->createIfNotExists(path, "");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -280,10 +301,11 @@ void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String &
|
|||||||
String part = value.substr(begin, end - begin);
|
String part = value.substr(begin, end - begin);
|
||||||
String part_path = fmt::format("{}/{:06}", path, i);
|
String part_path = fmt::format("{}/{:06}", path, i);
|
||||||
|
|
||||||
ZooKeeperRetriesControl retries_ctl(logging_name + "::createPart", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder(logging_name + "::createPart");
|
||||||
retries_ctl.retryLoop([&]
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
with_retries.renewZooKeeper(zk);
|
||||||
zk->createIfNotExists(part_path, part);
|
zk->createIfNotExists(part_path, part);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -294,9 +316,11 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str
|
|||||||
Strings part_names;
|
Strings part_names;
|
||||||
|
|
||||||
{
|
{
|
||||||
ZooKeeperRetriesControl retries_ctl(logging_name + "::getChildren", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder(logging_name + "::getChildren");
|
||||||
retries_ctl.retryLoop([&]{
|
holder.retries_ctl.retryLoop(
|
||||||
auto zk = getZooKeeper();
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
part_names = zk->getChildren(path);
|
part_names = zk->getChildren(path);
|
||||||
std::sort(part_names.begin(), part_names.end());
|
std::sort(part_names.begin(), part_names.end());
|
||||||
});
|
});
|
||||||
@ -307,10 +331,11 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str
|
|||||||
{
|
{
|
||||||
String part;
|
String part;
|
||||||
String part_path = path + "/" + part_name;
|
String part_path = path + "/" + part_name;
|
||||||
ZooKeeperRetriesControl retries_ctl(logging_name + "::get", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder(logging_name + "::get");
|
||||||
retries_ctl.retryLoop([&]
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
with_retries.renewZooKeeper(zk);
|
||||||
part = zk->get(part_path);
|
part = zk->get(part_path);
|
||||||
});
|
});
|
||||||
res += part;
|
res += part;
|
||||||
@ -331,11 +356,16 @@ void BackupCoordinationRemote::addReplicatedPartNames(
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("addReplicatedPartNames");
|
||||||
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
path += "/" + escapeForFileName(replica_name);
|
{
|
||||||
zk->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
|
with_retries.renewZooKeeper(zk);
|
||||||
|
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + escapeForFileName(replica_name);
|
||||||
|
zk->createIfNotExists(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
||||||
@ -357,11 +387,16 @@ void BackupCoordinationRemote::addReplicatedMutations(
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("addReplicatedMutations");
|
||||||
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
path += "/" + escapeForFileName(replica_name);
|
{
|
||||||
zk->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
|
with_retries.renewZooKeeper(zk);
|
||||||
|
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + escapeForFileName(replica_name);
|
||||||
|
zk->createIfNotExists(path, ReplicatedMutations::serialize(mutations, table_name_for_logs));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
||||||
@ -381,11 +416,16 @@ void BackupCoordinationRemote::addReplicatedDataPath(
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("addReplicatedDataPath");
|
||||||
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
path += "/" + escapeForFileName(data_path);
|
{
|
||||||
zk->createIfNotExists(path, "");
|
with_retries.renewZooKeeper(zk);
|
||||||
|
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + escapeForFileName(data_path);
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
|
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
|
||||||
@ -401,55 +441,88 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
|
|||||||
if (replicated_tables)
|
if (replicated_tables)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
std::vector<BackupCoordinationReplicatedTables::PartNamesForTableReplica> part_names_for_replicated_tables;
|
||||||
|
{
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_part_names");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
part_names_for_replicated_tables.clear();
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
|
String path = zookeeper_path + "/repl_part_names";
|
||||||
|
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
||||||
|
{
|
||||||
|
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||||
|
String path2 = path + "/" + escaped_table_shared_id;
|
||||||
|
for (const String & escaped_replica_name : zk->getChildren(path2))
|
||||||
|
{
|
||||||
|
String replica_name = unescapeForFileName(escaped_replica_name);
|
||||||
|
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
|
||||||
|
part_names_for_replicated_tables.push_back(
|
||||||
|
{table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<BackupCoordinationReplicatedTables::MutationsForTableReplica> mutations_for_replicated_tables;
|
||||||
|
{
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_mutations");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
mutations_for_replicated_tables.clear();
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
|
String path = zookeeper_path + "/repl_mutations";
|
||||||
|
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
||||||
|
{
|
||||||
|
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||||
|
String path2 = path + "/" + escaped_table_shared_id;
|
||||||
|
for (const String & escaped_replica_name : zk->getChildren(path2))
|
||||||
|
{
|
||||||
|
String replica_name = unescapeForFileName(escaped_replica_name);
|
||||||
|
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
|
||||||
|
mutations_for_replicated_tables.push_back(
|
||||||
|
{table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<BackupCoordinationReplicatedTables::DataPathForTableReplica> data_paths_for_replicated_tables;
|
||||||
|
{
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedTables::repl_data_paths");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
data_paths_for_replicated_tables.clear();
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
|
String path = zookeeper_path + "/repl_data_paths";
|
||||||
|
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
||||||
|
{
|
||||||
|
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||||
|
String path2 = path + "/" + escaped_table_shared_id;
|
||||||
|
for (const String & escaped_data_path : zk->getChildren(path2))
|
||||||
|
{
|
||||||
|
String data_path = unescapeForFileName(escaped_data_path);
|
||||||
|
data_paths_for_replicated_tables.push_back({table_shared_id, data_path});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
replicated_tables.emplace();
|
replicated_tables.emplace();
|
||||||
auto zk = getZooKeeper();
|
for (auto & part_names : part_names_for_replicated_tables)
|
||||||
|
replicated_tables->addPartNames(std::move(part_names));
|
||||||
{
|
for (auto & mutations : mutations_for_replicated_tables)
|
||||||
String path = zookeeper_path + "/repl_part_names";
|
replicated_tables->addMutations(std::move(mutations));
|
||||||
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
for (auto & data_paths : data_paths_for_replicated_tables)
|
||||||
{
|
replicated_tables->addDataPath(std::move(data_paths));
|
||||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
|
||||||
String path2 = path + "/" + escaped_table_shared_id;
|
|
||||||
for (const String & escaped_replica_name : zk->getChildren(path2))
|
|
||||||
{
|
|
||||||
String replica_name = unescapeForFileName(escaped_replica_name);
|
|
||||||
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
|
|
||||||
replicated_tables->addPartNames(table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
String path = zookeeper_path + "/repl_mutations";
|
|
||||||
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
|
||||||
{
|
|
||||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
|
||||||
String path2 = path + "/" + escaped_table_shared_id;
|
|
||||||
for (const String & escaped_replica_name : zk->getChildren(path2))
|
|
||||||
{
|
|
||||||
String replica_name = unescapeForFileName(escaped_replica_name);
|
|
||||||
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
|
|
||||||
replicated_tables->addMutations(table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
String path = zookeeper_path + "/repl_data_paths";
|
|
||||||
for (const String & escaped_table_shared_id : zk->getChildren(path))
|
|
||||||
{
|
|
||||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
|
||||||
String path2 = path + "/" + escaped_table_shared_id;
|
|
||||||
for (const String & escaped_data_path : zk->getChildren(path2))
|
|
||||||
{
|
|
||||||
String data_path = unescapeForFileName(escaped_data_path);
|
|
||||||
replicated_tables->addDataPath(table_shared_id, data_path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
|
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
@ -458,13 +531,18 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("addReplicatedAccessFilePath");
|
||||||
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
|
{
|
||||||
zk->createIfNotExists(path, "");
|
with_retries.renewZooKeeper(zk);
|
||||||
path += "/" + current_host;
|
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
|
||||||
zk->createIfNotExists(path, file_path);
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + current_host;
|
||||||
|
zk->createIfNotExists(path, file_path);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
|
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
|
||||||
@ -479,25 +557,35 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
|
|||||||
if (replicated_access)
|
if (replicated_access)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
replicated_access.emplace();
|
std::vector<BackupCoordinationReplicatedAccess::FilePathForAccessEntitry> file_path_for_access_entities;
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedAccess");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
String path = zookeeper_path + "/repl_access";
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
for (const String & escaped_access_zk_path : zk->getChildren(path))
|
|
||||||
{
|
{
|
||||||
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
|
file_path_for_access_entities.clear();
|
||||||
String path2 = path + "/" + escaped_access_zk_path;
|
with_retries.renewZooKeeper(zk);
|
||||||
for (const String & type_str : zk->getChildren(path2))
|
|
||||||
|
String path = zookeeper_path + "/repl_access";
|
||||||
|
for (const String & escaped_access_zk_path : zk->getChildren(path))
|
||||||
{
|
{
|
||||||
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
|
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
|
||||||
String path3 = path2 + "/" + type_str;
|
String path2 = path + "/" + escaped_access_zk_path;
|
||||||
for (const String & host_id : zk->getChildren(path3))
|
for (const String & type_str : zk->getChildren(path2))
|
||||||
{
|
{
|
||||||
String file_path = zk->get(path3 + "/" + host_id);
|
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
|
||||||
replicated_access->addFilePath(access_zk_path, type, host_id, file_path);
|
String path3 = path2 + "/" + type_str;
|
||||||
|
for (const String & host_id : zk->getChildren(path3))
|
||||||
|
{
|
||||||
|
String file_path = zk->get(path3 + "/" + host_id);
|
||||||
|
file_path_for_access_entities.push_back({access_zk_path, type, host_id, file_path});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
|
replicated_access.emplace();
|
||||||
|
for (auto & file_path : file_path_for_access_entities)
|
||||||
|
replicated_access->addFilePath(std::move(file_path));
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
|
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
|
||||||
@ -508,21 +596,26 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
|
|||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedSQLObjectsDir() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedSQLObjectsDir() must not be called after preparing");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("addReplicatedSQLObjectsDir");
|
||||||
String path = zookeeper_path + "/repl_sql_objects/" + escapeForFileName(loader_zk_path);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
|
||||||
path += "/";
|
|
||||||
switch (object_type)
|
|
||||||
{
|
{
|
||||||
case UserDefinedSQLObjectType::Function:
|
with_retries.renewZooKeeper(zk);
|
||||||
path += "functions";
|
String path = zookeeper_path + "/repl_sql_objects/" + escapeForFileName(loader_zk_path);
|
||||||
break;
|
zk->createIfNotExists(path, "");
|
||||||
}
|
|
||||||
|
|
||||||
zk->createIfNotExists(path, "");
|
path += "/";
|
||||||
path += "/" + current_host;
|
switch (object_type)
|
||||||
zk->createIfNotExists(path, dir_path);
|
{
|
||||||
|
case UserDefinedSQLObjectType::Function:
|
||||||
|
path += "functions";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
path += "/" + current_host;
|
||||||
|
zk->createIfNotExists(path, dir_path);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
|
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
|
||||||
@ -537,27 +630,36 @@ void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
|
|||||||
if (replicated_sql_objects)
|
if (replicated_sql_objects)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
replicated_sql_objects.emplace();
|
std::vector<BackupCoordinationReplicatedSQLObjects::DirectoryPathForSQLObject> directories_for_sql_objects;
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("prepareReplicatedSQLObjects");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
String path = zookeeper_path + "/repl_sql_objects";
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
for (const String & escaped_loader_zk_path : zk->getChildren(path))
|
|
||||||
{
|
{
|
||||||
String loader_zk_path = unescapeForFileName(escaped_loader_zk_path);
|
directories_for_sql_objects.clear();
|
||||||
String objects_path = path + "/" + escaped_loader_zk_path;
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
if (String functions_path = objects_path + "/functions"; zk->exists(functions_path))
|
String path = zookeeper_path + "/repl_sql_objects";
|
||||||
|
for (const String & escaped_loader_zk_path : zk->getChildren(path))
|
||||||
{
|
{
|
||||||
UserDefinedSQLObjectType object_type = UserDefinedSQLObjectType::Function;
|
String loader_zk_path = unescapeForFileName(escaped_loader_zk_path);
|
||||||
for (const String & host_id : zk->getChildren(functions_path))
|
String objects_path = path + "/" + escaped_loader_zk_path;
|
||||||
|
|
||||||
|
if (String functions_path = objects_path + "/functions"; zk->exists(functions_path))
|
||||||
{
|
{
|
||||||
String dir = zk->get(functions_path + "/" + host_id);
|
UserDefinedSQLObjectType object_type = UserDefinedSQLObjectType::Function;
|
||||||
replicated_sql_objects->addDirectory(loader_zk_path, object_type, host_id, dir);
|
for (const String & host_id : zk->getChildren(functions_path))
|
||||||
|
{
|
||||||
|
String dir = zk->get(functions_path + "/" + host_id);
|
||||||
|
directories_for_sql_objects.push_back({loader_zk_path, object_type, host_id, dir});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
|
||||||
|
|
||||||
|
replicated_sql_objects.emplace();
|
||||||
|
for (auto & directory : directories_for_sql_objects)
|
||||||
|
replicated_sql_objects->addDirectory(std::move(directory));
|
||||||
|
}
|
||||||
|
|
||||||
void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
|
void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
|
||||||
{
|
{
|
||||||
@ -595,9 +697,11 @@ void BackupCoordinationRemote::prepareFileInfos() const
|
|||||||
|
|
||||||
Strings hosts_with_file_infos;
|
Strings hosts_with_file_infos;
|
||||||
{
|
{
|
||||||
ZooKeeperRetriesControl retries_ctl("prepareFileInfos::get_hosts", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder("prepareFileInfos::get_hosts");
|
||||||
retries_ctl.retryLoop([&]{
|
holder.retries_ctl.retryLoop(
|
||||||
auto zk = getZooKeeper();
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
hosts_with_file_infos = zk->getChildren(zookeeper_path + "/file_infos");
|
hosts_with_file_infos = zk->getChildren(zookeeper_path + "/file_infos");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -616,10 +720,11 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
|
|||||||
String full_path = zookeeper_path + "/writing_files/" + std::to_string(data_file_index);
|
String full_path = zookeeper_path + "/writing_files/" + std::to_string(data_file_index);
|
||||||
String host_index_str = std::to_string(current_host_index);
|
String host_index_str = std::to_string(current_host_index);
|
||||||
|
|
||||||
ZooKeeperRetriesControl retries_ctl("startWritingFile", zookeeper_retries_info);
|
auto holder = with_retries.createRetriesControlHolder("startWritingFile");
|
||||||
retries_ctl.retryLoop([&]
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
with_retries.renewZooKeeper(zk);
|
||||||
auto code = zk->tryCreate(full_path, host_index_str, zkutil::CreateMode::Persistent);
|
auto code = zk->tryCreate(full_path, host_index_str, zkutil::CreateMode::Persistent);
|
||||||
|
|
||||||
if (code == Coordination::Error::ZOK)
|
if (code == Coordination::Error::ZOK)
|
||||||
@ -633,54 +738,63 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
|
|||||||
return acquired_writing;
|
return acquired_writing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const
|
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const
|
||||||
{
|
{
|
||||||
/// If its internal concurrency will be checked for the base backup
|
/// If its internal concurrency will be checked for the base backup
|
||||||
if (is_internal)
|
if (is_internal)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
|
||||||
std::string backup_stage_path = zookeeper_path + "/stage";
|
std::string backup_stage_path = zookeeper_path + "/stage";
|
||||||
|
|
||||||
if (!zk->exists(root_zookeeper_path))
|
bool result = false;
|
||||||
zk->createAncestors(root_zookeeper_path);
|
|
||||||
|
|
||||||
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
auto holder = with_retries.createRetriesControlHolder("getAllArchiveSuffixes");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
{
|
{
|
||||||
Coordination::Stat stat;
|
with_retries.renewZooKeeper(zk);
|
||||||
zk->get(root_zookeeper_path, &stat);
|
|
||||||
Strings existing_backup_paths = zk->getChildren(root_zookeeper_path);
|
|
||||||
|
|
||||||
for (const auto & existing_backup_path : existing_backup_paths)
|
if (!zk->exists(root_zookeeper_path))
|
||||||
|
zk->createAncestors(root_zookeeper_path);
|
||||||
|
|
||||||
|
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
||||||
{
|
{
|
||||||
if (startsWith(existing_backup_path, "restore-"))
|
Coordination::Stat stat;
|
||||||
continue;
|
zk->get(root_zookeeper_path, &stat);
|
||||||
|
Strings existing_backup_paths = zk->getChildren(root_zookeeper_path);
|
||||||
|
|
||||||
String existing_backup_uuid = existing_backup_path;
|
for (const auto & existing_backup_path : existing_backup_paths)
|
||||||
existing_backup_uuid.erase(0, String("backup-").size());
|
|
||||||
|
|
||||||
if (existing_backup_uuid == toString(backup_uuid))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
|
|
||||||
if (status != Stage::COMPLETED)
|
|
||||||
{
|
{
|
||||||
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
|
if (startsWith(existing_backup_path, "restore-"))
|
||||||
return true;
|
continue;
|
||||||
|
|
||||||
|
String existing_backup_uuid = existing_backup_path;
|
||||||
|
existing_backup_uuid.erase(0, String("backup-").size());
|
||||||
|
|
||||||
|
|
||||||
|
if (existing_backup_uuid == toString(backup_uuid))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
|
||||||
|
if (status != Stage::COMPLETED)
|
||||||
|
{
|
||||||
|
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zk->createIfNotExists(backup_stage_path, "");
|
||||||
|
auto code = zk->trySet(backup_stage_path, Stage::SCHEDULED_TO_START, stat.version);
|
||||||
|
if (code == Coordination::Error::ZOK)
|
||||||
|
break;
|
||||||
|
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
||||||
|
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
||||||
|
throw zkutil::KeeperException(code, backup_stage_path);
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
zk->createIfNotExists(backup_stage_path, "");
|
return result;
|
||||||
auto code = zk->trySet(backup_stage_path, Stage::SCHEDULED_TO_START, stat.version);
|
|
||||||
if (code == Coordination::Error::ZOK)
|
|
||||||
break;
|
|
||||||
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
|
||||||
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
|
||||||
throw zkutil::KeeperException(code, backup_stage_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
|
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
|
||||||
#include <Backups/BackupCoordinationReplicatedTables.h>
|
#include <Backups/BackupCoordinationReplicatedTables.h>
|
||||||
#include <Backups/BackupCoordinationStageSync.h>
|
#include <Backups/BackupCoordinationStageSync.h>
|
||||||
#include <Storages/MergeTree/ZooKeeperRetries.h>
|
#include <Backups/WithRetries.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -19,13 +19,7 @@ constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10;
|
|||||||
class BackupCoordinationRemote : public IBackupCoordination
|
class BackupCoordinationRemote : public IBackupCoordination
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct BackupKeeperSettings
|
using BackupKeeperSettings = WithRetries::KeeperSettings;
|
||||||
{
|
|
||||||
UInt64 keeper_max_retries;
|
|
||||||
UInt64 keeper_retry_initial_backoff_ms;
|
|
||||||
UInt64 keeper_retry_max_backoff_ms;
|
|
||||||
UInt64 keeper_value_max_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
BackupCoordinationRemote(
|
BackupCoordinationRemote(
|
||||||
zkutil::GetZooKeeper get_zookeeper_,
|
zkutil::GetZooKeeper get_zookeeper_,
|
||||||
@ -79,7 +73,6 @@ public:
|
|||||||
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);
|
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
zkutil::ZooKeeperPtr getZooKeeper() const;
|
|
||||||
void createRootNodes();
|
void createRootNodes();
|
||||||
void removeAllNodes();
|
void removeAllNodes();
|
||||||
|
|
||||||
@ -94,7 +87,6 @@ private:
|
|||||||
void prepareReplicatedSQLObjects() const TSA_REQUIRES(replicated_sql_objects_mutex);
|
void prepareReplicatedSQLObjects() const TSA_REQUIRES(replicated_sql_objects_mutex);
|
||||||
void prepareFileInfos() const TSA_REQUIRES(file_infos_mutex);
|
void prepareFileInfos() const TSA_REQUIRES(file_infos_mutex);
|
||||||
|
|
||||||
const zkutil::GetZooKeeper get_zookeeper;
|
|
||||||
const String root_zookeeper_path;
|
const String root_zookeeper_path;
|
||||||
const String zookeeper_path;
|
const String zookeeper_path;
|
||||||
const BackupKeeperSettings keeper_settings;
|
const BackupKeeperSettings keeper_settings;
|
||||||
@ -106,10 +98,10 @@ private:
|
|||||||
const bool is_internal;
|
const bool is_internal;
|
||||||
Poco::Logger * const log;
|
Poco::Logger * const log;
|
||||||
|
|
||||||
mutable ZooKeeperRetriesInfo zookeeper_retries_info;
|
/// The order of these two fields matters, because stage_sync holds a reference to with_retries object
|
||||||
|
mutable WithRetries with_retries;
|
||||||
std::optional<BackupCoordinationStageSync> stage_sync;
|
std::optional<BackupCoordinationStageSync> stage_sync;
|
||||||
|
|
||||||
mutable zkutil::ZooKeeperPtr TSA_GUARDED_BY(zookeeper_mutex) zookeeper;
|
|
||||||
mutable std::optional<BackupCoordinationReplicatedTables> TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
|
mutable std::optional<BackupCoordinationReplicatedTables> TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
|
||||||
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
|
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
|
||||||
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
|
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
|
||||||
|
@ -7,8 +7,13 @@ namespace DB
|
|||||||
BackupCoordinationReplicatedAccess::BackupCoordinationReplicatedAccess() = default;
|
BackupCoordinationReplicatedAccess::BackupCoordinationReplicatedAccess() = default;
|
||||||
BackupCoordinationReplicatedAccess::~BackupCoordinationReplicatedAccess() = default;
|
BackupCoordinationReplicatedAccess::~BackupCoordinationReplicatedAccess() = default;
|
||||||
|
|
||||||
void BackupCoordinationReplicatedAccess::addFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
|
void BackupCoordinationReplicatedAccess::addFilePath(FilePathForAccessEntitry && file_path_for_access_entity)
|
||||||
{
|
{
|
||||||
|
const auto & access_zk_path = file_path_for_access_entity.access_zk_path;
|
||||||
|
const auto & access_entity_type = file_path_for_access_entity.access_entity_type;
|
||||||
|
const auto & host_id = file_path_for_access_entity.host_id;
|
||||||
|
const auto & file_path = file_path_for_access_entity.file_path;
|
||||||
|
|
||||||
auto & ref = file_paths_by_zk_path[std::make_pair(access_zk_path, access_entity_type)];
|
auto & ref = file_paths_by_zk_path[std::make_pair(access_zk_path, access_entity_type)];
|
||||||
ref.file_paths.emplace(file_path);
|
ref.file_paths.emplace(file_path);
|
||||||
|
|
||||||
|
@ -28,8 +28,16 @@ public:
|
|||||||
BackupCoordinationReplicatedAccess();
|
BackupCoordinationReplicatedAccess();
|
||||||
~BackupCoordinationReplicatedAccess();
|
~BackupCoordinationReplicatedAccess();
|
||||||
|
|
||||||
|
struct FilePathForAccessEntitry
|
||||||
|
{
|
||||||
|
String access_zk_path;
|
||||||
|
AccessEntityType access_entity_type;
|
||||||
|
String host_id;
|
||||||
|
String file_path;
|
||||||
|
};
|
||||||
|
|
||||||
/// Adds a path to access*.txt file keeping access entities of a ReplicatedAccessStorage.
|
/// Adds a path to access*.txt file keeping access entities of a ReplicatedAccessStorage.
|
||||||
void addFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path);
|
void addFilePath(FilePathForAccessEntitry && file_path_for_access_entity);
|
||||||
|
|
||||||
/// Returns all paths added by addFilePath() if `host_id` is a host chosen to store access.
|
/// Returns all paths added by addFilePath() if `host_id` is a host chosen to store access.
|
||||||
Strings getFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const;
|
Strings getFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const;
|
||||||
|
@ -7,8 +7,13 @@ namespace DB
|
|||||||
BackupCoordinationReplicatedSQLObjects::BackupCoordinationReplicatedSQLObjects() = default;
|
BackupCoordinationReplicatedSQLObjects::BackupCoordinationReplicatedSQLObjects() = default;
|
||||||
BackupCoordinationReplicatedSQLObjects::~BackupCoordinationReplicatedSQLObjects() = default;
|
BackupCoordinationReplicatedSQLObjects::~BackupCoordinationReplicatedSQLObjects() = default;
|
||||||
|
|
||||||
void BackupCoordinationReplicatedSQLObjects::addDirectory(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
|
void BackupCoordinationReplicatedSQLObjects::addDirectory(DirectoryPathForSQLObject && directory_path_for_sql_object)
|
||||||
{
|
{
|
||||||
|
const auto & loader_zk_path = directory_path_for_sql_object.loader_zk_path;
|
||||||
|
const auto & object_type = directory_path_for_sql_object.object_type;
|
||||||
|
const auto & host_id = directory_path_for_sql_object.host_id;
|
||||||
|
const auto & dir_path = directory_path_for_sql_object.dir_path;
|
||||||
|
|
||||||
auto & ref = dir_paths_by_zk_path[std::make_pair(loader_zk_path, object_type)];
|
auto & ref = dir_paths_by_zk_path[std::make_pair(loader_zk_path, object_type)];
|
||||||
ref.dir_paths.emplace(dir_path);
|
ref.dir_paths.emplace(dir_path);
|
||||||
|
|
||||||
|
@ -28,8 +28,16 @@ public:
|
|||||||
BackupCoordinationReplicatedSQLObjects();
|
BackupCoordinationReplicatedSQLObjects();
|
||||||
~BackupCoordinationReplicatedSQLObjects();
|
~BackupCoordinationReplicatedSQLObjects();
|
||||||
|
|
||||||
|
struct DirectoryPathForSQLObject
|
||||||
|
{
|
||||||
|
String loader_zk_path;
|
||||||
|
UserDefinedSQLObjectType object_type;
|
||||||
|
String host_id;
|
||||||
|
String dir_path;
|
||||||
|
};
|
||||||
|
|
||||||
/// Adds a path to directory keeping user defined SQL objects.
|
/// Adds a path to directory keeping user defined SQL objects.
|
||||||
void addDirectory(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path);
|
void addDirectory(DirectoryPathForSQLObject && directory_path_for_sql_object);
|
||||||
|
|
||||||
/// Returns all added paths to directories if `host_id` is a host chosen to store user-defined SQL objects.
|
/// Returns all added paths to directories if `host_id` is a host chosen to store user-defined SQL objects.
|
||||||
Strings getDirectories(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const;
|
Strings getDirectories(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const;
|
||||||
|
@ -149,12 +149,13 @@ private:
|
|||||||
BackupCoordinationReplicatedTables::BackupCoordinationReplicatedTables() = default;
|
BackupCoordinationReplicatedTables::BackupCoordinationReplicatedTables() = default;
|
||||||
BackupCoordinationReplicatedTables::~BackupCoordinationReplicatedTables() = default;
|
BackupCoordinationReplicatedTables::~BackupCoordinationReplicatedTables() = default;
|
||||||
|
|
||||||
void BackupCoordinationReplicatedTables::addPartNames(
|
void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica && part_names)
|
||||||
const String & table_shared_id,
|
|
||||||
const String & table_name_for_logs,
|
|
||||||
const String & replica_name,
|
|
||||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
|
||||||
{
|
{
|
||||||
|
const auto & table_shared_id = part_names.table_shared_id;
|
||||||
|
const auto & table_name_for_logs = part_names.table_name_for_logs;
|
||||||
|
const auto & replica_name = part_names.replica_name;
|
||||||
|
const auto & part_names_and_checksums = part_names.part_names_and_checksums;
|
||||||
|
|
||||||
if (prepared)
|
if (prepared)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after preparing");
|
||||||
|
|
||||||
@ -216,12 +217,13 @@ Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_sh
|
|||||||
return it2->second;
|
return it2->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationReplicatedTables::addMutations(
|
void BackupCoordinationReplicatedTables::addMutations(MutationsForTableReplica && mutations_for_table_replica)
|
||||||
const String & table_shared_id,
|
|
||||||
const String & table_name_for_logs,
|
|
||||||
const String & replica_name,
|
|
||||||
const std::vector<MutationInfo> & mutations)
|
|
||||||
{
|
{
|
||||||
|
const auto & table_shared_id = mutations_for_table_replica.table_shared_id;
|
||||||
|
const auto & table_name_for_logs = mutations_for_table_replica.table_name_for_logs;
|
||||||
|
const auto & replica_name = mutations_for_table_replica.replica_name;
|
||||||
|
const auto & mutations = mutations_for_table_replica.mutations;
|
||||||
|
|
||||||
if (prepared)
|
if (prepared)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMutations() must not be called after preparing");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMutations() must not be called after preparing");
|
||||||
|
|
||||||
@ -254,8 +256,11 @@ BackupCoordinationReplicatedTables::getMutations(const String & table_shared_id,
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationReplicatedTables::addDataPath(const String & table_shared_id, const String & data_path)
|
void BackupCoordinationReplicatedTables::addDataPath(DataPathForTableReplica && data_path_for_table_replica)
|
||||||
{
|
{
|
||||||
|
const auto & table_shared_id = data_path_for_table_replica.table_shared_id;
|
||||||
|
const auto & data_path = data_path_for_table_replica.data_path;
|
||||||
|
|
||||||
auto & table_info = table_infos[table_shared_id];
|
auto & table_info = table_infos[table_shared_id];
|
||||||
table_info.data_paths.emplace(data_path);
|
table_info.data_paths.emplace(data_path);
|
||||||
}
|
}
|
||||||
|
@ -38,15 +38,19 @@ public:
|
|||||||
|
|
||||||
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
|
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
|
||||||
|
|
||||||
|
struct PartNamesForTableReplica
|
||||||
|
{
|
||||||
|
String table_shared_id;
|
||||||
|
String table_name_for_logs;
|
||||||
|
String replica_name;
|
||||||
|
std::vector<PartNameAndChecksum> part_names_and_checksums;
|
||||||
|
};
|
||||||
|
|
||||||
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
|
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
|
||||||
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
|
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
|
||||||
/// getPartNames().
|
/// getPartNames().
|
||||||
/// Checksums are used only to control that parts under the same names on different replicas are the same.
|
/// Checksums are used only to control that parts under the same names on different replicas are the same.
|
||||||
void addPartNames(
|
void addPartNames(PartNamesForTableReplica && part_names);
|
||||||
const String & table_shared_id,
|
|
||||||
const String & table_name_for_logs,
|
|
||||||
const String & replica_name,
|
|
||||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums);
|
|
||||||
|
|
||||||
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
|
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
|
||||||
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
|
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
|
||||||
@ -55,20 +59,30 @@ public:
|
|||||||
|
|
||||||
using MutationInfo = IBackupCoordination::MutationInfo;
|
using MutationInfo = IBackupCoordination::MutationInfo;
|
||||||
|
|
||||||
|
struct MutationsForTableReplica
|
||||||
|
{
|
||||||
|
String table_shared_id;
|
||||||
|
String table_name_for_logs;
|
||||||
|
String replica_name;
|
||||||
|
std::vector<MutationInfo> mutations;
|
||||||
|
};
|
||||||
|
|
||||||
/// Adds information about mutations of a replicated table.
|
/// Adds information about mutations of a replicated table.
|
||||||
void addMutations(
|
void addMutations(MutationsForTableReplica && mutations_for_table_replica);
|
||||||
const String & table_shared_id,
|
|
||||||
const String & table_name_for_logs,
|
|
||||||
const String & replica_name,
|
|
||||||
const std::vector<MutationInfo> & mutations);
|
|
||||||
|
|
||||||
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
|
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
|
||||||
std::vector<MutationInfo> getMutations(const String & table_shared_id, const String & replica_name) const;
|
std::vector<MutationInfo> getMutations(const String & table_shared_id, const String & replica_name) const;
|
||||||
|
|
||||||
|
struct DataPathForTableReplica
|
||||||
|
{
|
||||||
|
String table_shared_id;
|
||||||
|
String data_path;
|
||||||
|
};
|
||||||
|
|
||||||
/// Adds a data path in backup for a replicated table.
|
/// Adds a data path in backup for a replicated table.
|
||||||
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
|
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
|
||||||
/// getDataPaths().
|
/// getDataPaths().
|
||||||
void addDataPath(const String & table_shared_id, const String & data_path);
|
void addDataPath(DataPathForTableReplica && data_path_for_table_replica);
|
||||||
|
|
||||||
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
|
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
|
||||||
Strings getDataPaths(const String & table_shared_id) const;
|
Strings getDataPaths(const String & table_shared_id) const;
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
#include <Backups/BackupCoordinationStageSync.h>
|
#include <Backups/BackupCoordinationStageSync.h>
|
||||||
|
|
||||||
|
#include <base/chrono_io.h>
|
||||||
|
#include <Common/ZooKeeper/Common.h>
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
#include <Common/ZooKeeper/KeeperException.h>
|
#include <Common/ZooKeeper/KeeperException.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <base/chrono_io.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -17,9 +19,12 @@ namespace ErrorCodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
|
BackupCoordinationStageSync::BackupCoordinationStageSync(
|
||||||
: zookeeper_path(zookeeper_path_)
|
const String & root_zookeeper_path_,
|
||||||
, get_zookeeper(get_zookeeper_)
|
WithRetries & with_retries_,
|
||||||
|
Poco::Logger * log_)
|
||||||
|
: zookeeper_path(root_zookeeper_path_ + "/stage")
|
||||||
|
, with_retries(with_retries_)
|
||||||
, log(log_)
|
, log(log_)
|
||||||
{
|
{
|
||||||
createRootNodes();
|
createRootNodes();
|
||||||
@ -27,32 +32,48 @@ BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeepe
|
|||||||
|
|
||||||
void BackupCoordinationStageSync::createRootNodes()
|
void BackupCoordinationStageSync::createRootNodes()
|
||||||
{
|
{
|
||||||
auto zookeeper = get_zookeeper();
|
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
|
||||||
zookeeper->createAncestors(zookeeper_path);
|
holder.retries_ctl.retryLoop(
|
||||||
zookeeper->createIfNotExists(zookeeper_path, "");
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
zookeeper->createAncestors(zookeeper_path);
|
||||||
|
zookeeper->createIfNotExists(zookeeper_path, "");
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
|
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
|
||||||
{
|
{
|
||||||
auto zookeeper = get_zookeeper();
|
auto holder = with_retries.createRetriesControlHolder("set");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
|
||||||
/// Make an ephemeral node so the initiator can track if the current host is still working.
|
/// Make an ephemeral node so the initiator can track if the current host is still working.
|
||||||
String alive_node_path = zookeeper_path + "/alive|" + current_host;
|
String alive_node_path = zookeeper_path + "/alive|" + current_host;
|
||||||
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
||||||
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
|
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
|
||||||
throw zkutil::KeeperException(code, alive_node_path);
|
throw zkutil::KeeperException(code, alive_node_path);
|
||||||
|
|
||||||
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
|
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
|
||||||
zookeeper->create(zookeeper_path + "/current|" + current_host + "|" + new_stage, message, zkutil::CreateMode::Persistent);
|
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
|
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
|
||||||
{
|
{
|
||||||
auto zookeeper = get_zookeeper();
|
auto holder = with_retries.createRetriesControlHolder("setError");
|
||||||
WriteBufferFromOwnString buf;
|
holder.retries_ctl.retryLoop(
|
||||||
writeStringBinary(current_host, buf);
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
writeException(exception, buf, true);
|
{
|
||||||
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
|
||||||
|
WriteBufferFromOwnString buf;
|
||||||
|
writeStringBinary(current_host, buf);
|
||||||
|
writeException(exception, buf, true);
|
||||||
|
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
|
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
|
||||||
@ -83,14 +104,24 @@ struct BackupCoordinationStageSync::State
|
|||||||
};
|
};
|
||||||
|
|
||||||
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
|
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
|
||||||
zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
|
const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
|
||||||
{
|
{
|
||||||
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
|
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
|
||||||
|
|
||||||
State state;
|
State state;
|
||||||
if (zk_nodes_set.contains("error"))
|
if (zk_nodes_set.contains("error"))
|
||||||
{
|
{
|
||||||
ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
|
String errors;
|
||||||
|
{
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("readCurrentState");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
errors = zookeeper->get(zookeeper_path + "/error");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
ReadBufferFromOwnString buf{errors};
|
||||||
String host;
|
String host;
|
||||||
readStringBinary(host, buf);
|
readStringBinary(host, buf);
|
||||||
state.error = std::make_pair(host, readException(buf, fmt::format("Got error from {}", host)));
|
state.error = std::make_pair(host, readException(buf, fmt::format("Got error from {}", host)));
|
||||||
@ -102,8 +133,38 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
|
|||||||
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
|
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
|
||||||
{
|
{
|
||||||
UnreadyHostState unready_host_state;
|
UnreadyHostState unready_host_state;
|
||||||
unready_host_state.started = zk_nodes_set.contains("started|" + host);
|
const String started_node_name = "started|" + host;
|
||||||
unready_host_state.alive = zk_nodes_set.contains("alive|" + host);
|
const String alive_node_name = "alive|" + host;
|
||||||
|
const String alive_node_path = zookeeper_path + "/" + alive_node_name;
|
||||||
|
unready_host_state.started = zk_nodes_set.contains(started_node_name);
|
||||||
|
|
||||||
|
/// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore.
|
||||||
|
/// Though we recreate the "alive" node when reconnecting, it might not be enough and a race condition is possible.
|
||||||
|
/// And everything we can do here - just retry.
|
||||||
|
/// In the worst case, when we don't manage to see the alive node for a long time, we will just abort the backup.
|
||||||
|
unready_host_state.alive = zk_nodes_set.contains(alive_node_name);
|
||||||
|
if (!unready_host_state.alive)
|
||||||
|
{
|
||||||
|
LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host);
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
|
||||||
|
if (zookeeper->existsNoFailureInjection(alive_node_path))
|
||||||
|
{
|
||||||
|
unready_host_state.alive = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retry with backoff. We also check whether it is the last retry or not, because we don't want to rethrow an exception.
|
||||||
|
if (!holder.retries_ctl.isLastRetry())
|
||||||
|
holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? "alive" : "dead");
|
||||||
|
|
||||||
state.unready_hosts.emplace(host, unready_host_state);
|
state.unready_hosts.emplace(host, unready_host_state);
|
||||||
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
|
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
|
||||||
state.host_terminated = host;
|
state.host_terminated = host;
|
||||||
@ -113,51 +174,62 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState
|
|||||||
if (state.host_terminated || !state.unready_hosts.empty())
|
if (state.host_terminated || !state.unready_hosts.empty())
|
||||||
return state;
|
return state;
|
||||||
|
|
||||||
state.results.reserve(all_hosts.size());
|
auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait");
|
||||||
for (const auto & host : all_hosts)
|
holder.retries_ctl.retryLoop(
|
||||||
state.results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
Strings results;
|
||||||
|
|
||||||
|
for (const auto & host : all_hosts)
|
||||||
|
results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
|
||||||
|
|
||||||
|
state.results = std::move(results);
|
||||||
|
});
|
||||||
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
|
Strings BackupCoordinationStageSync::waitImpl(
|
||||||
|
const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
|
||||||
{
|
{
|
||||||
if (all_hosts.empty())
|
if (all_hosts.empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
/// Wait until all hosts are ready or an error happens or time is out.
|
/// Wait until all hosts are ready or an error happens or time is out.
|
||||||
|
|
||||||
auto zookeeper = get_zookeeper();
|
|
||||||
|
|
||||||
/// Set by ZooKeeper when the list of zk nodes has changed.
|
|
||||||
auto watch = std::make_shared<Poco::Event>();
|
|
||||||
|
|
||||||
bool use_timeout = timeout.has_value();
|
bool use_timeout = timeout.has_value();
|
||||||
std::chrono::steady_clock::time_point end_of_timeout;
|
std::chrono::steady_clock::time_point end_of_timeout;
|
||||||
if (use_timeout)
|
if (use_timeout)
|
||||||
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);
|
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);
|
||||||
|
|
||||||
State state;
|
State state;
|
||||||
|
|
||||||
String previous_unready_host; /// Used for logging: we don't want to log the same unready host again.
|
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
/// Get zk nodes and subscribe on their changes.
|
LOG_INFO(log, "Waiting for the stage {}", stage_to_wait);
|
||||||
Strings zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
|
/// Set by ZooKeeper when the list of zk nodes has changed.
|
||||||
|
auto watch = std::make_shared<Poco::Event>();
|
||||||
|
Strings zk_nodes;
|
||||||
|
{
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zookeeper = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zookeeper);
|
||||||
|
watch->reset();
|
||||||
|
/// Get zk nodes and subscribe on their changes.
|
||||||
|
zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/// Read and analyze the current state of zk nodes.
|
/// Read and analyze the current state of zk nodes.
|
||||||
state = readCurrentState(zookeeper, zk_nodes, all_hosts, stage_to_wait);
|
state = readCurrentState(zk_nodes, all_hosts, stage_to_wait);
|
||||||
if (state.error || state.host_terminated || state.unready_hosts.empty())
|
if (state.error || state.host_terminated || state.unready_hosts.empty())
|
||||||
break; /// Error happened or everything is ready.
|
break; /// Error happened or everything is ready.
|
||||||
|
|
||||||
/// Log that we will wait for another host.
|
/// Log that we will wait
|
||||||
const auto & unready_host = state.unready_hosts.begin()->first;
|
const auto & unready_host = state.unready_hosts.begin()->first;
|
||||||
if (unready_host != previous_unready_host)
|
LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {})", unready_host);
|
||||||
{
|
|
||||||
LOG_TRACE(log, "Waiting for host {}", unready_host);
|
|
||||||
previous_unready_host = unready_host;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
|
/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
|
||||||
{
|
{
|
||||||
@ -195,6 +267,7 @@ Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const S
|
|||||||
unready_host_state.started ? "" : ": Operation didn't start");
|
unready_host_state.started ? "" : ": Operation didn't start");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG_TRACE(log, "Everything is Ok. All hosts achieved stage {}", stage_to_wait);
|
||||||
return state.results;
|
return state.results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Common/ZooKeeper/Common.h>
|
#include <Backups/WithRetries.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -10,7 +9,10 @@ namespace DB
|
|||||||
class BackupCoordinationStageSync
|
class BackupCoordinationStageSync
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
|
BackupCoordinationStageSync(
|
||||||
|
const String & root_zookeeper_path_,
|
||||||
|
WithRetries & with_retries_,
|
||||||
|
Poco::Logger * log_);
|
||||||
|
|
||||||
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
|
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
|
||||||
void set(const String & current_host, const String & new_stage, const String & message);
|
void set(const String & current_host, const String & new_stage, const String & message);
|
||||||
@ -27,12 +29,13 @@ private:
|
|||||||
void createRootNodes();
|
void createRootNodes();
|
||||||
|
|
||||||
struct State;
|
struct State;
|
||||||
State readCurrentState(zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
|
State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
|
||||||
|
|
||||||
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
|
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
|
||||||
|
|
||||||
String zookeeper_path;
|
String zookeeper_path;
|
||||||
zkutil::GetZooKeeper get_zookeeper;
|
/// A reference to the field of parent object - BackupCoordinationRemote or RestoreCoordinationRemote
|
||||||
|
WithRetries & with_retries;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -84,6 +84,12 @@ BackupEntriesCollector::BackupEntriesCollector(
|
|||||||
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
|
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
|
||||||
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
|
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
|
||||||
, log(&Poco::Logger::get("BackupEntriesCollector"))
|
, log(&Poco::Logger::get("BackupEntriesCollector"))
|
||||||
|
, global_zookeeper_retries_info(
|
||||||
|
"BackupEntriesCollector",
|
||||||
|
log,
|
||||||
|
context->getSettingsRef().backup_restore_keeper_max_retries,
|
||||||
|
context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
|
||||||
|
context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -482,7 +488,10 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
db_tables = database->getTablesForBackup(filter_by_table_name, context);
|
/// Database or table could be replicated - so may use ZooKeeper. We need to retry.
|
||||||
|
auto zookeeper_retries_info = global_zookeeper_retries_info;
|
||||||
|
ZooKeeperRetriesControl retries_ctl("getTablesForBackup", zookeeper_retries_info);
|
||||||
|
retries_ctl.retryLoop([&](){ db_tables = database->getTablesForBackup(filter_by_table_name, context); });
|
||||||
}
|
}
|
||||||
catch (Exception & e)
|
catch (Exception & e)
|
||||||
{
|
{
|
||||||
@ -745,6 +754,7 @@ void BackupEntriesCollector::addPostTask(std::function<void()> task)
|
|||||||
/// Runs all the tasks added with addPostCollectingTask().
|
/// Runs all the tasks added with addPostCollectingTask().
|
||||||
void BackupEntriesCollector::runPostTasks()
|
void BackupEntriesCollector::runPostTasks()
|
||||||
{
|
{
|
||||||
|
LOG_TRACE(log, "Will run {} post tasks", post_tasks.size());
|
||||||
/// Post collecting tasks can add other post collecting tasks, our code is fine with that.
|
/// Post collecting tasks can add other post collecting tasks, our code is fine with that.
|
||||||
while (!post_tasks.empty())
|
while (!post_tasks.empty())
|
||||||
{
|
{
|
||||||
@ -752,6 +762,7 @@ void BackupEntriesCollector::runPostTasks()
|
|||||||
post_tasks.pop();
|
post_tasks.pop();
|
||||||
std::move(task)();
|
std::move(task)();
|
||||||
}
|
}
|
||||||
|
LOG_TRACE(log, "All post tasks successfully executed");
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type)
|
size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type)
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <Parsers/ASTBackupQuery.h>
|
#include <Parsers/ASTBackupQuery.h>
|
||||||
#include <Storages/IStorage_fwd.h>
|
#include <Storages/IStorage_fwd.h>
|
||||||
#include <Storages/TableLockHolder.h>
|
#include <Storages/TableLockHolder.h>
|
||||||
|
#include <Storages/MergeTree/ZooKeeperRetries.h>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
@ -96,6 +97,9 @@ private:
|
|||||||
std::chrono::milliseconds on_cluster_first_sync_timeout;
|
std::chrono::milliseconds on_cluster_first_sync_timeout;
|
||||||
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
|
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
|
/// Unfortunately, collecting information for a backup may involve ZooKeeper
|
||||||
|
/// and we need to retry...
|
||||||
|
ZooKeeperRetriesInfo global_zookeeper_retries_info;
|
||||||
|
|
||||||
Strings all_hosts;
|
Strings all_hosts;
|
||||||
DDLRenamingMap renaming_map;
|
DDLRenamingMap renaming_map;
|
||||||
|
@ -58,10 +58,13 @@ namespace
|
|||||||
|
|
||||||
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
|
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
|
||||||
{
|
{
|
||||||
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
|
.keeper_max_retries = context->getSettingsRef().backup_restore_keeper_max_retries,
|
||||||
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
|
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
|
||||||
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
|
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms,
|
||||||
.keeper_value_max_size = context->getSettingsRef().backup_keeper_value_max_size,
|
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_restore_batch_size_for_keeper_multiread,
|
||||||
|
.keeper_fault_injection_probability = context->getSettingsRef().backup_restore_keeper_fault_injection_probability,
|
||||||
|
.keeper_fault_injection_seed = context->getSettingsRef().backup_restore_keeper_fault_injection_seed,
|
||||||
|
.keeper_value_max_size = context->getSettingsRef().backup_restore_keeper_value_max_size,
|
||||||
};
|
};
|
||||||
|
|
||||||
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
||||||
@ -92,10 +95,27 @@ namespace
|
|||||||
|
|
||||||
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
|
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
|
||||||
|
|
||||||
|
RestoreCoordinationRemote::RestoreKeeperSettings keeper_settings
|
||||||
|
{
|
||||||
|
.keeper_max_retries = context->getSettingsRef().backup_restore_keeper_max_retries,
|
||||||
|
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_initial_backoff_ms,
|
||||||
|
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_restore_keeper_retry_max_backoff_ms,
|
||||||
|
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_restore_batch_size_for_keeper_multiread,
|
||||||
|
.keeper_fault_injection_probability = context->getSettingsRef().backup_restore_keeper_fault_injection_probability,
|
||||||
|
.keeper_fault_injection_seed = context->getSettingsRef().backup_restore_keeper_fault_injection_seed
|
||||||
|
};
|
||||||
|
|
||||||
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
auto all_hosts = BackupSettings::Util::filterHostIDs(
|
||||||
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
|
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
|
||||||
|
|
||||||
return std::make_shared<RestoreCoordinationRemote>(get_zookeeper, root_zk_path, toString(*restore_settings.restore_uuid), all_hosts, restore_settings.host_id, restore_settings.internal);
|
return std::make_shared<RestoreCoordinationRemote>(
|
||||||
|
get_zookeeper,
|
||||||
|
root_zk_path,
|
||||||
|
keeper_settings,
|
||||||
|
toString(*restore_settings.restore_uuid),
|
||||||
|
all_hosts,
|
||||||
|
restore_settings.host_id,
|
||||||
|
restore_settings.internal);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -660,7 +680,9 @@ void BackupsWorker::doRestore(
|
|||||||
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
|
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
|
||||||
|
|
||||||
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
|
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
|
||||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
|
throw Exception(
|
||||||
|
ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
|
||||||
|
"Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
|
||||||
|
|
||||||
/// Do RESTORE.
|
/// Do RESTORE.
|
||||||
if (on_cluster)
|
if (on_cluster)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
|
#include <Backups/BackupCoordinationRemote.h>
|
||||||
|
#include <Backups/BackupCoordinationStage.h>
|
||||||
#include <Backups/RestoreCoordinationRemote.h>
|
#include <Backups/RestoreCoordinationRemote.h>
|
||||||
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
|
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
|
||||||
#include <Common/ZooKeeper/KeeperException.h>
|
#include <Common/ZooKeeper/KeeperException.h>
|
||||||
#include <Common/escapeForFileName.h>
|
#include <Common/escapeForFileName.h>
|
||||||
#include <Backups/BackupCoordinationStage.h>
|
#include "Backups/BackupCoordinationStageSync.h"
|
||||||
#include <Backups/BackupCoordinationRemote.h>
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -13,12 +14,14 @@ namespace Stage = BackupCoordinationStage;
|
|||||||
RestoreCoordinationRemote::RestoreCoordinationRemote(
|
RestoreCoordinationRemote::RestoreCoordinationRemote(
|
||||||
zkutil::GetZooKeeper get_zookeeper_,
|
zkutil::GetZooKeeper get_zookeeper_,
|
||||||
const String & root_zookeeper_path_,
|
const String & root_zookeeper_path_,
|
||||||
|
const RestoreKeeperSettings & keeper_settings_,
|
||||||
const String & restore_uuid_,
|
const String & restore_uuid_,
|
||||||
const Strings & all_hosts_,
|
const Strings & all_hosts_,
|
||||||
const String & current_host_,
|
const String & current_host_,
|
||||||
bool is_internal_)
|
bool is_internal_)
|
||||||
: get_zookeeper(get_zookeeper_)
|
: get_zookeeper(get_zookeeper_)
|
||||||
, root_zookeeper_path(root_zookeeper_path_)
|
, root_zookeeper_path(root_zookeeper_path_)
|
||||||
|
, keeper_settings(keeper_settings_)
|
||||||
, restore_uuid(restore_uuid_)
|
, restore_uuid(restore_uuid_)
|
||||||
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
|
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
|
||||||
, all_hosts(all_hosts_)
|
, all_hosts(all_hosts_)
|
||||||
@ -26,11 +29,32 @@ RestoreCoordinationRemote::RestoreCoordinationRemote(
|
|||||||
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
|
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
|
||||||
, is_internal(is_internal_)
|
, is_internal(is_internal_)
|
||||||
, log(&Poco::Logger::get("RestoreCoordinationRemote"))
|
, log(&Poco::Logger::get("RestoreCoordinationRemote"))
|
||||||
|
, with_retries(
|
||||||
|
log,
|
||||||
|
get_zookeeper_,
|
||||||
|
keeper_settings,
|
||||||
|
[zookeeper_path = zookeeper_path, current_host = current_host, is_internal = is_internal]
|
||||||
|
(WithRetries::FaultyKeeper & zk)
|
||||||
|
{
|
||||||
|
/// Recreate this ephemeral node to signal that we are alive.
|
||||||
|
if (is_internal)
|
||||||
|
{
|
||||||
|
String alive_node_path = zookeeper_path + "/stage/alive|" + current_host;
|
||||||
|
auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
||||||
|
|
||||||
|
if (code == Coordination::Error::ZNODEEXISTS)
|
||||||
|
zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, "");
|
||||||
|
else if (code != Coordination::Error::ZOK)
|
||||||
|
throw zkutil::KeeperException(code, alive_node_path);
|
||||||
|
}
|
||||||
|
})
|
||||||
{
|
{
|
||||||
createRootNodes();
|
createRootNodes();
|
||||||
|
|
||||||
stage_sync.emplace(
|
stage_sync.emplace(
|
||||||
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, log);
|
zookeeper_path,
|
||||||
|
with_retries,
|
||||||
|
log);
|
||||||
}
|
}
|
||||||
|
|
||||||
RestoreCoordinationRemote::~RestoreCoordinationRemote()
|
RestoreCoordinationRemote::~RestoreCoordinationRemote()
|
||||||
@ -46,31 +70,25 @@ RestoreCoordinationRemote::~RestoreCoordinationRemote()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
zkutil::ZooKeeperPtr RestoreCoordinationRemote::getZooKeeper() const
|
|
||||||
{
|
|
||||||
std::lock_guard lock{mutex};
|
|
||||||
if (!zookeeper || zookeeper->expired())
|
|
||||||
{
|
|
||||||
zookeeper = get_zookeeper();
|
|
||||||
|
|
||||||
/// It's possible that we connected to different [Zoo]Keeper instance
|
|
||||||
/// so we may read a bit stale state.
|
|
||||||
zookeeper->sync(zookeeper_path);
|
|
||||||
}
|
|
||||||
return zookeeper;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RestoreCoordinationRemote::createRootNodes()
|
void RestoreCoordinationRemote::createRootNodes()
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
|
||||||
zk->createAncestors(zookeeper_path);
|
holder.retries_ctl.retryLoop(
|
||||||
zk->createIfNotExists(zookeeper_path, "");
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
|
{
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
|
with_retries.renewZooKeeper(zk);
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
|
zk->createAncestors(zookeeper_path);
|
||||||
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects_acquired", "");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Coordination::Requests ops;
|
||||||
|
Coordination::Responses responses;
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_databases_tables_acquired", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_tables_data_acquired", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access_storages_acquired", "", zkutil::CreateMode::Persistent));
|
||||||
|
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects_acquired", "", zkutil::CreateMode::Persistent));
|
||||||
|
zk->tryMulti(ops, responses);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
|
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
|
||||||
{
|
{
|
||||||
@ -92,66 +110,121 @@ Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait, st
|
|||||||
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
|
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
|
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
bool result = false;
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("acquireCreatingTableInReplicatedDatabase");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
|
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
|
||||||
zk->createIfNotExists(path, "");
|
zk->createIfNotExists(path, "");
|
||||||
|
|
||||||
path += "/" + escapeForFileName(table_name);
|
path += "/" + escapeForFileName(table_name);
|
||||||
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
|
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
|
||||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||||
throw zkutil::KeeperException(code, path);
|
throw zkutil::KeeperException(code, path);
|
||||||
|
|
||||||
return (code == Coordination::Error::ZOK);
|
if (code == Coordination::Error::ZOK)
|
||||||
|
{
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We need to check who created that node
|
||||||
|
result = zk->get(path) == toString(current_host_index);
|
||||||
|
});
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
|
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
bool result = false;
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataIntoReplicatedTable");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
|
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
|
||||||
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
|
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
|
||||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||||
throw zkutil::KeeperException(code, path);
|
throw zkutil::KeeperException(code, path);
|
||||||
|
|
||||||
return (code == Coordination::Error::ZOK);
|
if (code == Coordination::Error::ZOK)
|
||||||
|
{
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We need to check who created that node
|
||||||
|
result = zk->get(path) == toString(current_host_index);
|
||||||
|
});
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
|
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
bool result = false;
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedAccessStorage");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
|
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
|
||||||
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
|
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
|
||||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||||
throw zkutil::KeeperException(code, path);
|
throw zkutil::KeeperException(code, path);
|
||||||
|
|
||||||
return (code == Coordination::Error::ZOK);
|
if (code == Coordination::Error::ZOK)
|
||||||
|
{
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We need to check who created that node
|
||||||
|
result = zk->get(path) == toString(current_host_index);
|
||||||
|
});
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type)
|
bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type)
|
||||||
{
|
{
|
||||||
auto zk = getZooKeeper();
|
bool result = false;
|
||||||
|
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedSQLObjects");
|
||||||
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
|
||||||
String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path);
|
String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path);
|
||||||
zk->createIfNotExists(path, "");
|
zk->createIfNotExists(path, "");
|
||||||
|
|
||||||
path += "/";
|
path += "/";
|
||||||
switch (object_type)
|
switch (object_type)
|
||||||
{
|
{
|
||||||
case UserDefinedSQLObjectType::Function:
|
case UserDefinedSQLObjectType::Function:
|
||||||
path += "functions";
|
path += "functions";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
|
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
|
||||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||||
throw zkutil::KeeperException(code, path);
|
throw zkutil::KeeperException(code, path);
|
||||||
|
|
||||||
return (code == Coordination::Error::ZOK);
|
if (code == Coordination::Error::ZOK)
|
||||||
|
{
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// We need to check who created that node
|
||||||
|
result = zk->get(path) == toString(current_host_index);
|
||||||
|
});
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RestoreCoordinationRemote::removeAllNodes()
|
void RestoreCoordinationRemote::removeAllNodes()
|
||||||
@ -163,8 +236,13 @@ void RestoreCoordinationRemote::removeAllNodes()
|
|||||||
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
|
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
|
||||||
/// of their restore work before that.
|
/// of their restore work before that.
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
|
||||||
zk->removeRecursive(zookeeper_path);
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
|
{
|
||||||
|
with_retries.renewZooKeeper(zk);
|
||||||
|
zk->removeRecursive(zookeeper_path);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const
|
bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const
|
||||||
@ -173,46 +251,54 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
|
|||||||
if (is_internal)
|
if (is_internal)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto zk = getZooKeeper();
|
bool result = false;
|
||||||
std::string path = zookeeper_path +"/stage";
|
std::string path = zookeeper_path +"/stage";
|
||||||
|
|
||||||
if (! zk->exists(root_zookeeper_path))
|
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
|
||||||
zk->createAncestors(root_zookeeper_path);
|
holder.retries_ctl.retryLoop(
|
||||||
|
[&, &zk = holder.faulty_zookeeper]()
|
||||||
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
|
||||||
{
|
|
||||||
Coordination::Stat stat;
|
|
||||||
zk->get(root_zookeeper_path, &stat);
|
|
||||||
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
|
|
||||||
for (const auto & existing_restore_path : existing_restore_paths)
|
|
||||||
{
|
{
|
||||||
if (startsWith(existing_restore_path, "backup-"))
|
with_retries.renewZooKeeper(zk);
|
||||||
continue;
|
|
||||||
|
|
||||||
String existing_restore_uuid = existing_restore_path;
|
if (! zk->exists(root_zookeeper_path))
|
||||||
existing_restore_uuid.erase(0, String("restore-").size());
|
zk->createAncestors(root_zookeeper_path);
|
||||||
|
|
||||||
if (existing_restore_uuid == toString(restore_uuid))
|
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
||||||
continue;
|
|
||||||
|
|
||||||
|
|
||||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
|
|
||||||
if (status != Stage::COMPLETED)
|
|
||||||
{
|
{
|
||||||
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
|
Coordination::Stat stat;
|
||||||
return true;
|
zk->get(root_zookeeper_path, &stat);
|
||||||
|
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
|
||||||
|
for (const auto & existing_restore_path : existing_restore_paths)
|
||||||
|
{
|
||||||
|
if (startsWith(existing_restore_path, "backup-"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
String existing_restore_uuid = existing_restore_path;
|
||||||
|
existing_restore_uuid.erase(0, String("restore-").size());
|
||||||
|
|
||||||
|
if (existing_restore_uuid == toString(restore_uuid))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
|
||||||
|
if (status != Stage::COMPLETED)
|
||||||
|
{
|
||||||
|
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
|
||||||
|
result = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
zk->createIfNotExists(path, "");
|
||||||
|
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version);
|
||||||
|
if (code == Coordination::Error::ZOK)
|
||||||
|
break;
|
||||||
|
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
||||||
|
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
||||||
|
throw zkutil::KeeperException(code, path);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
zk->createIfNotExists(path, "");
|
return result;
|
||||||
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version);
|
|
||||||
if (code == Coordination::Error::ZOK)
|
|
||||||
break;
|
|
||||||
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
|
||||||
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
|
||||||
throw zkutil::KeeperException(code, path);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <Backups/IRestoreCoordination.h>
|
#include <Backups/IRestoreCoordination.h>
|
||||||
#include <Backups/BackupCoordinationStageSync.h>
|
#include <Backups/BackupCoordinationStageSync.h>
|
||||||
|
#include <Backups/WithRetries.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -11,9 +12,12 @@ namespace DB
|
|||||||
class RestoreCoordinationRemote : public IRestoreCoordination
|
class RestoreCoordinationRemote : public IRestoreCoordination
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
using RestoreKeeperSettings = WithRetries::KeeperSettings;
|
||||||
|
|
||||||
RestoreCoordinationRemote(
|
RestoreCoordinationRemote(
|
||||||
zkutil::GetZooKeeper get_zookeeper_,
|
zkutil::GetZooKeeper get_zookeeper_,
|
||||||
const String & root_zookeeper_path_,
|
const String & root_zookeeper_path_,
|
||||||
|
const RestoreKeeperSettings & keeper_settings_,
|
||||||
const String & restore_uuid_,
|
const String & restore_uuid_,
|
||||||
const Strings & all_hosts_,
|
const Strings & all_hosts_,
|
||||||
const String & current_host_,
|
const String & current_host_,
|
||||||
@ -45,14 +49,15 @@ public:
|
|||||||
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
|
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
zkutil::ZooKeeperPtr getZooKeeper() const;
|
|
||||||
void createRootNodes();
|
void createRootNodes();
|
||||||
void removeAllNodes();
|
void removeAllNodes();
|
||||||
|
|
||||||
class ReplicatedDatabasesMetadataSync;
|
class ReplicatedDatabasesMetadataSync;
|
||||||
|
|
||||||
|
/// get_zookeeper will provide a zookeeper client without any fault injection
|
||||||
const zkutil::GetZooKeeper get_zookeeper;
|
const zkutil::GetZooKeeper get_zookeeper;
|
||||||
const String root_zookeeper_path;
|
const String root_zookeeper_path;
|
||||||
|
const RestoreKeeperSettings keeper_settings;
|
||||||
const String restore_uuid;
|
const String restore_uuid;
|
||||||
const String zookeeper_path;
|
const String zookeeper_path;
|
||||||
const Strings all_hosts;
|
const Strings all_hosts;
|
||||||
@ -61,10 +66,9 @@ private:
|
|||||||
const bool is_internal;
|
const bool is_internal;
|
||||||
Poco::Logger * const log;
|
Poco::Logger * const log;
|
||||||
|
|
||||||
|
mutable WithRetries with_retries;
|
||||||
std::optional<BackupCoordinationStageSync> stage_sync;
|
std::optional<BackupCoordinationStageSync> stage_sync;
|
||||||
|
|
||||||
mutable std::mutex mutex;
|
mutable std::mutex mutex;
|
||||||
mutable zkutil::ZooKeeperPtr zookeeper;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
61
src/Backups/WithRetries.cpp
Normal file
61
src/Backups/WithRetries.cpp
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#include <mutex>
|
||||||
|
#include <Backups/WithRetries.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
WithRetries::WithRetries(Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_)
|
||||||
|
: log(log_)
|
||||||
|
, get_zookeeper(get_zookeeper_)
|
||||||
|
, settings(settings_)
|
||||||
|
, callback(callback_)
|
||||||
|
, global_zookeeper_retries_info(
|
||||||
|
log->name(),
|
||||||
|
log,
|
||||||
|
settings.keeper_max_retries,
|
||||||
|
settings.keeper_retry_initial_backoff_ms,
|
||||||
|
settings.keeper_retry_max_backoff_ms)
|
||||||
|
{}
|
||||||
|
|
||||||
|
WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name)
|
||||||
|
: info(parent->global_zookeeper_retries_info)
|
||||||
|
, retries_ctl(name, info)
|
||||||
|
, faulty_zookeeper(parent->getFaultyZooKeeper())
|
||||||
|
{}
|
||||||
|
|
||||||
|
WithRetries::RetriesControlHolder WithRetries::createRetriesControlHolder(const String & name)
|
||||||
|
{
|
||||||
|
return RetriesControlHolder(this, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
|
||||||
|
{
|
||||||
|
std::lock_guard lock(zookeeper_mutex);
|
||||||
|
|
||||||
|
if (!zookeeper || zookeeper->expired())
|
||||||
|
{
|
||||||
|
zookeeper = get_zookeeper();
|
||||||
|
my_faulty_zookeeper->setKeeper(zookeeper);
|
||||||
|
|
||||||
|
callback(my_faulty_zookeeper);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
|
||||||
|
{
|
||||||
|
/// We need to create a new instance of ZooKeeperWithFaultInjection each time and copy a pointer to the ZooKeeper client there.
|
||||||
|
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition
|
||||||
|
/// when the same object is used from multiple threads.
|
||||||
|
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
|
||||||
|
settings.keeper_fault_injection_probability,
|
||||||
|
settings.keeper_fault_injection_seed,
|
||||||
|
zookeeper,
|
||||||
|
log->name(),
|
||||||
|
log);
|
||||||
|
|
||||||
|
return faulty_zookeeper;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
79
src/Backups/WithRetries.h
Normal file
79
src/Backups/WithRetries.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Storages/MergeTree/ZooKeeperRetries.h>
|
||||||
|
#include <Common/ZooKeeper/Common.h>
|
||||||
|
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/// In backups every request to [Zoo]Keeper should be retryable
|
||||||
|
/// and this tiny class encapsulates all the machinery to make it possible -
|
||||||
|
/// a [Zoo]Keeper client which injects faults with configurable probability
|
||||||
|
/// and a retries controller which performs retries with growing backoff.
|
||||||
|
class WithRetries
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using FaultyKeeper = Coordination::ZooKeeperWithFaultInjection::Ptr;
|
||||||
|
using RenewerCallback = std::function<void(FaultyKeeper &)>;
|
||||||
|
|
||||||
|
struct KeeperSettings
|
||||||
|
{
|
||||||
|
UInt64 keeper_max_retries{0};
|
||||||
|
UInt64 keeper_retry_initial_backoff_ms{0};
|
||||||
|
UInt64 keeper_retry_max_backoff_ms{0};
|
||||||
|
UInt64 batch_size_for_keeper_multiread{10000};
|
||||||
|
Float64 keeper_fault_injection_probability{0};
|
||||||
|
UInt64 keeper_fault_injection_seed{42};
|
||||||
|
UInt64 keeper_value_max_size{1048576};
|
||||||
|
};
|
||||||
|
|
||||||
|
/// For simplicity a separate ZooKeeperRetriesInfo and a faulty [Zoo]Keeper client
|
||||||
|
/// are stored in one place.
|
||||||
|
/// This helps to avoid writing too much boilerplate each time we need to
|
||||||
|
/// execute some operation (a set of requests) over [Zoo]Keeper with retries.
|
||||||
|
/// Why is ZooKeeperRetriesInfo separate for each operation?
|
||||||
|
/// The reason is that a backup usually takes a long time to finish and it makes no sense
|
||||||
|
/// to limit the overall number of retries (for example 1000) for the whole backup
|
||||||
|
/// and have a continuously growing backoff.
|
||||||
|
class RetriesControlHolder
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ZooKeeperRetriesInfo info;
|
||||||
|
ZooKeeperRetriesControl retries_ctl;
|
||||||
|
FaultyKeeper faulty_zookeeper;
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class WithRetries;
|
||||||
|
RetriesControlHolder(const WithRetries * parent, const String & name);
|
||||||
|
};
|
||||||
|
|
||||||
|
RetriesControlHolder createRetriesControlHolder(const String & name);
|
||||||
|
WithRetries(Poco::Logger * log, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings, RenewerCallback callback);
|
||||||
|
|
||||||
|
/// Used to re-establish a connection inside a retry loop.
|
||||||
|
void renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const;
|
||||||
|
private:
|
||||||
|
/// This will provide a special wrapper which is useful for testing
|
||||||
|
FaultyKeeper getFaultyZooKeeper() const;
|
||||||
|
|
||||||
|
Poco::Logger * log;
|
||||||
|
zkutil::GetZooKeeper get_zookeeper;
|
||||||
|
KeeperSettings settings;
|
||||||
|
/// This callback is called each time a new [Zoo]Keeper session is created.
|
||||||
|
/// In backups it is primarily used to re-create an ephemeral node to signal the coordinator
|
||||||
|
/// that the host is alive and able to continue writing the backup.
|
||||||
|
/// Coordinator (or an initiator) of the backup also retries when it doesn't find an ephemeral node
|
||||||
|
/// for a particular host.
|
||||||
|
/// Again, this scheme is not ideal. False positives are still possible, but in the worst-case scenario
|
||||||
|
/// it could only lead to a failed backup which could possibly have been successful
|
||||||
|
/// if there were a few more retries.
|
||||||
|
RenewerCallback callback;
|
||||||
|
ZooKeeperRetriesInfo global_zookeeper_retries_info;
|
||||||
|
|
||||||
|
/// This is needed only to protect the zookeeper object
|
||||||
|
mutable std::mutex zookeeper_mutex;
|
||||||
|
mutable zkutil::ZooKeeperPtr zookeeper;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -608,7 +608,8 @@ if (ENABLE_TESTS)
|
|||||||
dbms
|
dbms
|
||||||
clickhouse_common_config
|
clickhouse_common_config
|
||||||
clickhouse_common_zookeeper
|
clickhouse_common_zookeeper
|
||||||
string_utils)
|
string_utils
|
||||||
|
hilite_comparator)
|
||||||
|
|
||||||
if (TARGET ch_contrib::simdjson)
|
if (TARGET ch_contrib::simdjson)
|
||||||
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
|
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ZooKeeper.h"
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
||||||
|
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||||
|
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>
|
||||||
|
|
||||||
namespace zkutil
|
namespace zkutil
|
||||||
{
|
{
|
||||||
|
|
||||||
using GetZooKeeper = std::function<ZooKeeperPtr()>;
|
using GetZooKeeper = std::function<ZooKeeperPtr()>;
|
||||||
|
using GetZooKeeperWithFaultInjection = std::function<Coordination::ZooKeeperWithFaultInjection::Ptr()>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -146,4 +146,3 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,6 +114,7 @@ public:
|
|||||||
|
|
||||||
void setKeeper(zk::Ptr const & keeper_) { keeper = keeper_; }
|
void setKeeper(zk::Ptr const & keeper_) { keeper = keeper_; }
|
||||||
bool isNull() const { return keeper.get() == nullptr; }
|
bool isNull() const { return keeper.get() == nullptr; }
|
||||||
|
bool expired() { return keeper->expired(); }
|
||||||
|
|
||||||
///
|
///
|
||||||
/// mirror ZooKeeper interface
|
/// mirror ZooKeeper interface
|
||||||
@ -232,6 +233,11 @@ public:
|
|||||||
return access("exists", path, [&]() { return keeper->exists(path, stat, watch); });
|
return access("exists", path, [&]() { return keeper->exists(path, stat, watch); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool existsNoFailureInjection(const std::string & path, Coordination::Stat * stat = nullptr, const zkutil::EventPtr & watch = nullptr)
|
||||||
|
{
|
||||||
|
return access<false, false, false>("exists", path, [&]() { return keeper->exists(path, stat, watch); });
|
||||||
|
}
|
||||||
|
|
||||||
zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths)
|
zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths)
|
||||||
{
|
{
|
||||||
return access("exists", !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); });
|
return access("exists", !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); });
|
||||||
@ -239,19 +245,30 @@ public:
|
|||||||
|
|
||||||
std::string create(const std::string & path, const std::string & data, int32_t mode)
|
std::string create(const std::string & path, const std::string & data, int32_t mode)
|
||||||
{
|
{
|
||||||
auto path_created = access(
|
std::string path_created;
|
||||||
"create",
|
auto code = tryCreate(path, data, mode, path_created);
|
||||||
|
|
||||||
|
if (code != Coordination::Error::ZOK)
|
||||||
|
throw zkutil::KeeperException(code, path);
|
||||||
|
|
||||||
|
return path_created;
|
||||||
|
}
|
||||||
|
|
||||||
|
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created)
|
||||||
|
{
|
||||||
|
auto error = access(
|
||||||
|
"tryCreate",
|
||||||
path,
|
path,
|
||||||
[&]() { return keeper->create(path, data, mode); },
|
[&]() { return keeper->tryCreate(path, data, mode, path_created); },
|
||||||
[&](std::string const & result_path)
|
[&](Coordination::Error &)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
|
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
|
||||||
{
|
{
|
||||||
keeper->remove(result_path);
|
keeper->remove(path);
|
||||||
if (unlikely(logger))
|
if (unlikely(logger))
|
||||||
LOG_TRACE(logger, "ZooKeeperWithFaultInjection cleanup: seed={} func={} path={}", seed, "create", result_path);
|
LOG_TRACE(logger, "ZooKeeperWithFaultInjection cleanup: seed={} func={} path={}", seed, "tryCreate", path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (const zkutil::KeeperException & e)
|
catch (const zkutil::KeeperException & e)
|
||||||
@ -261,8 +278,8 @@ public:
|
|||||||
logger,
|
logger,
|
||||||
"ZooKeeperWithFaultInjection cleanup FAILED: seed={} func={} path={} code={} message={} ",
|
"ZooKeeperWithFaultInjection cleanup FAILED: seed={} func={} path={} code={} message={} ",
|
||||||
seed,
|
seed,
|
||||||
"create",
|
"tryCreate",
|
||||||
result_path,
|
path,
|
||||||
e.code,
|
e.code,
|
||||||
e.message());
|
e.message());
|
||||||
}
|
}
|
||||||
@ -272,10 +289,27 @@ public:
|
|||||||
if (unlikely(fault_policy))
|
if (unlikely(fault_policy))
|
||||||
{
|
{
|
||||||
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
|
if (mode == zkutil::CreateMode::EphemeralSequential || mode == zkutil::CreateMode::Ephemeral)
|
||||||
ephemeral_nodes.push_back(path_created);
|
ephemeral_nodes.push_back(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
return path_created;
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode)
|
||||||
|
{
|
||||||
|
String path_created;
|
||||||
|
return tryCreate(path, data, mode, path_created);
|
||||||
|
}
|
||||||
|
|
||||||
|
void createIfNotExists(const std::string & path, const std::string & data)
|
||||||
|
{
|
||||||
|
std::string path_created;
|
||||||
|
auto code = tryCreate(path, data, zkutil::CreateMode::Persistent, path_created);
|
||||||
|
|
||||||
|
if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
|
||||||
|
return;
|
||||||
|
|
||||||
|
throw zkutil::KeeperException(code, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
Coordination::Responses multi(const Coordination::Requests & requests)
|
Coordination::Responses multi(const Coordination::Requests & requests)
|
||||||
@ -306,6 +340,27 @@ public:
|
|||||||
return access("tryRemove", path, [&]() { return keeper->tryRemove(path, version); });
|
return access("tryRemove", path, [&]() { return keeper->tryRemove(path, version); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void removeRecursive(const std::string & path)
|
||||||
|
{
|
||||||
|
return access("removeRecursive", path, [&]() { return keeper->removeRecursive(path); });
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string sync(const std::string & path)
|
||||||
|
{
|
||||||
|
return access("sync", path, [&]() { return keeper->sync(path); });
|
||||||
|
}
|
||||||
|
|
||||||
|
Coordination::Error trySet(const std::string & path, const std::string & data, int32_t version = -1, Coordination::Stat * stat = nullptr)
|
||||||
|
{
|
||||||
|
return access("trySet", path, [&]() { return keeper->trySet(path, data, version, stat); });
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void handleEphemeralNodeExistenceNoFailureInjection(const std::string & path, const std::string & fast_delete_if_equal_value)
|
||||||
|
{
|
||||||
|
return access<false, false, false>("handleEphemeralNodeExistence", path, [&]() { return keeper->handleEphemeralNodeExistence(path, fast_delete_if_equal_value); });
|
||||||
|
}
|
||||||
|
|
||||||
void cleanupEphemeralNodes()
|
void cleanupEphemeralNodes()
|
||||||
{
|
{
|
||||||
for (const auto & path : ephemeral_nodes)
|
for (const auto & path : ephemeral_nodes)
|
||||||
|
@ -415,11 +415,13 @@ class IColumn;
|
|||||||
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
|
M(UInt64, max_temporary_data_on_disk_size_for_user, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.", 0)\
|
||||||
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
|
M(UInt64, max_temporary_data_on_disk_size_for_query, 0, "The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.", 0)\
|
||||||
\
|
\
|
||||||
M(UInt64, backup_keeper_max_retries, 20, "Max retries for keeper operations during backup", 0) \
|
M(UInt64, backup_restore_keeper_max_retries, 20, "Max retries for keeper operations during backup or restore", 0) \
|
||||||
M(UInt64, backup_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup", 0) \
|
M(UInt64, backup_restore_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for [Zoo]Keeper operations during backup or restore", 0) \
|
||||||
M(UInt64, backup_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup", 0) \
|
M(UInt64, backup_restore_keeper_retry_max_backoff_ms, 5000, "Max backoff timeout for [Zoo]Keeper operations during backup or restore", 0) \
|
||||||
M(UInt64, backup_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
|
M(Float, backup_restore_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during backup or restore. Valid value is in interval [0.0f, 1.0f]", 0) \
|
||||||
M(UInt64, backup_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup", 0) \
|
M(UInt64, backup_restore_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
|
||||||
|
M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
|
||||||
|
M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \
|
||||||
\
|
\
|
||||||
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
|
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
|
||||||
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
|
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <DataTypes/DataTypeMap.h>
|
||||||
#include <DataTypes/IDataType.h>
|
#include <DataTypes/IDataType.h>
|
||||||
#include <boost/algorithm/string.hpp>
|
#include <boost/algorithm/string.hpp>
|
||||||
#include <boost/algorithm/string/join.hpp>
|
#include <boost/algorithm/string/join.hpp>
|
||||||
@ -264,23 +265,25 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!tuple_data_type->haveExplicitNames())
|
bool have_explicit_names = tuple_data_type->haveExplicitNames();
|
||||||
|
const auto & nested_names = tuple_data_type->getElementNames();
|
||||||
|
for (uint32_t i = 0; i != nested_names.size(); ++i)
|
||||||
{
|
{
|
||||||
error_message += "Only named Tuple can be converted to CapnProto Struct";
|
if (have_explicit_names)
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (const auto & name : tuple_data_type->getElementNames())
|
|
||||||
{
|
|
||||||
KJ_IF_MAYBE(field, struct_schema.findFieldByName(name))
|
|
||||||
{
|
{
|
||||||
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message, name))
|
KJ_IF_MAYBE (field, struct_schema.findFieldByName(nested_names[i]))
|
||||||
|
{
|
||||||
|
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error_message += "CapnProto struct doesn't contain a field with name " + nested_names[i];
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (!checkCapnProtoType(struct_schema.getFields()[i].getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
|
||||||
{
|
|
||||||
error_message += "CapnProto struct doesn't contain a field with name " + name;
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -307,41 +310,129 @@ static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & d
|
|||||||
return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name);
|
return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool checkMapType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
|
||||||
|
{
|
||||||
|
/// We output/input the Map type using the following CapnProto schema:
|
||||||
|
///
|
||||||
|
/// struct Map {
|
||||||
|
/// struct Entry {
|
||||||
|
/// key @0: Key;
|
||||||
|
/// value @1: Value;
|
||||||
|
/// }
|
||||||
|
/// entries @0 :List(Entry);
|
||||||
|
/// }
|
||||||
|
|
||||||
|
if (!capnp_type.isStruct())
|
||||||
|
return false;
|
||||||
|
auto struct_schema = capnp_type.asStruct();
|
||||||
|
|
||||||
|
if (checkIfStructContainsUnnamedUnion(struct_schema))
|
||||||
|
{
|
||||||
|
error_message += "CapnProto struct contains unnamed union";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (struct_schema.getFields().size() != 1)
|
||||||
|
{
|
||||||
|
error_message += "CapnProto struct that represents Map type can contain only one field";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto & field_type = struct_schema.getFields()[0].getType();
|
||||||
|
if (!field_type.isList())
|
||||||
|
{
|
||||||
|
error_message += "Field of CapnProto struct that represents Map is not a list";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto list_element_type = field_type.asList().getElementType();
|
||||||
|
if (!list_element_type.isStruct())
|
||||||
|
{
|
||||||
|
error_message += "Field of CapnProto struct that represents Map is not a list of structs";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto key_value_struct = list_element_type.asStruct();
|
||||||
|
if (checkIfStructContainsUnnamedUnion(key_value_struct))
|
||||||
|
{
|
||||||
|
error_message += "CapnProto struct contains unnamed union";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (key_value_struct.getFields().size() != 2)
|
||||||
|
{
|
||||||
|
error_message += "Key-value structure for Map struct should have exactly 2 fields";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||||
|
DataTypes types = {map_type.getKeyType(), map_type.getValueType()};
|
||||||
|
Names names = {"key", "value"};
|
||||||
|
|
||||||
|
for (size_t i = 0; i != types.size(); ++i)
|
||||||
|
{
|
||||||
|
KJ_IF_MAYBE(field, key_value_struct.findFieldByName(names[i]))
|
||||||
|
{
|
||||||
|
if (!checkCapnProtoType(field->getType(), types[i], mode, error_message, names[i]))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error_message += R"(Key-value structure for Map struct should have exactly 2 fields with names "key" and "value")";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isCapnInteger(const capnp::Type & capnp_type)
|
||||||
|
{
|
||||||
|
return capnp_type.isInt8() || capnp_type.isUInt8() || capnp_type.isInt16() || capnp_type.isUInt16() || capnp_type.isInt32()
|
||||||
|
|| capnp_type.isUInt32() || capnp_type.isInt64() || capnp_type.isUInt64();
|
||||||
|
}
|
||||||
|
|
||||||
static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name)
|
static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name)
|
||||||
{
|
{
|
||||||
switch (data_type->getTypeId())
|
switch (data_type->getTypeId())
|
||||||
{
|
{
|
||||||
case TypeIndex::UInt8:
|
case TypeIndex::UInt8:
|
||||||
return capnp_type.isBool() || capnp_type.isUInt8();
|
return capnp_type.isBool() || isCapnInteger(capnp_type);
|
||||||
case TypeIndex::Date: [[fallthrough]];
|
case TypeIndex::Int8: [[fallthrough]];
|
||||||
case TypeIndex::UInt16:
|
case TypeIndex::Int16: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt16: [[fallthrough]];
|
||||||
|
case TypeIndex::Int32: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt32: [[fallthrough]];
|
||||||
|
case TypeIndex::Int64: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt64:
|
||||||
|
/// Allow integer conversions during input/output.
|
||||||
|
return isCapnInteger(capnp_type);
|
||||||
|
case TypeIndex::Date:
|
||||||
return capnp_type.isUInt16();
|
return capnp_type.isUInt16();
|
||||||
case TypeIndex::DateTime: [[fallthrough]];
|
case TypeIndex::DateTime: [[fallthrough]];
|
||||||
case TypeIndex::IPv4: [[fallthrough]];
|
case TypeIndex::IPv4:
|
||||||
case TypeIndex::UInt32:
|
|
||||||
return capnp_type.isUInt32();
|
return capnp_type.isUInt32();
|
||||||
case TypeIndex::UInt64:
|
|
||||||
return capnp_type.isUInt64();
|
|
||||||
case TypeIndex::Int8:
|
|
||||||
return capnp_type.isInt8();
|
|
||||||
case TypeIndex::Int16:
|
|
||||||
return capnp_type.isInt16();
|
|
||||||
case TypeIndex::Date32: [[fallthrough]];
|
case TypeIndex::Date32: [[fallthrough]];
|
||||||
case TypeIndex::Decimal32: [[fallthrough]];
|
case TypeIndex::Decimal32:
|
||||||
case TypeIndex::Int32:
|
return capnp_type.isInt32() || capnp_type.isUInt32();
|
||||||
return capnp_type.isInt32();
|
|
||||||
case TypeIndex::DateTime64: [[fallthrough]];
|
case TypeIndex::DateTime64: [[fallthrough]];
|
||||||
case TypeIndex::Decimal64: [[fallthrough]];
|
case TypeIndex::Decimal64:
|
||||||
case TypeIndex::Int64:
|
return capnp_type.isInt64() || capnp_type.isUInt64();
|
||||||
return capnp_type.isInt64();
|
case TypeIndex::Float32:[[fallthrough]];
|
||||||
case TypeIndex::Float32:
|
|
||||||
return capnp_type.isFloat32();
|
|
||||||
case TypeIndex::Float64:
|
case TypeIndex::Float64:
|
||||||
return capnp_type.isFloat64();
|
/// Allow converting between Float32 and Float64
|
||||||
|
return capnp_type.isFloat32() || capnp_type.isFloat64();
|
||||||
case TypeIndex::Enum8:
|
case TypeIndex::Enum8:
|
||||||
return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message);
|
return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message);
|
||||||
case TypeIndex::Enum16:
|
case TypeIndex::Enum16:
|
||||||
return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message);
|
return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message);
|
||||||
|
case TypeIndex::Int128: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt128: [[fallthrough]];
|
||||||
|
case TypeIndex::Int256: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt256: [[fallthrough]];
|
||||||
|
case TypeIndex::Decimal128: [[fallthrough]];
|
||||||
|
case TypeIndex::Decimal256:
|
||||||
|
return capnp_type.isData();
|
||||||
case TypeIndex::Tuple:
|
case TypeIndex::Tuple:
|
||||||
return checkTupleType(capnp_type, data_type, mode, error_message);
|
return checkTupleType(capnp_type, data_type, mode, error_message);
|
||||||
case TypeIndex::Nullable:
|
case TypeIndex::Nullable:
|
||||||
@ -359,6 +450,8 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr
|
|||||||
case TypeIndex::IPv6: [[fallthrough]];
|
case TypeIndex::IPv6: [[fallthrough]];
|
||||||
case TypeIndex::String:
|
case TypeIndex::String:
|
||||||
return capnp_type.isText() || capnp_type.isData();
|
return capnp_type.isText() || capnp_type.isData();
|
||||||
|
case TypeIndex::Map:
|
||||||
|
return checkMapType(capnp_type, data_type, mode, error_message);
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -440,10 +440,10 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
|
|||||||
if (select)
|
if (select)
|
||||||
{
|
{
|
||||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS"
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS"
|
||||||
<< (comment ? "(" : "")
|
<< settings.nl_or_ws
|
||||||
<< settings.nl_or_ws << (settings.hilite ? hilite_none : "");
|
<< (comment ? "(" : "") << (settings.hilite ? hilite_none : "");
|
||||||
select->formatImpl(settings, state, frame);
|
select->formatImpl(settings, state, frame);
|
||||||
settings.ostr << (comment ? ")" : "");
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << (comment ? ")" : "") << (settings.hilite ? hilite_none : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (comment)
|
if (comment)
|
||||||
|
@ -161,8 +161,9 @@ void ASTDictionary::formatImpl(const FormatSettings & settings, FormatState & st
|
|||||||
|
|
||||||
if (source)
|
if (source)
|
||||||
{
|
{
|
||||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SOURCE("
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SOURCE"
|
||||||
<< (settings.hilite ? hilite_none : "");
|
<< (settings.hilite ? hilite_none : "");
|
||||||
|
settings.ostr << "(";
|
||||||
source->formatImpl(settings, state, frame);
|
source->formatImpl(settings, state, frame);
|
||||||
settings.ostr << ")";
|
settings.ostr << ")";
|
||||||
}
|
}
|
||||||
|
@ -56,16 +56,16 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (hierarchical)
|
if (hierarchical)
|
||||||
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL";
|
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "HIERARCHICAL" << (settings.hilite ? hilite_none : "");
|
||||||
|
|
||||||
if (bidirectional)
|
if (bidirectional)
|
||||||
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL";
|
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "BIDIRECTIONAL" << (settings.hilite ? hilite_none : "");
|
||||||
|
|
||||||
if (injective)
|
if (injective)
|
||||||
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "INJECTIVE";
|
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "INJECTIVE" << (settings.hilite ? hilite_none : "");
|
||||||
|
|
||||||
if (is_object_id)
|
if (is_object_id)
|
||||||
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "IS_OBJECT_ID";
|
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "IS_OBJECT_ID" << (settings.hilite ? hilite_none : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -692,12 +692,15 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
|
|||||||
{
|
{
|
||||||
std::string nl_or_nothing = settings.one_line ? "" : "\n";
|
std::string nl_or_nothing = settings.one_line ? "" : "\n";
|
||||||
std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
|
std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
|
||||||
settings.ostr << (settings.hilite ? hilite_function : "") << name << "(" << nl_or_nothing;
|
settings.ostr << (settings.hilite ? hilite_function : "") << name << (settings.hilite ? hilite_none : "");
|
||||||
|
settings.ostr << (settings.hilite ? hilite_function : "") << "(" << (settings.hilite ? hilite_none : "");
|
||||||
|
settings.ostr << nl_or_nothing;
|
||||||
FormatStateStacked frame_nested = frame;
|
FormatStateStacked frame_nested = frame;
|
||||||
frame_nested.need_parens = false;
|
frame_nested.need_parens = false;
|
||||||
++frame_nested.indent;
|
++frame_nested.indent;
|
||||||
query->formatImpl(settings, state, frame_nested);
|
query->formatImpl(settings, state, frame_nested);
|
||||||
settings.ostr << nl_or_nothing << indent_str << ")";
|
settings.ostr << nl_or_nothing << indent_str;
|
||||||
|
settings.ostr << (settings.hilite ? hilite_function : "") << ")" << (settings.hilite ? hilite_none : "");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,11 +29,13 @@ void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatS
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||||
|
|
||||||
formatOnCluster(settings);
|
formatOnCluster(settings);
|
||||||
|
|
||||||
if (where_expression)
|
if (where_expression)
|
||||||
{
|
{
|
||||||
settings.ostr << " WHERE " << (settings.hilite ? hilite_none : "");
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : "");
|
||||||
where_expression->formatImpl(settings, state, frame);
|
where_expression->formatImpl(settings, state, frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,7 +20,9 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta
|
|||||||
/// This is needed because the query can become extraordinary large after substitution of aliases.
|
/// This is needed because the query can become extraordinary large after substitution of aliases.
|
||||||
if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second)
|
if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second)
|
||||||
{
|
{
|
||||||
|
settings.ostr << (settings.hilite ? IAST::hilite_identifier : "");
|
||||||
settings.writeIdentifier(alias);
|
settings.writeIdentifier(alias);
|
||||||
|
settings.ostr << (settings.hilite ? IAST::hilite_none : "");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -18,7 +18,9 @@ void ASTWithElement::formatImpl(const FormatSettings & settings, FormatState & s
|
|||||||
{
|
{
|
||||||
std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
|
std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
|
||||||
|
|
||||||
|
settings.ostr << (settings.hilite ? hilite_alias : "");
|
||||||
settings.writeIdentifier(name);
|
settings.writeIdentifier(name);
|
||||||
|
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << (settings.hilite ? hilite_none : "");
|
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << (settings.hilite ? hilite_none : "");
|
||||||
settings.ostr << settings.nl_or_ws << indent_str;
|
settings.ostr << settings.nl_or_ws << indent_str;
|
||||||
dynamic_cast<const ASTWithAlias &>(*subquery).formatImplWithoutAlias(settings, state, frame);
|
dynamic_cast<const ASTWithAlias &>(*subquery).formatImplWithoutAlias(settings, state, frame);
|
||||||
|
@ -22,3 +22,7 @@ endif()
|
|||||||
if (ENABLE_FUZZING)
|
if (ENABLE_FUZZING)
|
||||||
add_subdirectory(fuzzers)
|
add_subdirectory(fuzzers)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_TESTS)
|
||||||
|
add_subdirectory(HiliteComparator)
|
||||||
|
endif ()
|
||||||
|
7
src/Parsers/HiliteComparator/CMakeLists.txt
Normal file
7
src/Parsers/HiliteComparator/CMakeLists.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||||
|
|
||||||
|
add_headers_and_sources(hilite_comparator .)
|
||||||
|
|
||||||
|
add_library(hilite_comparator ${hilite_comparator_sources})
|
||||||
|
|
||||||
|
target_link_libraries(hilite_comparator PRIVATE dbms)
|
98
src/Parsers/HiliteComparator/HiliteComparator.cpp
Normal file
98
src/Parsers/HiliteComparator/HiliteComparator.cpp
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
#include "HiliteComparator.h"
|
||||||
|
|
||||||
|
namespace HiliteComparator
|
||||||
|
{
|
||||||
|
|
||||||
|
void consume_hilites(const char * & ptr, Hilite * last_hilite)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
bool changed_hilite = false;
|
||||||
|
for (Hilite hilite : hilites)
|
||||||
|
{
|
||||||
|
if (std::string_view(ptr).starts_with(hilite))
|
||||||
|
{
|
||||||
|
ptr += strlen(hilite);
|
||||||
|
changed_hilite = true;
|
||||||
|
if (last_hilite != nullptr)
|
||||||
|
*last_hilite = hilite;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!changed_hilite)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool are_equal_with_hilites_removed(std::string_view left, std::string_view right)
|
||||||
|
{
|
||||||
|
return remove_hilites(left) == remove_hilites(right);
|
||||||
|
}
|
||||||
|
|
||||||
|
String remove_hilites(std::string_view string)
|
||||||
|
{
|
||||||
|
const char * ptr = string.begin();
|
||||||
|
String string_without_hilites;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
consume_hilites(ptr);
|
||||||
|
if (ptr == string.end())
|
||||||
|
return string_without_hilites;
|
||||||
|
string_without_hilites += *(ptr++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hilited queries cannot be compared symbol-by-symbol, as there's some frivolousness introduced with the hilites. Specifically:
|
||||||
|
* 1. Whitespaces could be hilited with any hilite type.
|
||||||
|
* 2. A hilite may or may not be reset with hilite_none before the next hilite, i.e. the following strings a and b are equal:
|
||||||
|
* a. hilite_keyword foo hilite_none hilite_operator +
|
||||||
|
* b. hilite_keyword foo hilite_operator +
|
||||||
|
*/
|
||||||
|
bool are_equal_with_hilites(std::string_view left, std::string_view right, bool check_end_without_hilite)
|
||||||
|
{
|
||||||
|
const char * left_it = left.begin();
|
||||||
|
const char * right_it = right.begin();
|
||||||
|
Hilite left_hilite = DB::IAST::hilite_none;
|
||||||
|
Hilite right_hilite = DB::IAST::hilite_none;
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
// For each argument, consume all prefix hilites, and update the current hilite to be the last one.
|
||||||
|
consume_hilites(left_it, &left_hilite);
|
||||||
|
consume_hilites(right_it, &right_hilite);
|
||||||
|
|
||||||
|
if (left_it == left.end() && right_it == right.end())
|
||||||
|
{
|
||||||
|
if (left_hilite != right_hilite)
|
||||||
|
return false;
|
||||||
|
if (check_end_without_hilite)
|
||||||
|
if (left_hilite != DB::IAST::hilite_none)
|
||||||
|
throw std::logic_error("Expected string ends with a hilite");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (left_it == left.end() || right_it == right.end())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Lookup one character.
|
||||||
|
// Check characters match.
|
||||||
|
if (*left_it != *right_it)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Check hilites match if it's not a whitespace.
|
||||||
|
if (!std::isspace(*left_it) && left_hilite != right_hilite)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Consume one character.
|
||||||
|
left_it++;
|
||||||
|
right_it++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool are_equal_with_hilites_and_end_without_hilite(std::string_view left, std::string_view right)
|
||||||
|
{
|
||||||
|
return are_equal_with_hilites(left, right, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
48
src/Parsers/HiliteComparator/HiliteComparator.h
Normal file
48
src/Parsers/HiliteComparator/HiliteComparator.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Parsers/IAST.h>
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
|
|
||||||
|
namespace HiliteComparator
|
||||||
|
{
|
||||||
|
|
||||||
|
using Hilite = const char *;
|
||||||
|
|
||||||
|
static const std::vector<Hilite> hilites = {
|
||||||
|
DB::IAST::hilite_keyword,
|
||||||
|
DB::IAST::hilite_identifier,
|
||||||
|
DB::IAST::hilite_function,
|
||||||
|
DB::IAST::hilite_operator,
|
||||||
|
DB::IAST::hilite_alias,
|
||||||
|
DB::IAST::hilite_substitution,
|
||||||
|
DB::IAST::hilite_none
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Consume all prefix hilites, by moving `ptr` past them.
|
||||||
|
* If `last_hilite` is not `nullptr`, update the last hilite to be the last hilite of the prefix hilites.
|
||||||
|
*/
|
||||||
|
void consume_hilites(const char * & ptr, Hilite * last_hilite = nullptr);
|
||||||
|
|
||||||
|
String remove_hilites(std::string_view string);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copies both strings, for the simplicity of the implementation.
|
||||||
|
*/
|
||||||
|
bool are_equal_with_hilites_removed(std::string_view left, std::string_view right);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hilited queries cannot be compared symbol-by-symbol, as there's some frivolousness introduced with the hilites. Specifically:
|
||||||
|
* 1. Whitespaces could be hilited with any hilite type.
|
||||||
|
* 2. A hilite may or may not be reset with hilite_none before the next hilite, i.e. the following strings a and b are equal:
|
||||||
|
* a. hilite_keyword foo hilite_none hilite_operator +
|
||||||
|
* b. hilite_keyword foo hilite_operator +
|
||||||
|
*/
|
||||||
|
bool are_equal_with_hilites(std::string_view left, std::string_view right, bool check_end_without_hilite);
|
||||||
|
|
||||||
|
// Google Test's ASSERT_PRED2 doesn't see overloads with default parameter values.
|
||||||
|
bool are_equal_with_hilites_and_end_without_hilite(std::string_view left, std::string_view right);
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,98 @@
|
|||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <Parsers/IAST.h>
|
||||||
|
#include <Parsers/HiliteComparator/HiliteComparator.h>
|
||||||
|
|
||||||
|
using namespace HiliteComparator;
|
||||||
|
|
||||||
|
TEST(HiliteComparator, ConsumeHilites)
|
||||||
|
{
|
||||||
|
using namespace DB;
|
||||||
|
// The order is different from the order in `hilites` on purpose.
|
||||||
|
String s;
|
||||||
|
s += IAST::hilite_keyword;
|
||||||
|
s += IAST::hilite_alias;
|
||||||
|
s += IAST::hilite_identifier;
|
||||||
|
s += IAST::hilite_none;
|
||||||
|
s += IAST::hilite_operator;
|
||||||
|
s += IAST::hilite_substitution;
|
||||||
|
s += IAST::hilite_function;
|
||||||
|
s += "test";
|
||||||
|
s += IAST::hilite_keyword;
|
||||||
|
const char * ptr = s.c_str();
|
||||||
|
const char * expected_ptr = strchr(ptr, 't');
|
||||||
|
const char * last_hilite = nullptr;
|
||||||
|
consume_hilites(ptr, &last_hilite);
|
||||||
|
ASSERT_EQ(expected_ptr, ptr);
|
||||||
|
ASSERT_TRUE(last_hilite != nullptr);
|
||||||
|
ASSERT_EQ(IAST::hilite_function, last_hilite);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(HiliteComparator, RemoveHilites)
|
||||||
|
{
|
||||||
|
using namespace DB;
|
||||||
|
String s;
|
||||||
|
s += IAST::hilite_keyword;
|
||||||
|
s += "te";
|
||||||
|
s += IAST::hilite_alias;
|
||||||
|
s += IAST::hilite_identifier;
|
||||||
|
s += "s";
|
||||||
|
s += IAST::hilite_none;
|
||||||
|
s += "t";
|
||||||
|
s += IAST::hilite_operator;
|
||||||
|
s += IAST::hilite_substitution;
|
||||||
|
s += IAST::hilite_function;
|
||||||
|
ASSERT_EQ("test", remove_hilites(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(HiliteComparator, AreEqualWithHilites)
|
||||||
|
{
|
||||||
|
using namespace DB;
|
||||||
|
String s = IAST::hilite_keyword;
|
||||||
|
ASSERT_THROW(are_equal_with_hilites(s, s, true), std::logic_error);
|
||||||
|
ASSERT_TRUE(are_equal_with_hilites(s, s, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pairs of strings that are_equal_with_hilites_and_end_without_hilite is expected to accept.
TEST(HiliteComparator, AreEqualWithHilitesAndEndWithoutHilite)
{
    using namespace DB;

    // Empty strings and a bare hilite_none, in every combination.
    ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, "", "");
    ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, "", IAST::hilite_none);
    ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, IAST::hilite_none, "");
    ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, IAST::hilite_none, IAST::hilite_none);

    // Plain text wrapped in hilite_none on both sides versus the bare text.
    {
        String wrapped = String(IAST::hilite_none) + "select" + IAST::hilite_none;
        ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, wrapped, "select");
    }

    // A hilite_none placed in the middle of the text versus the bare text.
    {
        String split_by_none = String(IAST::hilite_none) + "\n sel" + IAST::hilite_none + "ect" + IAST::hilite_none;
        ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, split_by_none, "\n select");
    }

    // One keyword span covering two words versus two keyword spans separated by an unhilited space.
    {
        String single_span = String(IAST::hilite_keyword) + "keyword long" + IAST::hilite_none;

        String two_spans = String(IAST::hilite_keyword) + "keyword" + IAST::hilite_none
            + " "
            + IAST::hilite_keyword + "long" + IAST::hilite_none;

        ASSERT_PRED2(are_equal_with_hilites_and_end_without_hilite, single_span, two_spans);
    }
}
|
133
src/Parsers/tests/gtest_format_hiliting.cpp
Normal file
133
src/Parsers/tests/gtest_format_hiliting.cpp
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
#include <Parsers/IAST.h>
|
||||||
|
#include <Parsers/ParserQuery.h>
|
||||||
|
#include <Parsers/parseQuery.h>
|
||||||
|
#include <Parsers/HiliteComparator/HiliteComparator.h>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <Common/StackTrace.h>
|
||||||
|
|
||||||
|
|
||||||
|
/// Returns `s` prefixed with `hilite_type` and followed by DB::IAST::hilite_none.
String hilite(const String & s, const char * hilite_type)
{
    String wrapped(hilite_type);
    wrapped += s;
    wrapped += DB::IAST::hilite_none;
    return wrapped;
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_keyword as the opening sequence.
String keyword(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_keyword;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_identifier as the opening sequence.
String identifier(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_identifier;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_alias as the opening sequence.
String alias(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_alias;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_operator as the opening sequence.
String op(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_operator;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_function as the opening sequence.
String function(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_function;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
/// Wraps `s` via hilite() using DB::IAST::hilite_substitution as the opening sequence.
String substitution(const String & s)
{
    const char * opening_sequence = DB::IAST::hilite_substitution;
    return hilite(s, opening_sequence);
}
|
||||||
|
|
||||||
|
|
||||||
|
void compare(const String & expected, const String & query)
|
||||||
|
{
|
||||||
|
using namespace DB;
|
||||||
|
ParserQuery parser(query.data() + query.size());
|
||||||
|
ASTPtr ast = parseQuery(parser, query, 0, 0);
|
||||||
|
|
||||||
|
WriteBufferFromOwnString write_buffer;
|
||||||
|
IAST::FormatSettings settings(write_buffer, true);
|
||||||
|
settings.hilite = true;
|
||||||
|
ast->format(settings);
|
||||||
|
|
||||||
|
ASSERT_PRED2(HiliteComparator::are_equal_with_hilites_removed, expected, write_buffer.str());
|
||||||
|
ASSERT_PRED2(HiliteComparator::are_equal_with_hilites_and_end_without_hilite, expected, write_buffer.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<std::pair<std::string, std::string>> expected_and_query_pairs = {
|
||||||
|
// Simple select
|
||||||
|
{
|
||||||
|
keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table"),
|
||||||
|
"select * from table"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTWithElement
|
||||||
|
{
|
||||||
|
keyword("WITH ") + alias("alias ") + keyword("AS ")
|
||||||
|
+ "(" + keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table") + ") "
|
||||||
|
+ keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table"),
|
||||||
|
"with alias as (select * from table) select * from table"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTWithAlias
|
||||||
|
{
|
||||||
|
keyword("SELECT ") + identifier("a ") + op("+ ") + "1 " + keyword("AS ") + alias("b") + ", " + identifier("b"),
|
||||||
|
"select a + 1 as b, b"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTFunction
|
||||||
|
{
|
||||||
|
keyword("SELECT ") + "* " + keyword("FROM ")
|
||||||
|
+ function("view(") + keyword("SELECT ") + "* " + keyword("FROM ") + identifier("table") + function(")"),
|
||||||
|
"select * from view(select * from table)"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTDictionaryAttributeDeclaration
|
||||||
|
{
|
||||||
|
keyword("CREATE DICTIONARY ") + "name "
|
||||||
|
+ "(`Name` " + function("ClickHouseDataType ")
|
||||||
|
+ keyword("DEFAULT ") + "'' "
|
||||||
|
+ keyword("EXPRESSION ") + function("rand64() ")
|
||||||
|
+ keyword("IS_OBJECT_ID") + ")",
|
||||||
|
"CREATE DICTIONARY name (`Name` ClickHouseDataType DEFAULT '' EXPRESSION rand64() IS_OBJECT_ID)"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTDictionary, SOURCE keyword
|
||||||
|
{
|
||||||
|
keyword("CREATE DICTIONARY ") + "name "
|
||||||
|
+ "(`Name` " + function("ClickHouseDataType ")
|
||||||
|
+ keyword("DEFAULT ") + "'' "
|
||||||
|
+ keyword("EXPRESSION ") + function("rand64() ")
|
||||||
|
+ keyword("IS_OBJECT_ID") + ") "
|
||||||
|
+ keyword("SOURCE") + "(" + keyword("FILE") + "(" + keyword("PATH ") + "'path'))",
|
||||||
|
"CREATE DICTIONARY name (`Name` ClickHouseDataType DEFAULT '' EXPRESSION rand64() IS_OBJECT_ID) "
|
||||||
|
"SOURCE(FILE(PATH 'path'))"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTKillQueryQuery
|
||||||
|
{
|
||||||
|
keyword("KILL QUERY ON CLUSTER ") + "clustername "
|
||||||
|
+ keyword("WHERE ") + identifier("user ") + op("= ") + "'username' "
|
||||||
|
+ keyword("SYNC"),
|
||||||
|
"KILL QUERY ON CLUSTER clustername WHERE user = 'username' SYNC"
|
||||||
|
},
|
||||||
|
|
||||||
|
// ASTCreateQuery
|
||||||
|
{
|
||||||
|
keyword("CREATE TABLE ") + "name " + keyword("AS (SELECT ") + "*" + keyword(") ")
|
||||||
|
+ keyword("COMMENT ") + "'hello'",
|
||||||
|
"CREATE TABLE name AS (SELECT *) COMMENT 'hello'"
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Runs compare() on every {expected, query} entry of expected_and_query_pairs.
TEST(FormatHiliting, Queries)
{
    for (const auto & test_case : expected_and_query_pairs)
    {
        const auto & expected_output = test_case.first;
        const auto & input_query = test_case.second;
        compare(expected_output, input_query);
    }
}
|
@ -599,11 +599,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
|
|||||||
|
|
||||||
auto lambda_node_name = calculateActionNodeName(node, *planner_context);
|
auto lambda_node_name = calculateActionNodeName(node, *planner_context);
|
||||||
auto function_capture = std::make_shared<FunctionCaptureOverloadResolver>(
|
auto function_capture = std::make_shared<FunctionCaptureOverloadResolver>(
|
||||||
lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name);
|
lambda_actions, captured_column_names, lambda_arguments_names_and_types, lambda_node.getExpression()->getResultType(), lambda_expression_node_name);
|
||||||
actions_stack.pop_back();
|
actions_stack.pop_back();
|
||||||
|
|
||||||
// TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver.
|
// TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver.
|
||||||
actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture);
|
const auto * actions_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture);
|
||||||
|
|
||||||
|
if (!result_type->equals(*actions_node->result_type))
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
|
"Lambda resolved type {} is not equal to type from actions DAG {}",
|
||||||
|
result_type, actions_node->result_type);
|
||||||
|
|
||||||
size_t actions_stack_size = actions_stack.size();
|
size_t actions_stack_size = actions_stack.size();
|
||||||
for (size_t i = level + 1; i < actions_stack_size; ++i)
|
for (size_t i = level + 1; i < actions_stack_size; ++i)
|
||||||
|
@ -21,9 +21,7 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
}
|
}
|
||||||
/// For ORC format, index_nested_type = true, a nested type takes one index count. And the
|
|
||||||
/// the start index for ORC format should be 1, since index 0 indicates to select all columns.
|
|
||||||
template<bool index_nested_type>
|
|
||||||
class ArrowFieldIndexUtil
|
class ArrowFieldIndexUtil
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -46,9 +44,7 @@ public:
|
|||||||
calculateFieldIndices(const arrow::Schema & schema)
|
calculateFieldIndices(const arrow::Schema & schema)
|
||||||
{
|
{
|
||||||
std::unordered_map<std::string, std::pair<int, int>> result;
|
std::unordered_map<std::string, std::pair<int, int>> result;
|
||||||
// For format like ORC, index = 0 indicates to select all columns, so we skip 0 and start
|
int index_start = 0;
|
||||||
// from 1.
|
|
||||||
int index_start = index_nested_type;
|
|
||||||
for (int i = 0; i < schema.num_fields(); ++i)
|
for (int i = 0; i < schema.num_fields(); ++i)
|
||||||
{
|
{
|
||||||
const auto & field = schema.field(i);
|
const auto & field = schema.field(i);
|
||||||
@ -94,17 +90,16 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Count the number of indices for types.
|
/// Count the number of indices for types.
|
||||||
/// For orc format, index_nested_type is true, a complex type takes one index.
|
|
||||||
size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
||||||
{
|
{
|
||||||
if (type->id() == arrow::Type::LIST)
|
if (type->id() == arrow::Type::LIST)
|
||||||
{
|
{
|
||||||
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type()) + index_nested_type;
|
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type->id() == arrow::Type::STRUCT)
|
if (type->id() == arrow::Type::STRUCT)
|
||||||
{
|
{
|
||||||
int indices = index_nested_type;
|
int indices = 0;
|
||||||
auto * struct_type = static_cast<arrow::StructType *>(type.get());
|
auto * struct_type = static_cast<arrow::StructType *>(type.get());
|
||||||
for (int i = 0; i != struct_type->num_fields(); ++i)
|
for (int i = 0; i != struct_type->num_fields(); ++i)
|
||||||
indices += countIndicesForType(struct_type->field(i)->type());
|
indices += countIndicesForType(struct_type->field(i)->type());
|
||||||
@ -114,7 +109,7 @@ public:
|
|||||||
if (type->id() == arrow::Type::MAP)
|
if (type->id() == arrow::Type::MAP)
|
||||||
{
|
{
|
||||||
auto * map_type = static_cast<arrow::MapType *>(type.get());
|
auto * map_type = static_cast<arrow::MapType *>(type.get());
|
||||||
return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) + index_nested_type;
|
return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -144,8 +139,6 @@ private:
|
|||||||
index_info.first = current_start_index;
|
index_info.first = current_start_index;
|
||||||
if (field_type->id() == arrow::Type::STRUCT)
|
if (field_type->id() == arrow::Type::STRUCT)
|
||||||
{
|
{
|
||||||
current_start_index += index_nested_type;
|
|
||||||
|
|
||||||
auto * struct_type = static_cast<arrow::StructType *>(field_type.get());
|
auto * struct_type = static_cast<arrow::StructType *>(field_type.get());
|
||||||
for (int i = 0, n = struct_type->num_fields(); i < n; ++i)
|
for (int i = 0, n = struct_type->num_fields(); i < n; ++i)
|
||||||
{
|
{
|
||||||
@ -161,7 +154,6 @@ private:
|
|||||||
const auto * list_type = static_cast<arrow::ListType *>(field_type.get());
|
const auto * list_type = static_cast<arrow::ListType *>(field_type.get());
|
||||||
const auto value_field = list_type->value_field();
|
const auto value_field = list_type->value_field();
|
||||||
auto index_snapshot = current_start_index;
|
auto index_snapshot = current_start_index;
|
||||||
current_start_index += index_nested_type;
|
|
||||||
calculateFieldIndices(*value_field, field_name, current_start_index, result, name_prefix);
|
calculateFieldIndices(*value_field, field_name, current_start_index, result, name_prefix);
|
||||||
// The nested struct field has the same name as this list field.
|
// The nested struct field has the same name as this list field.
|
||||||
// rewrite it back to the original value.
|
// rewrite it back to the original value.
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <Columns/ColumnLowCardinality.h>
|
#include <Columns/ColumnLowCardinality.h>
|
||||||
#include <Columns/ColumnNullable.h>
|
#include <Columns/ColumnNullable.h>
|
||||||
#include <Columns/ColumnDecimal.h>
|
#include <Columns/ColumnDecimal.h>
|
||||||
|
#include <Columns/ColumnMap.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypeEnum.h>
|
#include <DataTypes/DataTypeEnum.h>
|
||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
@ -23,6 +24,7 @@
|
|||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
|
#include <DataTypes/DataTypeMap.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -80,22 +82,39 @@ kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage()
|
|||||||
return msg;
|
return msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value)
|
static void insertInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value)
|
||||||
{
|
{
|
||||||
switch (column_type->getTypeId())
|
switch (column_type->getTypeId())
|
||||||
{
|
{
|
||||||
case TypeIndex::Int8:
|
case TypeIndex::Int8:
|
||||||
assert_cast<ColumnInt8 &>(column).insertValue(value);
|
assert_cast<ColumnInt8 &>(column).insertValue(value);
|
||||||
break;
|
break;
|
||||||
|
case TypeIndex::UInt8:
|
||||||
|
assert_cast<ColumnUInt8 &>(column).insertValue(value);
|
||||||
|
break;
|
||||||
case TypeIndex::Int16:
|
case TypeIndex::Int16:
|
||||||
assert_cast<ColumnInt16 &>(column).insertValue(value);
|
assert_cast<ColumnInt16 &>(column).insertValue(value);
|
||||||
break;
|
break;
|
||||||
|
case TypeIndex::Date: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt16:
|
||||||
|
assert_cast<ColumnUInt16 &>(column).insertValue(value);
|
||||||
|
break;
|
||||||
case TypeIndex::Int32:
|
case TypeIndex::Int32:
|
||||||
assert_cast<ColumnInt32 &>(column).insertValue(static_cast<Int32>(value));
|
assert_cast<ColumnInt32 &>(column).insertValue(static_cast<Int32>(value));
|
||||||
break;
|
break;
|
||||||
|
case TypeIndex::DateTime: [[fallthrough]];
|
||||||
|
case TypeIndex::UInt32:
|
||||||
|
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value));
|
||||||
|
break;
|
||||||
|
case TypeIndex::IPv4:
|
||||||
|
assert_cast<ColumnIPv4 &>(column).insertValue(IPv4(static_cast<UInt32>(value)));
|
||||||
|
break;
|
||||||
case TypeIndex::Int64:
|
case TypeIndex::Int64:
|
||||||
assert_cast<ColumnInt64 &>(column).insertValue(value);
|
assert_cast<ColumnInt64 &>(column).insertValue(value);
|
||||||
break;
|
break;
|
||||||
|
case TypeIndex::UInt64:
|
||||||
|
assert_cast<ColumnUInt64 &>(column).insertValue(value);
|
||||||
|
break;
|
||||||
case TypeIndex::DateTime64:
|
case TypeIndex::DateTime64:
|
||||||
assert_cast<ColumnDecimal<DateTime64> &>(column).insertValue(value);
|
assert_cast<ColumnDecimal<DateTime64> &>(column).insertValue(value);
|
||||||
break;
|
break;
|
||||||
@ -106,33 +125,7 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ
|
|||||||
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(value);
|
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(value);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer.");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type {} cannot be parsed from integer", column_type->getName());
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value)
|
|
||||||
{
|
|
||||||
switch (column_type->getTypeId())
|
|
||||||
{
|
|
||||||
case TypeIndex::UInt8:
|
|
||||||
assert_cast<ColumnUInt8 &>(column).insertValue(value);
|
|
||||||
break;
|
|
||||||
case TypeIndex::Date: [[fallthrough]];
|
|
||||||
case TypeIndex::UInt16:
|
|
||||||
assert_cast<ColumnUInt16 &>(column).insertValue(value);
|
|
||||||
break;
|
|
||||||
case TypeIndex::DateTime: [[fallthrough]];
|
|
||||||
case TypeIndex::UInt32:
|
|
||||||
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value));
|
|
||||||
break;
|
|
||||||
case TypeIndex::UInt64:
|
|
||||||
assert_cast<ColumnUInt64 &>(column).insertValue(value);
|
|
||||||
break;
|
|
||||||
case TypeIndex::IPv4:
|
|
||||||
assert_cast<ColumnIPv4 &>(column).insertValue(IPv4(static_cast<UInt32>(value)));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,8 +145,11 @@ static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Value>
|
template <typename Value>
|
||||||
static void insertString(IColumn & column, Value value)
|
static void insertData(IColumn & column, const DataTypePtr & column_type, Value value)
|
||||||
{
|
{
|
||||||
|
if (column_type->haveMaximumSizeOfValue() && value.size() != column_type->getSizeOfValueInMemory())
|
||||||
|
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected size of {} value: {}", column_type->getName(), value.size());
|
||||||
|
|
||||||
column.insertData(reinterpret_cast<const char *>(value.begin()), value.size());
|
column.insertData(reinterpret_cast<const char *>(value.begin()), value.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,10 +162,10 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const
|
|||||||
switch (enum_comparing_mode)
|
switch (enum_comparing_mode)
|
||||||
{
|
{
|
||||||
case FormatSettings::EnumComparingMode::BY_VALUES:
|
case FormatSettings::EnumComparingMode::BY_VALUES:
|
||||||
insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal()));
|
insertInteger(column, nested_type, Int64(enumerant.getOrdinal()));
|
||||||
return;
|
return;
|
||||||
case FormatSettings::EnumComparingMode::BY_NAMES:
|
case FormatSettings::EnumComparingMode::BY_NAMES:
|
||||||
insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName()))));
|
insertInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName()))));
|
||||||
return;
|
return;
|
||||||
case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
|
case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
|
||||||
{
|
{
|
||||||
@ -179,7 +175,7 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const
|
|||||||
{
|
{
|
||||||
if (compareEnumNames(name, enum_name, enum_comparing_mode))
|
if (compareEnumNames(name, enum_name, enum_comparing_mode))
|
||||||
{
|
{
|
||||||
insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name)));
|
insertInteger(column, nested_type, Int64(enum_type->getValue(name)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -202,22 +198,22 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const
|
|||||||
switch (value.getType())
|
switch (value.getType())
|
||||||
{
|
{
|
||||||
case capnp::DynamicValue::Type::INT:
|
case capnp::DynamicValue::Type::INT:
|
||||||
insertSignedInteger(column, column_type, value.as<Int64>());
|
insertInteger(column, column_type, value.as<Int64>());
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::UINT:
|
case capnp::DynamicValue::Type::UINT:
|
||||||
insertUnsignedInteger(column, column_type, value.as<UInt64>());
|
insertInteger(column, column_type, value.as<UInt64>());
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::FLOAT:
|
case capnp::DynamicValue::Type::FLOAT:
|
||||||
insertFloat(column, column_type, value.as<Float64>());
|
insertFloat(column, column_type, value.as<Float64>());
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::BOOL:
|
case capnp::DynamicValue::Type::BOOL:
|
||||||
insertUnsignedInteger(column, column_type, UInt64(value.as<bool>()));
|
insertInteger(column, column_type, UInt64(value.as<bool>()));
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::DATA:
|
case capnp::DynamicValue::Type::DATA:
|
||||||
insertString(column, value.as<capnp::Data>());
|
insertData(column, column_type, value.as<capnp::Data>());
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::TEXT:
|
case capnp::DynamicValue::Type::TEXT:
|
||||||
insertString(column, value.as<capnp::Text>());
|
insertData(column, column_type, value.as<capnp::Text>());
|
||||||
break;
|
break;
|
||||||
case capnp::DynamicValue::Type::ENUM:
|
case capnp::DynamicValue::Type::ENUM:
|
||||||
if (column_type->getTypeId() == TypeIndex::Enum8)
|
if (column_type->getTypeId() == TypeIndex::Enum8)
|
||||||
@ -260,14 +256,26 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const
|
|||||||
{
|
{
|
||||||
auto & tuple_column = assert_cast<ColumnTuple &>(column);
|
auto & tuple_column = assert_cast<ColumnTuple &>(column);
|
||||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(column_type.get());
|
const auto * tuple_type = assert_cast<const DataTypeTuple *>(column_type.get());
|
||||||
for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
|
bool have_explicit_names = tuple_type->haveExplicitNames();
|
||||||
|
auto struct_schema = struct_value.getSchema();
|
||||||
|
for (uint32_t i = 0; i != tuple_column.tupleSize(); ++i)
|
||||||
insertValue(
|
insertValue(
|
||||||
tuple_column.getColumn(i),
|
tuple_column.getColumn(i),
|
||||||
tuple_type->getElements()[i],
|
tuple_type->getElements()[i],
|
||||||
tuple_type->getElementNames()[i],
|
tuple_type->getElementNames()[i],
|
||||||
struct_value.get(tuple_type->getElementNames()[i]),
|
struct_value.get(have_explicit_names ? struct_schema.getFieldByName(tuple_type->getElementNames()[i]) : struct_schema.getFields()[i]),
|
||||||
enum_comparing_mode);
|
enum_comparing_mode);
|
||||||
}
|
}
|
||||||
|
else if (isMap(column_type))
|
||||||
|
{
|
||||||
|
const auto & map_type = assert_cast<const DataTypeMap &>(*column_type);
|
||||||
|
DataTypes key_value_types = {map_type.getKeyType(), map_type.getValueType()};
|
||||||
|
Names key_value_names = {"key", "value"};
|
||||||
|
auto entries_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(key_value_types, key_value_names));
|
||||||
|
auto & entries_column = assert_cast<ColumnMap &>(column).getNestedColumn();
|
||||||
|
auto entries_field = struct_value.getSchema().getFields()[0];
|
||||||
|
insertValue(entries_column, entries_type, column_name, struct_value.get(entries_field), enum_comparing_mode);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// It can be nested column from Nested type.
|
/// It can be nested column from Nested type.
|
||||||
|
@ -14,12 +14,14 @@
|
|||||||
#include <Columns/ColumnTuple.h>
|
#include <Columns/ColumnTuple.h>
|
||||||
#include <Columns/ColumnLowCardinality.h>
|
#include <Columns/ColumnLowCardinality.h>
|
||||||
#include <Columns/ColumnDecimal.h>
|
#include <Columns/ColumnDecimal.h>
|
||||||
|
#include <Columns/ColumnMap.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
#include <DataTypes/DataTypeEnum.h>
|
#include <DataTypes/DataTypeEnum.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
|
#include <DataTypes/DataTypeMap.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -177,18 +179,46 @@ static std::optional<capnp::DynamicValue::Reader> convertToDynamicValue(
|
|||||||
else if (isTuple(data_type))
|
else if (isTuple(data_type))
|
||||||
{
|
{
|
||||||
const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
|
const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
|
||||||
auto nested_types = tuple_data_type->getElements();
|
const auto & nested_types = tuple_data_type->getElements();
|
||||||
|
const auto & nested_names = tuple_data_type->getElementNames();
|
||||||
const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns();
|
const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns();
|
||||||
for (const auto & name : tuple_data_type->getElementNames())
|
bool have_explicit_names = tuple_data_type->haveExplicitNames();
|
||||||
|
for (uint32_t i = 0; i != nested_names.size(); ++i)
|
||||||
{
|
{
|
||||||
auto pos = tuple_data_type->getPositionByName(name);
|
capnp::StructSchema::Field nested_field = have_explicit_names ? nested_struct_schema.getFieldByName(nested_names[i]) : nested_struct_schema.getFields()[i];
|
||||||
auto field_builder
|
auto field_builder = initStructFieldBuilder(nested_columns[i], row_num, struct_builder, nested_field);
|
||||||
= initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name));
|
auto value = convertToDynamicValue(nested_columns[i], nested_types[i], row_num, nested_names[i], field_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||||
auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage);
|
|
||||||
if (value)
|
if (value)
|
||||||
struct_builder.set(name, *value);
|
struct_builder.set(nested_field, *value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (isMap(data_type))
|
||||||
|
{
|
||||||
|
/// We output Map type as follow CapnProto schema
|
||||||
|
///
|
||||||
|
/// struct Map {
|
||||||
|
/// struct Entry {
|
||||||
|
/// key @0: Key;
|
||||||
|
/// value @1: Value;
|
||||||
|
/// }
|
||||||
|
/// entries @0 :List(Entry);
|
||||||
|
/// }
|
||||||
|
///
|
||||||
|
/// And we don't need to check that struct have this form here because we checked it before.
|
||||||
|
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||||
|
DataTypes key_value_types = {map_type.getKeyType(), map_type.getValueType()};
|
||||||
|
Names key_value_names = {"key", "value"};
|
||||||
|
auto entries_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(key_value_types, key_value_names));
|
||||||
|
|
||||||
|
/// Nested column in Map is actually Array(Tuple), so we can output it according to "entries" field schema.
|
||||||
|
const auto & entries_column = assert_cast<const ColumnMap *>(column.get())->getNestedColumnPtr();
|
||||||
|
|
||||||
|
auto entries_field = nested_struct_schema.getFields()[0];
|
||||||
|
auto field_builder = initStructFieldBuilder(entries_column, row_num, struct_builder, entries_field);
|
||||||
|
auto entries_value = convertToDynamicValue(entries_column, entries_type, row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||||
|
if (entries_value)
|
||||||
|
struct_builder.set(entries_field, *entries_value);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// It can be nested column from Nested type.
|
/// It can be nested column from Nested type.
|
||||||
|
@ -129,10 +129,17 @@ void ORCBlockInputFormat::prepareReader()
|
|||||||
format_settings.null_as_default,
|
format_settings.null_as_default,
|
||||||
format_settings.orc.case_insensitive_column_matching);
|
format_settings.orc.case_insensitive_column_matching);
|
||||||
|
|
||||||
ArrowFieldIndexUtil<true> field_util(
|
const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
|
||||||
format_settings.orc.case_insensitive_column_matching,
|
std::unordered_set<String> nested_table_names;
|
||||||
format_settings.orc.allow_missing_columns);
|
if (format_settings.orc.import_nested)
|
||||||
include_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
|
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
|
||||||
|
|
||||||
|
for (int i = 0; i < schema->num_fields(); ++i)
|
||||||
|
{
|
||||||
|
const auto & name = schema->field(i)->name();
|
||||||
|
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
|
||||||
|
include_indices.push_back(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
||||||
|
@ -132,7 +132,7 @@ void ParquetBlockInputFormat::prepareReader()
|
|||||||
format_settings.null_as_default,
|
format_settings.null_as_default,
|
||||||
format_settings.parquet.case_insensitive_column_matching);
|
format_settings.parquet.case_insensitive_column_matching);
|
||||||
|
|
||||||
ArrowFieldIndexUtil<false> field_util(
|
ArrowFieldIndexUtil field_util(
|
||||||
format_settings.parquet.case_insensitive_column_matching,
|
format_settings.parquet.case_insensitive_column_matching,
|
||||||
format_settings.parquet.allow_missing_columns);
|
format_settings.parquet.allow_missing_columns);
|
||||||
column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
|
column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
|
||||||
|
@ -95,14 +95,14 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
|||||||
builder.version(getParquetVersion(format_settings));
|
builder.version(getParquetVersion(format_settings));
|
||||||
builder.compression(getParquetCompression(format_settings.parquet.output_compression_method));
|
builder.compression(getParquetCompression(format_settings.parquet.output_compression_method));
|
||||||
auto props = builder.build();
|
auto props = builder.build();
|
||||||
auto status = parquet::arrow::FileWriter::Open(
|
auto result = parquet::arrow::FileWriter::Open(
|
||||||
*arrow_table->schema(),
|
*arrow_table->schema(),
|
||||||
arrow::default_memory_pool(),
|
arrow::default_memory_pool(),
|
||||||
sink,
|
sink,
|
||||||
props, /*parquet::default_writer_properties(),*/
|
props);
|
||||||
&file_writer);
|
if (!result.ok())
|
||||||
if (!status.ok())
|
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", result.status().ToString());
|
||||||
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", status.ToString());
|
file_writer = std::move(result.ValueOrDie());
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: calculate row_group_size depending on a number of rows and table size
|
// TODO: calculate row_group_size depending on a number of rows and table size
|
||||||
|
@ -126,6 +126,12 @@ public:
|
|||||||
user_error = UserError{};
|
user_error = UserError{};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void setKeeperError(Coordination::Error code, fmt::format_string<Args...> fmt, Args &&... args)
|
||||||
|
{
|
||||||
|
setKeeperError(code, fmt::format(fmt, std::forward<Args>(args)...));
|
||||||
|
}
|
||||||
|
|
||||||
void stopRetries() { stop_retries = true; }
|
void stopRetries() { stop_retries = true; }
|
||||||
|
|
||||||
void requestUnconditionalRetry() { unconditional_retry = true; }
|
void requestUnconditionalRetry() { unconditional_retry = true; }
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
<clickhouse>
|
||||||
|
<profiles>
|
||||||
|
<default>
|
||||||
|
<backup_restore_keeper_max_retries>1000</backup_restore_keeper_max_retries>
|
||||||
|
<backup_restore_keeper_retry_initial_backoff_ms>1</backup_restore_keeper_retry_initial_backoff_ms>
|
||||||
|
<backup_restore_keeper_retry_max_backoff_ms>1</backup_restore_keeper_retry_max_backoff_ms>
|
||||||
|
<backup_restore_keeper_fault_injection_seed>42</backup_restore_keeper_fault_injection_seed>
|
||||||
|
<backup_restore_keeper_fault_injection_probability>0.002</backup_restore_keeper_fault_injection_probability>
|
||||||
|
</default>
|
||||||
|
</profiles>
|
||||||
|
</clickhouse>
|
@ -13,6 +13,7 @@ cluster = ClickHouseCluster(__file__)
|
|||||||
instance = cluster.add_instance(
|
instance = cluster.add_instance(
|
||||||
"instance",
|
"instance",
|
||||||
main_configs=["configs/backups_disk.xml"],
|
main_configs=["configs/backups_disk.xml"],
|
||||||
|
user_configs=["configs/zookeeper_retries.xml"],
|
||||||
external_dirs=["/backups/"],
|
external_dirs=["/backups/"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
<clickhouse>
|
||||||
|
<profiles>
|
||||||
|
<default>
|
||||||
|
<backup_restore_keeper_max_retries>1000</backup_restore_keeper_max_retries>
|
||||||
|
<backup_restore_keeper_retry_initial_backoff_ms>1</backup_restore_keeper_retry_initial_backoff_ms>
|
||||||
|
<backup_restore_keeper_retry_max_backoff_ms>1</backup_restore_keeper_retry_max_backoff_ms>
|
||||||
|
<backup_restore_keeper_fault_injection_seed>42</backup_restore_keeper_fault_injection_seed>
|
||||||
|
<backup_restore_keeper_fault_injection_probability>0.002</backup_restore_keeper_fault_injection_probability>
|
||||||
|
</default>
|
||||||
|
</profiles>
|
||||||
|
</clickhouse>
|
@ -18,6 +18,7 @@ main_configs = [
|
|||||||
|
|
||||||
user_configs = [
|
user_configs = [
|
||||||
"configs/allow_database_types.xml",
|
"configs/allow_database_types.xml",
|
||||||
|
"configs/zookeeper_retries.xml",
|
||||||
]
|
]
|
||||||
|
|
||||||
node1 = cluster.add_instance(
|
node1 = cluster.add_instance(
|
||||||
@ -429,7 +430,7 @@ def test_replicated_database_async():
|
|||||||
assert node2.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"])
|
assert node2.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"])
|
||||||
|
|
||||||
|
|
||||||
# By default `backup_keeper_value_max_size` is 1 MB, but in this test we'll set it to 50 bytes just to check it works.
|
# By default `backup_restore_keeper_value_max_size` is 1 MB, but in this test we'll set it to 50 bytes just to check it works.
|
||||||
def test_keeper_value_max_size():
|
def test_keeper_value_max_size():
|
||||||
node1.query(
|
node1.query(
|
||||||
"CREATE TABLE tbl ON CLUSTER 'cluster' ("
|
"CREATE TABLE tbl ON CLUSTER 'cluster' ("
|
||||||
@ -450,7 +451,7 @@ def test_keeper_value_max_size():
|
|||||||
backup_name = new_backup_name()
|
backup_name = new_backup_name()
|
||||||
node1.query(
|
node1.query(
|
||||||
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}",
|
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}",
|
||||||
settings={"backup_keeper_value_max_size": 50},
|
settings={"backup_restore_keeper_value_max_size": 50},
|
||||||
)
|
)
|
||||||
|
|
||||||
node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
|
node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
|
||||||
|
@ -29,6 +29,7 @@ def generate_cluster_def():
|
|||||||
|
|
||||||
|
|
||||||
main_configs = ["configs/backups_disk.xml", generate_cluster_def()]
|
main_configs = ["configs/backups_disk.xml", generate_cluster_def()]
|
||||||
|
# No [Zoo]Keeper retries for tests with concurrency
|
||||||
user_configs = ["configs/allow_database_types.xml"]
|
user_configs = ["configs/allow_database_types.xml"]
|
||||||
|
|
||||||
nodes = []
|
nodes = []
|
||||||
|
@ -50,6 +50,7 @@ def generate_cluster_def():
|
|||||||
|
|
||||||
|
|
||||||
main_configs = ["configs/disallow_concurrency.xml", generate_cluster_def()]
|
main_configs = ["configs/disallow_concurrency.xml", generate_cluster_def()]
|
||||||
|
# No [Zoo]Keeper retries for tests with concurrency
|
||||||
user_configs = ["configs/allow_database_types.xml"]
|
user_configs = ["configs/allow_database_types.xml"]
|
||||||
|
|
||||||
nodes = []
|
nodes = []
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
<clickhouse>
|
||||||
|
<profiles>
|
||||||
|
<default>
|
||||||
|
<backup_restore_keeper_max_retries>1000</backup_restore_keeper_max_retries>
|
||||||
|
<backup_restore_keeper_retry_initial_backoff_ms>1</backup_restore_keeper_retry_initial_backoff_ms>
|
||||||
|
<backup_restore_keeper_retry_max_backoff_ms>1</backup_restore_keeper_retry_max_backoff_ms>
|
||||||
|
<backup_restore_keeper_fault_injection_seed>42</backup_restore_keeper_fault_injection_seed>
|
||||||
|
<backup_restore_keeper_fault_injection_probability>0.002</backup_restore_keeper_fault_injection_probability>
|
||||||
|
</default>
|
||||||
|
</profiles>
|
||||||
|
</clickhouse>
|
@ -9,6 +9,9 @@ node = cluster.add_instance(
|
|||||||
"configs/named_collection_s3_backups.xml",
|
"configs/named_collection_s3_backups.xml",
|
||||||
"configs/s3_settings.xml",
|
"configs/s3_settings.xml",
|
||||||
],
|
],
|
||||||
|
user_configs=[
|
||||||
|
"configs/zookeeper_retries.xml",
|
||||||
|
],
|
||||||
with_minio=True,
|
with_minio=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
<clickhouse>
|
<clickhouse>
|
||||||
<keeper_server>
|
<keeper_server>
|
||||||
|
<use_cluster>false</use_cluster>
|
||||||
<tcp_port>9181</tcp_port>
|
<tcp_port>9181</tcp_port>
|
||||||
<server_id>1</server_id>
|
<server_id>1</server_id>
|
||||||
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
|
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
|
||||||
|
@ -61,28 +61,39 @@ def test_mntr_data_size_after_restart(started_cluster):
|
|||||||
node_zk.close()
|
node_zk.close()
|
||||||
node_zk = None
|
node_zk = None
|
||||||
|
|
||||||
def get_line_from_mntr(key):
|
def get_line_from_mntr(mntr_str, key):
|
||||||
return next(
|
return next(
|
||||||
filter(
|
filter(
|
||||||
lambda line: key in line,
|
lambda line: key in line,
|
||||||
keeper_utils.send_4lw_cmd(started_cluster, node, "mntr").split(
|
mntr_str.split("\n"),
|
||||||
"\n"
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
|
||||||
line_size_before = get_line_from_mntr("zk_approximate_data_size")
|
mntr_result = keeper_utils.send_4lw_cmd(started_cluster, node, "mntr")
|
||||||
node_count_before = get_line_from_mntr("zk_znode_count")
|
line_size_before = get_line_from_mntr(mntr_result, "zk_approximate_data_size")
|
||||||
assert get_line_from_mntr("zk_ephemerals_count") == "zk_ephemerals_count\t0"
|
node_count_before = get_line_from_mntr(mntr_result, "zk_znode_count")
|
||||||
|
assert (
|
||||||
|
get_line_from_mntr(mntr_result, "zk_ephemerals_count")
|
||||||
|
== "zk_ephemerals_count\t0"
|
||||||
|
)
|
||||||
assert line_size_before != None
|
assert line_size_before != None
|
||||||
|
|
||||||
restart_clickhouse()
|
restart_clickhouse()
|
||||||
|
|
||||||
def assert_mntr_stats():
|
def assert_mntr_stats():
|
||||||
assert get_line_from_mntr("zk_ephemerals_count") == "zk_ephemerals_count\t0"
|
mntr_result = keeper_utils.send_4lw_cmd(started_cluster, node, "mntr")
|
||||||
assert get_line_from_mntr("zk_znode_count") == node_count_before
|
assert (
|
||||||
assert get_line_from_mntr("zk_approximate_data_size") == line_size_before
|
get_line_from_mntr(mntr_result, "zk_ephemerals_count")
|
||||||
|
== "zk_ephemerals_count\t0"
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
get_line_from_mntr(mntr_result, "zk_znode_count") == node_count_before
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
get_line_from_mntr(mntr_result, "zk_approximate_data_size")
|
||||||
|
== line_size_before
|
||||||
|
)
|
||||||
|
|
||||||
assert_mntr_stats()
|
assert_mntr_stats()
|
||||||
keeper_utils.send_4lw_cmd(started_cluster, node, "rclc")
|
keeper_utils.send_4lw_cmd(started_cluster, node, "rclc")
|
||||||
|
@ -92,8 +92,11 @@ idx10 ['This','is','a','test']
|
|||||||
123 1
|
123 1
|
||||||
456 2
|
456 2
|
||||||
=== Try load data from datapage_v2.snappy.parquet
|
=== Try load data from datapage_v2.snappy.parquet
|
||||||
Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA)
|
abc 1 2 1 [1,2,3]
|
||||||
|
abc 2 3 1 []
|
||||||
|
abc 3 4 1 []
|
||||||
|
\N 4 5 0 [1,2,3]
|
||||||
|
abc 5 2 1 [1,2]
|
||||||
=== Try load data from datatype-date32.parquet
|
=== Try load data from datatype-date32.parquet
|
||||||
1925-01-01
|
1925-01-01
|
||||||
1949-10-01
|
1949-10-01
|
||||||
|
@ -50,4 +50,3 @@ OK
|
|||||||
OK
|
OK
|
||||||
OK
|
OK
|
||||||
OK
|
OK
|
||||||
OK
|
|
||||||
|
@ -109,8 +109,6 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint6
|
|||||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
||||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
||||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
||||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
|
||||||
|
|
||||||
|
|
||||||
$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
||||||
$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL';
|
||||||
|
@ -66,6 +66,18 @@ ExpressionTransform
|
|||||||
ExpressionTransform
|
ExpressionTransform
|
||||||
(ReadFromMergeTree)
|
(ReadFromMergeTree)
|
||||||
MergeTreeInOrder 0 → 1
|
MergeTreeInOrder 0 → 1
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Limit)
|
||||||
|
Limit
|
||||||
|
(Sorting)
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(ReadFromMergeTree)
|
||||||
|
ExpressionTransform
|
||||||
|
MergeTreeInOrder 0 → 1
|
||||||
2020-10-11 0 0
|
2020-10-11 0 0
|
||||||
2020-10-11 0 10
|
2020-10-11 0 10
|
||||||
2020-10-11 0 20
|
2020-10-11 0 20
|
||||||
@ -82,6 +94,20 @@ ExpressionTransform
|
|||||||
ExpressionTransform
|
ExpressionTransform
|
||||||
(ReadFromMergeTree)
|
(ReadFromMergeTree)
|
||||||
MergeTreeInOrder 0 → 1
|
MergeTreeInOrder 0 → 1
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Limit)
|
||||||
|
Limit
|
||||||
|
(Sorting)
|
||||||
|
FinishSortingTransform
|
||||||
|
PartialSortingTransform
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(ReadFromMergeTree)
|
||||||
|
ExpressionTransform
|
||||||
|
MergeTreeInOrder 0 → 1
|
||||||
2020-10-12 0
|
2020-10-12 0
|
||||||
2020-10-12 1
|
2020-10-12 1
|
||||||
2020-10-12 2
|
2020-10-12 2
|
||||||
@ -104,6 +130,19 @@ ExpressionTransform
|
|||||||
(ReadFromMergeTree)
|
(ReadFromMergeTree)
|
||||||
ReverseTransform
|
ReverseTransform
|
||||||
MergeTreeReverse 0 → 1
|
MergeTreeReverse 0 → 1
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Limit)
|
||||||
|
Limit
|
||||||
|
(Sorting)
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(Expression)
|
||||||
|
ExpressionTransform
|
||||||
|
(ReadFromMergeTree)
|
||||||
|
ExpressionTransform
|
||||||
|
ReverseTransform
|
||||||
|
MergeTreeReverse 0 → 1
|
||||||
2020-10-12 99999
|
2020-10-12 99999
|
||||||
2020-10-12 99998
|
2020-10-12 99998
|
||||||
2020-10-12 99997
|
2020-10-12 99997
|
||||||
|
@ -21,17 +21,20 @@ SELECT toStartOfMonth(date) as d, i FROM t_read_in_order ORDER BY d, -i LIMIT 5;
|
|||||||
EXPLAIN PIPELINE SELECT toStartOfMonth(date) as d, i FROM t_read_in_order ORDER BY d, -i LIMIT 5;
|
EXPLAIN PIPELINE SELECT toStartOfMonth(date) as d, i FROM t_read_in_order ORDER BY d, -i LIMIT 5;
|
||||||
|
|
||||||
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5;
|
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5;
|
||||||
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5;
|
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5 settings allow_experimental_analyzer=0;
|
||||||
|
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i LIMIT 5 settings allow_experimental_analyzer=1;
|
||||||
|
|
||||||
SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5;
|
SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5;
|
||||||
EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5;
|
EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5 settings allow_experimental_analyzer=0;
|
||||||
|
EXPLAIN PIPELINE SELECT * FROM t_read_in_order WHERE date = '2020-10-11' ORDER BY i, v LIMIT 5 settings allow_experimental_analyzer=1;
|
||||||
|
|
||||||
INSERT INTO t_read_in_order SELECT '2020-10-12', number, number FROM numbers(100000);
|
INSERT INTO t_read_in_order SELECT '2020-10-12', number, number FROM numbers(100000);
|
||||||
|
|
||||||
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i LIMIT 5;
|
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i LIMIT 5;
|
||||||
|
|
||||||
EXPLAIN SYNTAX SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
|
EXPLAIN SYNTAX SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
|
||||||
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
|
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5 settings allow_experimental_analyzer=0;
|
||||||
|
EXPLAIN PIPELINE SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5 settings allow_experimental_analyzer=1;
|
||||||
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
|
SELECT date, i FROM t_read_in_order WHERE date = '2020-10-12' ORDER BY i DESC LIMIT 5;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS t_read_in_order;
|
DROP TABLE IF EXISTS t_read_in_order;
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
0 1 2 200
|
0 1 2 200
|
||||||
Aggregating
|
Aggregating
|
||||||
Order: a ASC, c ASC
|
Order: a ASC, c ASC
|
||||||
Sorting (Stream): a ASC, b ASC, c ASC
|
|
||||||
ReadFromMergeTree (default.tab)
|
ReadFromMergeTree (default.tab)
|
||||||
Sorting (Stream): a ASC, b ASC, c ASC
|
Aggregating
|
||||||
|
Order: default.tab.a_0 ASC, default.tab.c_2 ASC
|
||||||
|
ReadFromMergeTree (default.tab)
|
||||||
|
@ -5,4 +5,5 @@ insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) *
|
|||||||
insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) * 100 from numbers(6);
|
insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) * 100 from numbers(6);
|
||||||
|
|
||||||
select a, any(b), c, d from tab where b = 1 group by a, c, d order by c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1;
|
select a, any(b), c, d from tab where b = 1 group by a, c, d order by c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1;
|
||||||
select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%Sorting (Stream)%' or explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%';
|
select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%' settings allow_experimental_analyzer=0;
|
||||||
|
select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%' settings allow_experimental_analyzer=1;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -4,419 +4,11 @@
|
|||||||
SET allow_experimental_query_cache = true;
|
SET allow_experimental_query_cache = true;
|
||||||
|
|
||||||
SYSTEM DROP QUERY CACHE;
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS t;
|
DROP TABLE IF EXISTS t;
|
||||||
|
|
||||||
-- Create test table with lot's of rows
|
-- Create test table with lot's of rows
|
||||||
CREATE TABLE t(c String) ENGINE=MergeTree ORDER BY c;
|
CREATE TABLE t(c String) ENGINE=MergeTree ORDER BY c;
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
INSERT INTO t SELECT multiIf(n = 0, 'abc', n = 1, 'def', n = 2, 'abc', n = 3, 'jkl', '<unused>') FROM (SELECT number % 4 AS n FROM numbers(1200));
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
INSERT INTO t values ('abc') ('def') ('abc') ('jkl');
|
|
||||||
OPTIMIZE TABLE t FINAL;
|
OPTIMIZE TABLE t FINAL;
|
||||||
|
|
||||||
-- Run query which, store *compressed* result in query cache
|
-- Run query which, store *compressed* result in query cache
|
||||||
@ -442,3 +34,4 @@ SELECT * FROM t ORDER BY c
|
|||||||
SETTINGS use_query_cache = true;
|
SETTINGS use_query_cache = true;
|
||||||
|
|
||||||
DROP TABLE t;
|
DROP TABLE t;
|
||||||
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
SET allow_experimental_query_cache = true;
|
SET allow_experimental_query_cache = true;
|
||||||
|
|
||||||
SYSTEM DROP QUERY CACHE;
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
|
||||||
DROP TABLE IF EXISTS t;
|
DROP TABLE IF EXISTS t;
|
||||||
|
|
||||||
-- Create test table with "many" rows
|
-- Create test table with "many" rows
|
||||||
@ -51,3 +50,4 @@ SELECT * FROM t ORDER BY c
|
|||||||
SETTINGS max_block_size = 2, use_query_cache = true;
|
SETTINGS max_block_size = 2, use_query_cache = true;
|
||||||
|
|
||||||
DROP TABLE t;
|
DROP TABLE t;
|
||||||
|
SYSTEM DROP QUERY CACHE;
|
||||||
|
37
tests/queries/0_stateless/02661_quantile_approx.reference
Normal file
37
tests/queries/0_stateless/02661_quantile_approx.reference
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
-- { echoOn }
|
||||||
|
with arrayJoin([0, 1, 2, 10]) as x select quantilesApprox(100, 0.5, 0.4, 0.1)(x);
|
||||||
|
[1,1,0]
|
||||||
|
with arrayJoin([0, 6, 7, 9, 10]) as x select quantileApprox(100, 0.5)(x);
|
||||||
|
7
|
||||||
|
select quantilesApprox(10000, 0.25, 0.5, 0.75, 0.0, 1.0, 0, 1)(number + 1) from numbers(1000);
|
||||||
|
[250,500,750,1,1000,1,1000]
|
||||||
|
select quantilesApprox(10000, 0.01, 0.1, 0.11)(number + 1) from numbers(10);
|
||||||
|
[1,1,2]
|
||||||
|
with number + 1 as col select quantilesApprox(10000, 0.25, 0.5, 0.75)(col), count(col), quantilesApprox(10000, 0.0, 1.0)(col), sum(col) from numbers(1000);
|
||||||
|
[250,500,750] 1000 [1,1000] 500500
|
||||||
|
select quantilesApprox(1, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
[1,1,1,1,1]
|
||||||
|
select quantilesApprox(10, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
[1,156,156,296,715]
|
||||||
|
select quantilesApprox(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
[93,192,251,306,770]
|
||||||
|
select quantilesApprox(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
[99,199,249,313,776]
|
||||||
|
select quantilesApprox(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
[100,200,250,314,777]
|
||||||
|
select quantileApprox()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
select quantileApprox(100)(number) from numbers(10);
|
||||||
|
4
|
||||||
|
select quantileApprox(100, 0.5)(number) from numbers(10);
|
||||||
|
4
|
||||||
|
select quantileApprox(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||||
|
select quantileApprox('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantileApprox(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantileApprox(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
select quantilesApprox()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
select quantilesApprox(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||||
|
select quantilesApprox(100, 0.5)(number) from numbers(10);
|
||||||
|
[4]
|
||||||
|
select quantilesApprox('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantilesApprox(1.23, 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantilesApprox(-100, 0.5, 0.75)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
31
tests/queries/0_stateless/02661_quantile_approx.sql
Normal file
31
tests/queries/0_stateless/02661_quantile_approx.sql
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
-- { echoOn }
|
||||||
|
with arrayJoin([0, 1, 2, 10]) as x select quantilesApprox(100, 0.5, 0.4, 0.1)(x);
|
||||||
|
with arrayJoin([0, 6, 7, 9, 10]) as x select quantileApprox(100, 0.5)(x);
|
||||||
|
|
||||||
|
select quantilesApprox(10000, 0.25, 0.5, 0.75, 0.0, 1.0, 0, 1)(number + 1) from numbers(1000);
|
||||||
|
select quantilesApprox(10000, 0.01, 0.1, 0.11)(number + 1) from numbers(10);
|
||||||
|
|
||||||
|
with number + 1 as col select quantilesApprox(10000, 0.25, 0.5, 0.75)(col), count(col), quantilesApprox(10000, 0.0, 1.0)(col), sum(col) from numbers(1000);
|
||||||
|
|
||||||
|
select quantilesApprox(1, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
select quantilesApprox(10, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
select quantilesApprox(100, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
select quantilesApprox(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
select quantilesApprox(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000);
|
||||||
|
|
||||||
|
|
||||||
|
select quantileApprox()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
select quantileApprox(100)(number) from numbers(10);
|
||||||
|
select quantileApprox(100, 0.5)(number) from numbers(10);
|
||||||
|
select quantileApprox(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||||
|
select quantileApprox('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantileApprox(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantileApprox(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
|
||||||
|
select quantilesApprox()(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
select quantilesApprox(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||||
|
select quantilesApprox(100, 0.5)(number) from numbers(10);
|
||||||
|
select quantilesApprox('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantilesApprox(1.23, 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||||
|
select quantilesApprox(-100, 0.5, 0.75)(number) from numbers(10); -- { serverError BAD_ARGUMENTS }
|
||||||
|
-- { echoOff }
|
@ -24,3 +24,33 @@ Positions: 0 1
|
|||||||
COLUMN Const(UInt8) -> 5 UInt8 : 1
|
COLUMN Const(UInt8) -> 5 UInt8 : 1
|
||||||
FUNCTION greaterOrEquals(id : 0, 5 :: 1) -> greaterOrEquals(id, 5) UInt8 : 2
|
FUNCTION greaterOrEquals(id : 0, 5 :: 1) -> greaterOrEquals(id, 5) UInt8 : 2
|
||||||
Positions: 2 0
|
Positions: 2 0
|
||||||
|
Expression ((Project names + (Projection + Change column names to column identifiers)))
|
||||||
|
Header: id UInt64
|
||||||
|
value String
|
||||||
|
Actions: INPUT : 0 -> id UInt64 : 0
|
||||||
|
INPUT : 1 -> value String : 1
|
||||||
|
ALIAS id :: 0 -> default.test_table.id_0 UInt64 : 2
|
||||||
|
ALIAS value :: 1 -> default.test_table.value_1 String : 0
|
||||||
|
ALIAS default.test_table.id_0 :: 2 -> id UInt64 : 1
|
||||||
|
ALIAS default.test_table.value_1 :: 0 -> value String : 2
|
||||||
|
Positions: 1 2
|
||||||
|
ReadFromMergeTree (default.test_table)
|
||||||
|
Header: id UInt64
|
||||||
|
value String
|
||||||
|
ReadType: Default
|
||||||
|
Parts: 0
|
||||||
|
Granules: 0
|
||||||
|
Prewhere info
|
||||||
|
Need filter: 1
|
||||||
|
Prewhere filter
|
||||||
|
Prewhere filter column: equals(id, 5_UInt8) (removed)
|
||||||
|
Actions: INPUT : 0 -> id UInt64 : 0
|
||||||
|
COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1
|
||||||
|
FUNCTION equals(id : 0, 5_UInt8 :: 1) -> equals(id, 5_UInt8) UInt8 : 2
|
||||||
|
Positions: 2 0
|
||||||
|
Row level filter
|
||||||
|
Row level filter column: greaterOrEquals(id, 5_UInt8)
|
||||||
|
Actions: INPUT : 0 -> id UInt64 : 0
|
||||||
|
COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1
|
||||||
|
FUNCTION greaterOrEquals(id : 0, 5_UInt8 :: 1) -> greaterOrEquals(id, 5_UInt8) UInt8 : 2
|
||||||
|
Positions: 2 0
|
||||||
|
@ -10,7 +10,8 @@ INSERT INTO test_table VALUES (0, 'Value');
|
|||||||
DROP ROW POLICY IF EXISTS test_row_policy ON test_table;
|
DROP ROW POLICY IF EXISTS test_row_policy ON test_table;
|
||||||
CREATE ROW POLICY test_row_policy ON test_table USING id >= 5 TO ALL;
|
CREATE ROW POLICY test_row_policy ON test_table USING id >= 5 TO ALL;
|
||||||
|
|
||||||
EXPLAIN header = 1, actions = 1 SELECT id, value FROM test_table PREWHERE id = 5;
|
EXPLAIN header = 1, actions = 1 SELECT id, value FROM test_table PREWHERE id = 5 settings allow_experimental_analyzer=0;
|
||||||
|
EXPLAIN header = 1, actions = 1 SELECT id, value FROM test_table PREWHERE id = 5 settings allow_experimental_analyzer=1;
|
||||||
|
|
||||||
DROP ROW POLICY test_row_policy ON test_table;
|
DROP ROW POLICY test_row_policy ON test_table;
|
||||||
DROP TABLE test_table;
|
DROP TABLE test_table;
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
42 42 42 42 42.42 42.42
|
||||||
|
{'Hello':42,'World':24}
|
||||||
|
42 42 42 42 42 42 42 42
|
17
tests/queries/0_stateless/02705_capnp_more_types.sh
Executable file
17
tests/queries/0_stateless/02705_capnp_more_types.sh
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tags: no-fasttest, no-parallel, no-replicated-database
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
SCHEMADIR=$CURDIR/format_schemas
|
||||||
|
$CLICKHOUSE_LOCAL -q "select 42::Int128 as int128, 42::UInt128 as uint128, 42::Int256 as int256, 42::UInt256 as uint256, 42.42::Decimal128(2) as decimal128, 42.42::Decimal256(2) as decimal256 format CapnProto settings format_schema='$SCHEMADIR/02705_big_numbers:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256, decimal128 Decimal128(2), decimal256 Decimal256(2)" -q "select * from table" --format_schema="$SCHEMADIR/02705_big_numbers:Message"
|
||||||
|
|
||||||
|
$CLICKHOUSE_LOCAL -q "select map('Hello', 42, 'World', 24) as map format CapnProto settings format_schema='$SCHEMADIR/02705_map:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "map Map(String, UInt32)" --format_schema="$SCHEMADIR/02705_map:Message" -q "select * from table"
|
||||||
|
|
||||||
|
|
||||||
|
$CLICKHOUSE_LOCAL -q "select 42 as int8, 42 as uint8, 42 as int16, 42 as uint16, 42 as int32, 42 as uint32, 42 as int64, 42 as uint64 format CapnProto settings format_schema='$SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_LOCAL --input-format CapnProto --structure "int8 UInt32, uint8 Int32, int16 Int8, uint16 UInt8, int32 UInt64, uint32 Int64, int64 UInt16, uint64 Int16" --format_schema="$SCHEMADIR/02030_capnp_simple_types:Message" -q "select * from table"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
|||||||
|
0.1 0.1 1 0.05 0.1 1 0.05 0.1 1 0.05 0.099562 1 0.018316 1 1 -0 1 1 -0 1 1 -0 1 1 -0 1
|
||||||
|
0.000007 0.000007 0.000004 0.000023 0.000007 0.000004 0.000023 0.000007 0.000004 0.000023 0.000008 0.000003 0.00002 0.158 0.158 0.158 0.146 0.158 0.158 0.146 0.158 0.158 0.146 0.158 0.158 0.146
|
||||||
|
0 0 0 0.523357 0 0 0.523357 0 0 0.523357 0 0 0.504595 0.486 0.486 0.486 0.036 0.486 0.486 0.036 0.486 0.486 0.036 0.486 0.486 0.036
|
107
tests/queries/0_stateless/02706_kolmogorov_smirnov_test.sql
Normal file
107
tests/queries/0_stateless/02706_kolmogorov_smirnov_test.sql
Normal file
File diff suppressed because one or more lines are too long
@ -0,0 +1,84 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from scipy import stats
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||||
|
|
||||||
|
from pure_http_client import ClickHouseClient
|
||||||
|
|
||||||
|
|
||||||
|
def test_and_check(name, a, b, t_stat, p_value, precision=1e-2):
|
||||||
|
client = ClickHouseClient()
|
||||||
|
client.query("DROP TABLE IF EXISTS ks_test;")
|
||||||
|
client.query("CREATE TABLE ks_test (left Float64, right UInt8) ENGINE = Memory;")
|
||||||
|
client.query(
|
||||||
|
"INSERT INTO ks_test VALUES {};".format(
|
||||||
|
", ".join(["({},{})".format(i, 0) for i in a])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
client.query(
|
||||||
|
"INSERT INTO ks_test VALUES {};".format(
|
||||||
|
", ".join(["({},{})".format(j, 1) for j in b])
|
||||||
|
)
|
||||||
|
)
|
||||||
|
real = client.query_return_df(
|
||||||
|
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
|
||||||
|
+ "roundBankers({}(left, right).2, 16) as p_value ".format(name)
|
||||||
|
+ "FROM ks_test FORMAT TabSeparatedWithNames;"
|
||||||
|
)
|
||||||
|
real_t_stat = real["t_stat"][0]
|
||||||
|
real_p_value = real["p_value"][0]
|
||||||
|
assert (
|
||||||
|
abs(real_t_stat - np.float64(t_stat)) < precision
|
||||||
|
), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
|
||||||
|
assert (
|
||||||
|
abs(real_p_value - np.float64(p_value)) < precision
|
||||||
|
), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
|
||||||
|
client.query("DROP TABLE IF EXISTS ks_test;")
|
||||||
|
|
||||||
|
|
||||||
|
def test_ks_all_alternatives(rvs1, rvs2):
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2)
|
||||||
|
test_and_check("kolmogorovSmirnovTest", rvs1, rvs2, s, p)
|
||||||
|
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2, alternative="two-sided")
|
||||||
|
test_and_check("kolmogorovSmirnovTest('two-sided')", rvs1, rvs2, s, p)
|
||||||
|
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2, alternative="greater", method="auto")
|
||||||
|
test_and_check("kolmogorovSmirnovTest('greater', 'auto')", rvs1, rvs2, s, p)
|
||||||
|
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2, alternative="less", method="exact")
|
||||||
|
test_and_check("kolmogorovSmirnovTest('less', 'exact')", rvs1, rvs2, s, p)
|
||||||
|
|
||||||
|
if max(len(rvs1), len(rvs2)) > 10000:
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2, alternative="two-sided", method="asymp")
|
||||||
|
test_and_check("kolmogorovSmirnovTest('two-sided', 'asymp')", rvs1, rvs2, s, p)
|
||||||
|
s, p = stats.ks_2samp(rvs1, rvs2, alternative="greater", method="asymp")
|
||||||
|
test_and_check("kolmogorovSmirnovTest('greater', 'asymp')", rvs1, rvs2, s, p)
|
||||||
|
|
||||||
|
|
||||||
|
def test_kolmogorov_smirnov():
|
||||||
|
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=10), 2)
|
||||||
|
rvs2 = np.round(stats.norm.rvs(loc=1.5, scale=5, size=20), 2)
|
||||||
|
test_ks_all_alternatives(rvs1, rvs2)
|
||||||
|
|
||||||
|
rvs1 = np.round(stats.norm.rvs(loc=13, scale=1, size=100), 2)
|
||||||
|
rvs2 = np.round(stats.norm.rvs(loc=1.52, scale=9, size=100), 2)
|
||||||
|
test_ks_all_alternatives(rvs1, rvs2)
|
||||||
|
|
||||||
|
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=100), 2)
|
||||||
|
rvs2 = np.round(stats.norm.rvs(loc=11.5, scale=50, size=1000), 2)
|
||||||
|
test_ks_all_alternatives(rvs1, rvs2)
|
||||||
|
|
||||||
|
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=11000), 2)
|
||||||
|
rvs2 = np.round(stats.norm.rvs(loc=3.5, scale=5.5, size=11000), 2)
|
||||||
|
test_ks_all_alternatives(rvs1, rvs2)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_kolmogorov_smirnov()
|
||||||
|
print("Ok.")
|
@ -0,0 +1 @@
|
|||||||
|
Ok.
|
9
tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.sh
Executable file
9
tests/queries/0_stateless/02706_kolmogorov_smirnov_test_scipy.sh
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
# We should have correct env vars from shell_config.sh to run this test
|
||||||
|
|
||||||
|
python3 "$CURDIR"/02706_kolmogorov_smirnov_test_scipy.python
|
@ -0,0 +1,5 @@
|
|||||||
|
[0] [[0]]
|
||||||
|
[0] [[0]]
|
||||||
|
[0] [[1]]
|
||||||
|
[0] [[1]]
|
||||||
|
[0] [[1]]
|
@ -0,0 +1,24 @@
|
|||||||
|
SELECT
|
||||||
|
range(1),
|
||||||
|
arrayMap(x -> arrayMap(x -> x, range(x)), [1])
|
||||||
|
SETTINGS allow_experimental_analyzer = 0;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
range(1),
|
||||||
|
arrayMap(x -> arrayMap(x -> x, range(x)), [1])
|
||||||
|
SETTINGS allow_experimental_analyzer = 1;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
range(1),
|
||||||
|
arrayMap(x -> arrayMap(x -> 1, range(x)), [1])
|
||||||
|
SETTINGS allow_experimental_analyzer = 0;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
range(1),
|
||||||
|
arrayMap(x -> arrayMap(x -> 1, range(x)), [1])
|
||||||
|
SETTINGS allow_experimental_analyzer = 1;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
range(1),
|
||||||
|
arrayMap(x -> arrayMap(y -> 1, range(x)), [1])
|
||||||
|
SETTINGS allow_experimental_analyzer = 1;
|
@ -1 +1 @@
|
|||||||
`a` Nullable(String), `b` Array(Nullable(Int32)), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32))
|
`a` Nullable(String), `b` Nullable(Int32), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32))
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
@0xdbb9ad1f14bf0b36;
|
||||||
|
|
||||||
|
struct Message {
|
||||||
|
int128 @0 :Data;
|
||||||
|
uint128 @1 :Data;
|
||||||
|
int256 @2 :Data;
|
||||||
|
uint256 @3 :Data;
|
||||||
|
decimal128 @4 :Data;
|
||||||
|
decimal256 @5 :Data;
|
||||||
|
}
|
14
tests/queries/0_stateless/format_schemas/02705_map.capnp
Normal file
14
tests/queries/0_stateless/format_schemas/02705_map.capnp
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
@0xdbb9ad1f14bf0b36;
|
||||||
|
|
||||||
|
struct Message {
|
||||||
|
struct Map {
|
||||||
|
struct Entry {
|
||||||
|
key @0 : Text;
|
||||||
|
value @1 : UInt32;
|
||||||
|
}
|
||||||
|
|
||||||
|
entries @0 : List(Entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
map @0 : Map;
|
||||||
|
}
|
@ -15,6 +15,7 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
|
import random
|
||||||
|
|
||||||
if os.environ.get("AWS_LAMBDA_ENV", "0") == "1":
|
if os.environ.get("AWS_LAMBDA_ENV", "0") == "1":
|
||||||
# For AWS lambda (python 3.7)
|
# For AWS lambda (python 3.7)
|
||||||
@ -28,25 +29,34 @@ DRY_RUN_MARK = "<no url, dry run>"
|
|||||||
MAX_FAILURES_DEFAULT = 50
|
MAX_FAILURES_DEFAULT = 50
|
||||||
SLACK_URL_DEFAULT = DRY_RUN_MARK
|
SLACK_URL_DEFAULT = DRY_RUN_MARK
|
||||||
|
|
||||||
# Find tests that failed in master during the last check_period hours,
|
EXTENDED_CHECK_PERIOD_MUL = 3
|
||||||
|
FLAKY_ALERT_PROBABILITY = 0.20
|
||||||
|
|
||||||
|
# Find tests that failed in master during the last check_period * 12 hours,
|
||||||
# but did not fail during the last 2 weeks. Assuming these tests were broken recently.
|
# but did not fail during the last 2 weeks. Assuming these tests were broken recently.
|
||||||
# NOTE: It may report flaky tests that fail too rarely.
|
# Counts number of failures in check_period and check_period * 12 time windows
|
||||||
|
# to distinguish rare flaky tests from completely broken tests
|
||||||
NEW_BROKEN_TESTS_QUERY = """
|
NEW_BROKEN_TESTS_QUERY = """
|
||||||
WITH
|
WITH
|
||||||
1 AS check_period,
|
1 AS check_period,
|
||||||
|
check_period * 12 AS extended_check_period,
|
||||||
now() as now
|
now() as now
|
||||||
SELECT test_name, any(report_url)
|
SELECT
|
||||||
|
test_name,
|
||||||
|
any(report_url),
|
||||||
|
countIf((check_start_time + check_duration_ms / 1000) < now - INTERVAL check_period HOUR) AS count_prev_periods,
|
||||||
|
countIf((check_start_time + check_duration_ms / 1000) >= now - INTERVAL check_period HOUR) AS count
|
||||||
FROM checks
|
FROM checks
|
||||||
WHERE 1
|
WHERE 1
|
||||||
AND check_start_time >= now - INTERVAL 1 WEEK
|
AND check_start_time BETWEEN now - INTERVAL 1 WEEK AND now
|
||||||
AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL check_period HOUR
|
AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL extended_check_period HOUR
|
||||||
AND pull_request_number = 0
|
AND pull_request_number = 0
|
||||||
AND test_status LIKE 'F%'
|
AND test_status LIKE 'F%'
|
||||||
AND check_status != 'success'
|
AND check_status != 'success'
|
||||||
AND test_name NOT IN (
|
AND test_name NOT IN (
|
||||||
SELECT test_name FROM checks WHERE 1
|
SELECT test_name FROM checks WHERE 1
|
||||||
AND check_start_time >= now - INTERVAL 1 MONTH
|
AND check_start_time >= now - INTERVAL 1 MONTH
|
||||||
AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL check_period HOUR
|
AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL extended_check_period HOUR
|
||||||
AND pull_request_number = 0
|
AND pull_request_number = 0
|
||||||
AND check_status != 'success'
|
AND check_status != 'success'
|
||||||
AND test_status LIKE 'F%')
|
AND test_status LIKE 'F%')
|
||||||
@ -74,6 +84,27 @@ WHERE 1
|
|||||||
AND check_name ILIKE check_name_pattern
|
AND check_name ILIKE check_name_pattern
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# It shows all recent failures of the specified test (helps to find when it started)
|
||||||
|
ALL_RECENT_FAILURES_QUERY = """
|
||||||
|
WITH
|
||||||
|
'{}' AS name_substr,
|
||||||
|
90 AS interval_days,
|
||||||
|
('Stateless tests (asan)', 'Stateless tests (address)', 'Stateless tests (address, actions)') AS backport_and_release_specific_checks
|
||||||
|
SELECT
|
||||||
|
toStartOfDay(check_start_time) AS d,
|
||||||
|
count(),
|
||||||
|
groupUniqArray(pull_request_number) AS prs,
|
||||||
|
any(report_url)
|
||||||
|
FROM checks
|
||||||
|
WHERE ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (pull_request_number NOT IN (
|
||||||
|
SELECT pull_request_number AS prn
|
||||||
|
FROM checks
|
||||||
|
WHERE (prn != 0) AND ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (check_name IN (backport_and_release_specific_checks))
|
||||||
|
)) AND (position(test_name, name_substr) > 0) AND (test_status IN ('FAIL', 'ERROR', 'FLAKY'))
|
||||||
|
GROUP BY d
|
||||||
|
ORDER BY d DESC
|
||||||
|
"""
|
||||||
|
|
||||||
SLACK_MESSAGE_JSON = {"type": "mrkdwn", "text": None}
|
SLACK_MESSAGE_JSON = {"type": "mrkdwn", "text": None}
|
||||||
|
|
||||||
|
|
||||||
@ -97,16 +128,62 @@ def run_clickhouse_query(query):
|
|||||||
return [x.split("\t") for x in lines]
|
return [x.split("\t") for x in lines]
|
||||||
|
|
||||||
|
|
||||||
def get_new_broken_tests_message(broken_tests):
|
def split_broken_and_flaky_tests(failed_tests):
|
||||||
if not broken_tests:
|
if not failed_tests:
|
||||||
return None
|
return None
|
||||||
msg = "There are {} new broken tests in master:\n".format(len(broken_tests))
|
|
||||||
for name, report in broken_tests:
|
broken_tests = []
|
||||||
msg += " - *{}* - <{}|Report>\n".format(name, report)
|
flaky_tests = []
|
||||||
|
for name, report, count_prev_str, count_str in failed_tests:
|
||||||
|
count_prev, count = int(count_prev_str), int(count_str)
|
||||||
|
if (2 <= count and count_prev < 2) or (count_prev == 1 and count == 1):
|
||||||
|
# It failed 2 times or more within extended time window, it's definitely broken.
|
||||||
|
# count_prev < 2 means that it was not reported as broken on previous runs
|
||||||
|
broken_tests.append([name, report])
|
||||||
|
elif 0 < count and count_prev == 0:
|
||||||
|
# It failed only once, can be a rare flaky test
|
||||||
|
flaky_tests.append([name, report])
|
||||||
|
|
||||||
|
return broken_tests, flaky_tests
|
||||||
|
|
||||||
|
|
||||||
|
def format_failed_tests_list(failed_tests, failure_type):
|
||||||
|
if len(failed_tests) == 1:
|
||||||
|
res = "There is a new {} test:\n".format(failure_type)
|
||||||
|
else:
|
||||||
|
res = "There are {} new {} tests:\n".format(len(failed_tests), failure_type)
|
||||||
|
|
||||||
|
for name, report in failed_tests:
|
||||||
|
cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name))
|
||||||
|
res += " - *{}* - <{}|Report> - <{}|CI DB> \n".format(
|
||||||
|
name, report, cidb_url
|
||||||
|
)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def get_new_broken_tests_message(failed_tests):
|
||||||
|
if not failed_tests:
|
||||||
|
return None
|
||||||
|
|
||||||
|
broken_tests, flaky_tests = split_broken_and_flaky_tests(failed_tests)
|
||||||
|
if len(broken_tests) == 0 and len(flaky_tests) == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
msg = ""
|
||||||
|
if len(broken_tests) > 0:
|
||||||
|
msg += format_failed_tests_list(broken_tests, "*BROKEN*")
|
||||||
|
elif random.random() > FLAKY_ALERT_PROBABILITY:
|
||||||
|
# Should we report fuzzers unconditionally?
|
||||||
|
print("Will not report flaky tests to avoid noise: ", flaky_tests)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if len(flaky_tests) > 0:
|
||||||
|
msg += format_failed_tests_list(flaky_tests, "flaky")
|
||||||
|
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
|
|
||||||
def get_too_many_failures_message(failures_count):
|
def get_too_many_failures_message_impl(failures_count):
|
||||||
MAX_FAILURES = int(os.environ.get("MAX_FAILURES", MAX_FAILURES_DEFAULT))
|
MAX_FAILURES = int(os.environ.get("MAX_FAILURES", MAX_FAILURES_DEFAULT))
|
||||||
curr_failures = int(failures_count[0][0])
|
curr_failures = int(failures_count[0][0])
|
||||||
prev_failures = int(failures_count[0][1])
|
prev_failures = int(failures_count[0][1])
|
||||||
@ -129,6 +206,13 @@ def get_too_many_failures_message(failures_count):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_too_many_failures_message(failures_count):
|
||||||
|
msg = get_too_many_failures_message_impl(failures_count)
|
||||||
|
if msg:
|
||||||
|
msg += "\nSee https://aretestsgreenyet.com/"
|
||||||
|
return msg
|
||||||
|
|
||||||
|
|
||||||
def send_to_slack(message):
|
def send_to_slack(message):
|
||||||
SLACK_URL = os.environ.get("SLACK_URL", SLACK_URL_DEFAULT)
|
SLACK_URL = os.environ.get("SLACK_URL", SLACK_URL_DEFAULT)
|
||||||
if SLACK_URL == DRY_RUN_MARK:
|
if SLACK_URL == DRY_RUN_MARK:
|
||||||
|
Loading…
Reference in New Issue
Block a user