Merge branch 'alter-to-json' of github.com:Avogar/ClickHouse into fix-dynamic-sizes

This commit is contained in:
avogar 2024-11-07 18:00:49 +00:00
commit 2ec1c88092
382 changed files with 9922 additions and 4810 deletions

2
.gitmodules vendored
View File

@ -332,7 +332,7 @@
url = https://github.com/ClickHouse/usearch.git
[submodule "contrib/SimSIMD"]
path = contrib/SimSIMD
url = https://github.com/ashvardanian/SimSIMD.git
url = https://github.com/ClickHouse/SimSIMD.git
[submodule "contrib/FP16"]
path = contrib/FP16
url = https://github.com/Maratyszcza/FP16.git

View File

@ -488,6 +488,7 @@
* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples. [#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)).
* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. - on MV destination table: the equal blocks which comes from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
#### New Feature
* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)).
@ -599,7 +600,6 @@
* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)).
* Setting `join_any_take_last_row` is supported in any query with hash join. [#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)).
* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)).
* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)).
* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)).

View File

@ -88,6 +88,7 @@ string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
option (ENABLE_FUZZER_TEST "Build testing fuzzers in order to test libFuzzer functionality" OFF)
if (ENABLE_FUZZING)
# Also set WITH_COVERAGE=1 for better fuzzing process

View File

@ -14,9 +14,10 @@ The following versions of ClickHouse server are currently supported with securit
| Version | Supported |
|:-|:-|
| 24.10 | ✔️ |
| 24.9 | ✔️ |
| 24.8 | ✔️ |
| 24.7 | ✔️ |
| 24.7 | |
| 24.6 | ❌ |
| 24.5 | ❌ |
| 24.4 | ❌ |

View File

@ -86,7 +86,7 @@ using StringRefs = std::vector<StringRef>;
* For more information, see hash_map_string_2.cpp
*/
inline bool compare8(const char * p1, const char * p2)
inline bool compare16(const char * p1, const char * p2)
{
return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)),
@ -115,7 +115,7 @@ inline bool compare64(const char * p1, const char * p2)
#elif defined(__aarch64__) && defined(__ARM_NEON)
inline bool compare8(const char * p1, const char * p2)
inline bool compare16(const char * p1, const char * p2)
{
uint64_t mask = getNibbleMask(vceqq_u8(
vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))));
@ -185,13 +185,22 @@ inline bool memequalWide(const char * p1, const char * p2, size_t size)
switch (size / 16) // NOLINT(bugprone-switch-missing-default-case)
{
case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
case 1: if (!compare8(p1, p2)) return false; [[fallthrough]];
case 3:
if (!compare16(p1 + 32, p2 + 32))
return false;
[[fallthrough]];
case 2:
if (!compare16(p1 + 16, p2 + 16))
return false;
[[fallthrough]];
case 1:
if (!compare16(p1, p2))
return false;
[[fallthrough]];
default: ;
}
return compare8(p1 + size - 16, p2 + size - 16);
return compare16(p1 + size - 16, p2 + size - 16);
}
#endif

View File

@ -4,6 +4,7 @@
#include <string>
#include <sstream>
#include <cctz/time_zone.h>
#include <fmt/core.h>
inline std::string to_string(const std::time_t & time)
@ -11,18 +12,6 @@ inline std::string to_string(const std::time_t & time)
return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
}
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
// Don't use DateLUT because it shows weird characters for
// TimePoint::max(). I wish we could use C++20 format, but it's not
// there yet.
// return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
auto in_time_t = std::chrono::system_clock::to_time_t(tp);
return to_string(in_time_t);
}
template <typename Rep, typename Period = std::ratio<1>>
std::string to_string(const std::chrono::duration<Rep, Period> & duration)
{
@ -33,6 +22,20 @@ std::string to_string(const std::chrono::duration<Rep, Period> & duration)
return std::to_string(seconds_as_double.count()) + "s";
}
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
// Don't use DateLUT because it shows weird characters for
// TimePoint::max(). I wish we could use C++20 format, but it's not
// there yet.
// return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
if constexpr (std::is_same_v<Clock, std::chrono::system_clock>)
return to_string(std::chrono::system_clock::to_time_t(tp));
else
return to_string(tp.time_since_epoch());
}
template <typename Clock, typename Duration = typename Clock::duration>
std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
{
@ -44,3 +47,23 @@ std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Per
{
return o << to_string(duration);
}
template <typename Clock, typename Duration>
struct fmt::formatter<std::chrono::time_point<Clock, Duration>> : fmt::formatter<std::string>
{
template <typename FormatCtx>
auto format(const std::chrono::time_point<Clock, Duration> & tp, FormatCtx & ctx) const
{
return fmt::formatter<std::string>::format(::to_string(tp), ctx);
}
};
template <typename Rep, typename Period>
struct fmt::formatter<std::chrono::duration<Rep, Period>> : fmt::formatter<std::string>
{
template <typename FormatCtx>
auto format(const std::chrono::duration<Rep, Period> & duration, FormatCtx & ctx) const
{
return fmt::formatter<std::string>::format(::to_string(duration), ctx);
}
};

2
contrib/SimSIMD vendored

@ -1 +1 @@
Subproject commit ff51434d90c66f916e94ff05b24530b127aa4cff
Subproject commit ee3c9c9c00b51645f62a1a9e99611b78c0052a21

View File

@ -1,4 +1,8 @@
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
add_library(_simsimd INTERFACE)
target_include_directories(_simsimd SYSTEM INTERFACE "${SIMSIMD_PROJECT_DIR}/include")
# See contrib/usearch-cmake/CMakeLists.txt, why only enabled on x86
if (ARCH_AMD64)
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
set(SIMSIMD_SRCS ${SIMSIMD_PROJECT_DIR}/c/lib.c)
add_library(_simsimd ${SIMSIMD_SRCS})
target_include_directories(_simsimd SYSTEM PUBLIC "${SIMSIMD_PROJECT_DIR}/include")
target_compile_definitions(_simsimd PUBLIC SIMSIMD_DYNAMIC_DISPATCH)
endif()

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit 5cfccd8ea65f33d4517e7409815d761c7650b45d
Subproject commit 6e2574f5013a005c050c9a7787d341aef09d0063

View File

@ -213,13 +213,19 @@ target_include_directories(_orc SYSTEM PRIVATE
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")
# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
# find . \( -iname \*.cc -o -iname \*.cpp -o -iname \*.c \) | sort | awk '{print "\"${LIBRARY_DIR}" substr($1,2) "\"" }' | grep -v 'test.cc' | grep -v 'json' | grep -v 'flight' \|
# grep -v 'csv' | grep -v 'acero' | grep -v 'dataset' | grep -v 'testing' | grep -v 'gpu' | grep -v 'engine' | grep -v 'filesystem' | grep -v 'benchmark.cc'
set(ARROW_SRCS
"${LIBRARY_DIR}/adapters/orc/adapter.cc"
"${LIBRARY_DIR}/adapters/orc/options.cc"
"${LIBRARY_DIR}/adapters/orc/util.cc"
"${LIBRARY_DIR}/array/array_base.cc"
"${LIBRARY_DIR}/array/array_binary.cc"
"${LIBRARY_DIR}/array/array_decimal.cc"
"${LIBRARY_DIR}/array/array_dict.cc"
"${LIBRARY_DIR}/array/array_nested.cc"
"${LIBRARY_DIR}/array/array_primitive.cc"
"${LIBRARY_DIR}/array/array_run_end.cc"
"${LIBRARY_DIR}/array/builder_adaptive.cc"
"${LIBRARY_DIR}/array/builder_base.cc"
"${LIBRARY_DIR}/array/builder_binary.cc"
@ -227,124 +233,26 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/array/builder_dict.cc"
"${LIBRARY_DIR}/array/builder_nested.cc"
"${LIBRARY_DIR}/array/builder_primitive.cc"
"${LIBRARY_DIR}/array/builder_union.cc"
"${LIBRARY_DIR}/array/builder_run_end.cc"
"${LIBRARY_DIR}/array/array_run_end.cc"
"${LIBRARY_DIR}/array/builder_union.cc"
"${LIBRARY_DIR}/array/concatenate.cc"
"${LIBRARY_DIR}/array/data.cc"
"${LIBRARY_DIR}/array/diff.cc"
"${LIBRARY_DIR}/array/util.cc"
"${LIBRARY_DIR}/array/validate.cc"
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/buffer.cc"
"${LIBRARY_DIR}/chunked_array.cc"
"${LIBRARY_DIR}/chunk_resolver.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
"${LIBRARY_DIR}/device.cc"
"${LIBRARY_DIR}/extension_type.cc"
"${LIBRARY_DIR}/memory_pool.cc"
"${LIBRARY_DIR}/pretty_print.cc"
"${LIBRARY_DIR}/record_batch.cc"
"${LIBRARY_DIR}/result.cc"
"${LIBRARY_DIR}/scalar.cc"
"${LIBRARY_DIR}/sparse_tensor.cc"
"${LIBRARY_DIR}/status.cc"
"${LIBRARY_DIR}/table.cc"
"${LIBRARY_DIR}/table_builder.cc"
"${LIBRARY_DIR}/tensor.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
"${LIBRARY_DIR}/tensor/csx_converter.cc"
"${LIBRARY_DIR}/type.cc"
"${LIBRARY_DIR}/visitor.cc"
"${LIBRARY_DIR}/builder.cc"
"${LIBRARY_DIR}/c/bridge.cc"
"${LIBRARY_DIR}/io/buffered.cc"
"${LIBRARY_DIR}/io/caching.cc"
"${LIBRARY_DIR}/io/compressed.cc"
"${LIBRARY_DIR}/io/file.cc"
"${LIBRARY_DIR}/io/hdfs.cc"
"${LIBRARY_DIR}/io/hdfs_internal.cc"
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/util/async_util.cc"
"${LIBRARY_DIR}/util/basic_decimal.cc"
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/counting_semaphore.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
"${LIBRARY_DIR}/util/formatting.cc"
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/trie.cc"
"${LIBRARY_DIR}/util/unreachable.cc"
"${LIBRARY_DIR}/util/uri.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/util/byte_size.cc"
"${LIBRARY_DIR}/util/debug.cc"
"${LIBRARY_DIR}/util/tracing.cc"
"${LIBRARY_DIR}/util/atfork_internal.cc"
"${LIBRARY_DIR}/util/crc32.cc"
"${LIBRARY_DIR}/util/hashing.cc"
"${LIBRARY_DIR}/util/ree_util.cc"
"${LIBRARY_DIR}/util/union_util.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
"${LIBRARY_DIR}/vendored/musl/strptime.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCommon.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCompare.c"
"${LIBRARY_DIR}/vendored/uriparser/UriEscape.c"
"${LIBRARY_DIR}/vendored/uriparser/UriFile.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4Base.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4.c"
"${LIBRARY_DIR}/vendored/uriparser/UriMemory.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalizeBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalize.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParseBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParse.c"
"${LIBRARY_DIR}/vendored/uriparser/UriQuery.c"
"${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c"
"${LIBRARY_DIR}/vendored/uriparser/UriResolve.c"
"${LIBRARY_DIR}/vendored/uriparser/UriShorten.c"
"${LIBRARY_DIR}/vendored/double-conversion/bignum.cc"
"${LIBRARY_DIR}/vendored/double-conversion/bignum-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/cached-powers.cc"
"${LIBRARY_DIR}/vendored/double-conversion/double-to-string.cc"
"${LIBRARY_DIR}/vendored/double-conversion/fast-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/fixed-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/string-to-double.cc"
"${LIBRARY_DIR}/vendored/double-conversion/strtod.cc"
"${LIBRARY_DIR}/c/dlpack.cc"
"${LIBRARY_DIR}/chunk_resolver.cc"
"${LIBRARY_DIR}/chunked_array.cc"
"${LIBRARY_DIR}/compare.cc"
"${LIBRARY_DIR}/compute/api_aggregate.cc"
"${LIBRARY_DIR}/compute/api_scalar.cc"
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/expression.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
@ -355,6 +263,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/ree_util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
@ -382,30 +291,139 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_pairwise.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_run_end_encode.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection_filter_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection_take_internal.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/expression.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/key_hash_internal.cc"
"${LIBRARY_DIR}/compute/key_map_internal.cc"
"${LIBRARY_DIR}/compute/light_array_internal.cc"
"${LIBRARY_DIR}/compute/ordering.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
"${LIBRARY_DIR}/compute/row/grouper.cc"
"${LIBRARY_DIR}/compute/row/row_internal.cc"
"${LIBRARY_DIR}/compute/util.cc"
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
"${LIBRARY_DIR}/device.cc"
"${LIBRARY_DIR}/extension_type.cc"
"${LIBRARY_DIR}/integration/c_data_integration_internal.cc"
"${LIBRARY_DIR}/io/buffered.cc"
"${LIBRARY_DIR}/io/caching.cc"
"${LIBRARY_DIR}/io/compressed.cc"
"${LIBRARY_DIR}/io/file.cc"
"${LIBRARY_DIR}/io/hdfs.cc"
"${LIBRARY_DIR}/io/hdfs_internal.cc"
"${LIBRARY_DIR}/io/interfaces.cc"
"${LIBRARY_DIR}/io/memory.cc"
"${LIBRARY_DIR}/io/slow.cc"
"${LIBRARY_DIR}/io/stdio.cc"
"${LIBRARY_DIR}/io/transform.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"
"${LIBRARY_DIR}/ipc/feather.cc"
"${LIBRARY_DIR}/ipc/file_to_stream.cc"
"${LIBRARY_DIR}/ipc/message.cc"
"${LIBRARY_DIR}/ipc/metadata_internal.cc"
"${LIBRARY_DIR}/ipc/options.cc"
"${LIBRARY_DIR}/ipc/reader.cc"
"${LIBRARY_DIR}/ipc/stream_to_file.cc"
"${LIBRARY_DIR}/ipc/writer.cc"
"${LIBRARY_DIR}/memory_pool.cc"
"${LIBRARY_DIR}/pretty_print.cc"
"${LIBRARY_DIR}/record_batch.cc"
"${LIBRARY_DIR}/result.cc"
"${LIBRARY_DIR}/scalar.cc"
"${LIBRARY_DIR}/sparse_tensor.cc"
"${LIBRARY_DIR}/status.cc"
"${LIBRARY_DIR}/table.cc"
"${LIBRARY_DIR}/table_builder.cc"
"${LIBRARY_DIR}/tensor.cc"
"${LIBRARY_DIR}/tensor/coo_converter.cc"
"${LIBRARY_DIR}/tensor/csf_converter.cc"
"${LIBRARY_DIR}/tensor/csx_converter.cc"
"${LIBRARY_DIR}/type.cc"
"${LIBRARY_DIR}/type_traits.cc"
"${LIBRARY_DIR}/util/align_util.cc"
"${LIBRARY_DIR}/util/async_util.cc"
"${LIBRARY_DIR}/util/atfork_internal.cc"
"${LIBRARY_DIR}/util/basic_decimal.cc"
"${LIBRARY_DIR}/util/bit_block_counter.cc"
"${LIBRARY_DIR}/util/bit_run_reader.cc"
"${LIBRARY_DIR}/util/bit_util.cc"
"${LIBRARY_DIR}/util/bitmap.cc"
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/byte_size.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/counting_semaphore.cc"
"${LIBRARY_DIR}/util/cpu_info.cc"
"${LIBRARY_DIR}/util/crc32.cc"
"${LIBRARY_DIR}/util/debug.cc"
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
"${LIBRARY_DIR}/util/dict_util.cc"
"${LIBRARY_DIR}/util/float16.cc"
"${LIBRARY_DIR}/util/formatting.cc"
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/hashing.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/list_util.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/ree_util.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
"${LIBRARY_DIR}/util/time.cc"
"${LIBRARY_DIR}/util/tracing.cc"
"${LIBRARY_DIR}/util/trie.cc"
"${LIBRARY_DIR}/util/union_util.cc"
"${LIBRARY_DIR}/util/unreachable.cc"
"${LIBRARY_DIR}/util/uri.cc"
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
"${LIBRARY_DIR}/vendored/double-conversion/bignum-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/bignum.cc"
"${LIBRARY_DIR}/vendored/double-conversion/cached-powers.cc"
"${LIBRARY_DIR}/vendored/double-conversion/double-to-string.cc"
"${LIBRARY_DIR}/vendored/double-conversion/fast-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/fixed-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/string-to-double.cc"
"${LIBRARY_DIR}/vendored/double-conversion/strtod.cc"
"${LIBRARY_DIR}/vendored/musl/strptime.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCommon.c"
"${LIBRARY_DIR}/vendored/uriparser/UriCompare.c"
"${LIBRARY_DIR}/vendored/uriparser/UriEscape.c"
"${LIBRARY_DIR}/vendored/uriparser/UriFile.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4.c"
"${LIBRARY_DIR}/vendored/uriparser/UriIp4Base.c"
"${LIBRARY_DIR}/vendored/uriparser/UriMemory.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalize.c"
"${LIBRARY_DIR}/vendored/uriparser/UriNormalizeBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParse.c"
"${LIBRARY_DIR}/vendored/uriparser/UriParseBase.c"
"${LIBRARY_DIR}/vendored/uriparser/UriQuery.c"
"${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c"
"${LIBRARY_DIR}/vendored/uriparser/UriResolve.c"
"${LIBRARY_DIR}/vendored/uriparser/UriShorten.c"
"${LIBRARY_DIR}/visitor.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
"${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
@ -465,22 +483,38 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/arrow/schema.cc"
"${LIBRARY_DIR}/arrow/schema_internal.cc"
"${LIBRARY_DIR}/arrow/writer.cc"
"${LIBRARY_DIR}/benchmark_util.cc"
"${LIBRARY_DIR}/bloom_filter.cc"
"${LIBRARY_DIR}/bloom_filter_reader.cc"
"${LIBRARY_DIR}/column_reader.cc"
"${LIBRARY_DIR}/column_scanner.cc"
"${LIBRARY_DIR}/column_writer.cc"
"${LIBRARY_DIR}/encoding.cc"
"${LIBRARY_DIR}/encryption/crypto_factory.cc"
"${LIBRARY_DIR}/encryption/encryption.cc"
"${LIBRARY_DIR}/encryption/encryption_internal.cc"
"${LIBRARY_DIR}/encryption/encryption_internal_nossl.cc"
"${LIBRARY_DIR}/encryption/file_key_unwrapper.cc"
"${LIBRARY_DIR}/encryption/file_key_wrapper.cc"
"${LIBRARY_DIR}/encryption/file_system_key_material_store.cc"
"${LIBRARY_DIR}/encryption/internal_file_decryptor.cc"
"${LIBRARY_DIR}/encryption/internal_file_encryptor.cc"
"${LIBRARY_DIR}/encryption/key_material.cc"
"${LIBRARY_DIR}/encryption/key_metadata.cc"
"${LIBRARY_DIR}/encryption/key_toolkit.cc"
"${LIBRARY_DIR}/encryption/key_toolkit_internal.cc"
"${LIBRARY_DIR}/encryption/kms_client.cc"
"${LIBRARY_DIR}/encryption/local_wrap_kms_client.cc"
"${LIBRARY_DIR}/encryption/openssl_internal.cc"
"${LIBRARY_DIR}/exception.cc"
"${LIBRARY_DIR}/file_reader.cc"
"${LIBRARY_DIR}/file_writer.cc"
"${LIBRARY_DIR}/page_index.cc"
"${LIBRARY_DIR}/level_conversion.cc"
"${LIBRARY_DIR}/level_comparison.cc"
"${LIBRARY_DIR}/level_comparison_avx2.cc"
"${LIBRARY_DIR}/level_conversion.cc"
"${LIBRARY_DIR}/level_conversion_bmi2.cc"
"${LIBRARY_DIR}/metadata.cc"
"${LIBRARY_DIR}/page_index.cc"
"${LIBRARY_DIR}/platform.cc"
"${LIBRARY_DIR}/printer.cc"
"${LIBRARY_DIR}/properties.cc"
@ -489,7 +523,6 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/stream_reader.cc"
"${LIBRARY_DIR}/stream_writer.cc"
"${LIBRARY_DIR}/types.cc"
"${LIBRARY_DIR}/bloom_filter_reader.cc"
"${LIBRARY_DIR}/xxhasher.cc"
"${GEN_LIBRARY_DIR}/parquet_constants.cpp"
@ -520,6 +553,9 @@ endif ()
add_definitions(-DPARQUET_THRIFT_VERSION_MAJOR=0)
add_definitions(-DPARQUET_THRIFT_VERSION_MINOR=16)
# As per https://github.com/apache/arrow/pull/35672 you need to enable it explicitly.
add_definitions(-DARROW_ENABLE_THREADING)
# === tools
set(TOOLS_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet")

2
contrib/flatbuffers vendored

@ -1 +1 @@
Subproject commit eb3f827948241ce0e701516f16cd67324802bce9
Subproject commit 0100f6a5779831fa7a651e4b67ef389a8752bd9b

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d
Subproject commit c5b4b994c18db86933255907a97eee5993fd18fe

2
contrib/usearch vendored

@ -1 +1 @@
Subproject commit 1706420acafbd83d852c512dcf343af0a4059e48
Subproject commit 7efe8b710c9831bfe06573b1df0fad001b04a2b5

View File

@ -6,12 +6,63 @@ target_include_directories(_usearch SYSTEM INTERFACE ${USEARCH_PROJECT_DIR}/incl
target_link_libraries(_usearch INTERFACE _fp16)
target_compile_definitions(_usearch INTERFACE USEARCH_USE_FP16LIB)
# target_compile_definitions(_usearch INTERFACE USEARCH_USE_SIMSIMD)
# ^^ simsimd is not enabled at the moment. Reasons:
# - Vectorization is important for raw scans but not so much for HNSW. We use usearch only for HNSW.
# - Simsimd does compile-time dispatch (choice of SIMD kernels determined by capabilities of the build machine) or dynamic dispatch (SIMD
# kernels chosen at runtime based on cpuid instruction). Since current builds are limited to SSE 4.2 (x86) and NEON (ARM), the speedup of
# the former would be moderate compared to AVX-512 / SVE. The latter is at the moment too fragile with respect to portability across x86
# and ARM machines ... certain conbinations of quantizations / distance functions / SIMD instructions are not implemented at the moment.
# Only x86 for now. On ARM, the linker goes down in flames. To make SimSIMD compile, I had to remove a macro checks in SimSIMD
# for AVX512 (x86, worked nicely) and __ARM_BF16_FORMAT_ALTERNATIVE. It is probably because of that.
if (ARCH_AMD64)
target_link_libraries(_usearch INTERFACE _simsimd)
target_compile_definitions(_usearch INTERFACE USEARCH_USE_SIMSIMD)
target_compile_definitions(_usearch INTERFACE USEARCH_CAN_COMPILE_FLOAT16)
target_compile_definitions(_usearch INTERFACE USEARCH_CAN_COMPILE_BF16)
endif ()
add_library(ch_contrib::usearch ALIAS _usearch)
# Cf. https://github.com/llvm/llvm-project/issues/107810 (though it is not 100% the same stack)
#
# LLVM ERROR: Cannot select: 0x7996e7a73150: f32,ch = load<(load (s16) from %ir.22, !tbaa !54231), anyext from bf16> 0x79961cb737c0, 0x7996e7a1a500, undef:i64, ./contrib/SimSIMD/include/simsimd/dot.h:215:1
# 0x7996e7a1a500: i64 = add 0x79961e770d00, Constant:i64<-16>, ./contrib/SimSIMD/include/simsimd/dot.h:215:1
# 0x79961e770d00: i64,ch = CopyFromReg 0x79961cb737c0, Register:i64 %4, ./contrib/SimSIMD/include/simsimd/dot.h:215:1
# 0x7996e7a1ae10: i64 = Register %4
# 0x7996e7a1b5f0: i64 = Constant<-16>
# 0x7996e7a1a730: i64 = undef
# In function: _ZL23simsimd_dot_bf16_serialPKu6__bf16S0_yPd
# PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
# Stack dump:
# 0. Running pass 'Function Pass Manager' on module 'src/libdbms.a(MergeTreeIndexVectorSimilarity.cpp.o at 2312737440)'.
# 1. Running pass 'AArch64 Instruction Selection' on function '@_ZL23simsimd_dot_bf16_serialPKu6__bf16S0_yPd'
# #0 0x00007999e83a63bf llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xda63bf)
# #1 0x00007999e83a44f9 llvm::sys::RunSignalHandlers() (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xda44f9)
# #2 0x00007999e83a6b00 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xda6b00)
# #3 0x00007999e6e45320 (/lib/x86_64-linux-gnu/libc.so.6+0x45320)
# #4 0x00007999e6e9eb1c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb1c)
# #5 0x00007999e6e4526e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4526e)
# #6 0x00007999e6e288ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff)
# #7 0x00007999e82fe0c2 llvm::report_fatal_error(llvm::Twine const&, bool) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xcfe0c2)
# #8 0x00007999e8c2f8e3 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x162f8e3)
# #9 0x00007999e8c2ed76 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x162ed76)
# #10 0x00007999ea1adbcb (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x2badbcb)
# #11 0x00007999e8c2611f llvm::SelectionDAGISel::DoInstructionSelection() (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x162611f)
# #12 0x00007999e8c25790 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x1625790)
# #13 0x00007999e8c248de llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x16248de)
# #14 0x00007999e8c22934 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x1622934)
# #15 0x00007999e87826b9 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x11826b9)
# #16 0x00007999e84f7772 llvm::FPPassManager::runOnFunction(llvm::Function&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xef7772)
# #17 0x00007999e84fd2f4 llvm::FPPassManager::runOnModule(llvm::Module&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xefd2f4)
# #18 0x00007999e84f7e9f llvm::legacy::PassManagerImpl::run(llvm::Module&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xef7e9f)
# #19 0x00007999e99f7d61 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x23f7d61)
# #20 0x00007999e99f8c91 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x23f8c91)
# #21 0x00007999e99f8b10 llvm::lto::thinBackend(llvm::lto::Config const&, unsigned int, std::function<llvm::Expected<std::unique_ptr<llvm::CachedFileStream, std::default_delete<llvm::CachedFileStream>>> (unsigned int, llvm::Twine const&)>, llvm::Module&, llvm::ModuleSummaryIndex const&, llvm::DenseMap<llvm::StringRef, std::unordered_set<unsigned long, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<unsigned long>>, llvm::DenseMapInfo<llvm::StringRef, void
# >, llvm::detail::DenseMapPair<llvm::StringRef, std::unordered_set<unsigned long, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<unsigned long>>>> const&, llvm::DenseMap<unsigned long, llvm::GlobalValueSummary*, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, llvm::GlobalValueSummary*>> const&, llvm::MapVector<llvm::StringRef, llvm::BitcodeModule, llvm::DenseMap<llvm::StringRef, unsigned int, llvm::DenseMapInfo<llvm::S
# tringRef, void>, llvm::detail::DenseMapPair<llvm::StringRef, unsigned int>>, llvm::SmallVector<std::pair<llvm::StringRef, llvm::BitcodeModule>, 0u>>*, std::vector<unsigned char, std::allocator<unsigned char>> const&) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x23f8b10)
# #22 0x00007999e99f248d (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x23f248d)
# #23 0x00007999e99f1cd6 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0x23f1cd6)
# #24 0x00007999e82c9beb (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xcc9beb)
# #25 0x00007999e834ebe3 llvm::ThreadPool::processTasks(llvm::ThreadPoolTaskGroup*) (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xd4ebe3)
# #26 0x00007999e834f704 (/usr/lib/llvm-18/bin/../lib/libLLVM.so.18.1+0xd4f704)
# #27 0x00007999e6e9ca94 (/lib/x86_64-linux-gnu/libc.so.6+0x9ca94)
# #28 0x00007999e6f29c3c (/lib/x86_64-linux-gnu/libc.so.6+0x129c3c)
# clang++-18: error: unable to execute command: Aborted (core dumped)
# clang++-18: error: linker command failed due to signal (use -v to see invocation)
# ^[[A^Cninja: build stopped: interrupted by user.

View File

@ -1,7 +1,7 @@
# The Dockerfile.ubuntu exists for the tests/ci/docker_server.py script
# If the image is built from Dockerfile.alpine, then the `-alpine` suffix is added automatically,
# so the only purpose of Dockerfile.ubuntu is to push `latest`, `head` and so on w/o suffixes
FROM ubuntu:20.04 AS glibc-donor
FROM ubuntu:22.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
@ -9,7 +9,11 @@ RUN arch=${TARGETARCH:-amd64} \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu \
&& case $arch in \
amd64) ln /lib/linux-gnu/ld-linux-x86-64.so.2 /lib/linux-gnu/ld-2.35.so ;; \
arm64) ln /lib/linux-gnu/ld-linux-aarch64.so.1 /lib/linux-gnu/ld-2.35.so ;; \
esac
FROM alpine
@ -20,7 +24,7 @@ ENV LANG=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.35.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh
@ -34,7 +38,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.9.2.42"
ARG VERSION="24.10.1.2812"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -35,7 +35,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.9.2.42"
ARG VERSION="24.10.1.2812"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -1,4 +1,4 @@
FROM ubuntu:20.04
FROM ubuntu:22.04
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
# It could be removed after we move on a version 23:04+
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.9.2.42"
ARG VERSION="24.10.1.2812"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off

View File

@ -20,6 +20,7 @@ For more information and documentation see https://clickhouse.com/.
- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A) and additionally the Load-Acquire RCpc register. The register is optional in version ARMv8.2-A and mandatory in [ARMv8.3-A](https://en.wikipedia.org/wiki/AArch64#ARMv8.3-A). Supported in Graviton >=2, Azure and GCP instances. Examples for unsupported devices are Raspberry Pi 4 (ARMv8.0-A) and Jetson AGX Xavier/Orin (ARMv8.2-A).
- Since the Clickhouse 24.11 Ubuntu images started using `ubuntu:22.04` as its base image. It requires docker version >= `20.10.10` containing [patch](https://github.com/moby/moby/commit/977283509f75303bc6612665a04abf76ff1d2468). As a workaround you could use `docker run [--privileged | --security-opt seccomp=unconfined]` instead, however that has security implications.
## How to use this image

View File

@ -33,8 +33,6 @@ RUN apt-get update \
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
ENV FUZZER_ARGS="-max_total_time=60"
SHELL ["/bin/bash", "-c"]
# docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer

View File

@ -1,16 +0,0 @@
# Since right now we can't set volumes to the docker during build, we split building container in stages:
# 1. build base container
# 2. run base conatiner with mounted volumes
# 3. commit container as image
FROM ubuntu:20.04 as clickhouse-test-runner-base
# A volume where directory with clickhouse packages to be mounted,
# for later installing.
VOLUME /packages
CMD apt-get update ;\
DEBIAN_FRONTEND=noninteractive \
apt install -y /packages/clickhouse-common-static_*.deb \
/packages/clickhouse-client_*.deb \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -4,9 +4,13 @@ sidebar_position: 50
sidebar_label: EmbeddedRocksDB
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
# EmbeddedRocksDB Engine
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
<CloudNotSupportedBadge />
This engine allows integrating ClickHouse with [RocksDB](http://rocksdb.org/).
## Creating a Table {#creating-a-table}

View File

@ -23,6 +23,7 @@ functions in ClickHouse. The sample datasets include:
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
- The [Laion dataset](../getting-started/example-datasets/laion.md) has an example of [Approximate nearest neighbor search indexes](../engines/table-engines/mergetree-family/annindexes.md) usage
- The [TPC-H](../getting-started/example-datasets/tpch.md), [TPC-DS](../getting-started/example-datasets/tpcds.md), and [Star Schema (SSB)](../getting-started/example-datasets/star-schema.md) industry benchmarks for analytics databases
- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset
- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3
- [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) shows how to generate random data if none of the above fit your needs.

View File

@ -190,6 +190,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--config-file` The name of the configuration file.
- `--secure` If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl).
- `--history_file` — Path to a file containing command history.
- `--history_max_entries` — Maximum number of entries in the history file. Default value: 1 000 000.
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
- `--hardware-utilization` — Print hardware utilization information in progress bar.
- `--print-profile-events` Print `ProfileEvents` packets.

View File

@ -9,7 +9,7 @@ sidebar_label: Prometheus protocols
## Exposing metrics {#expose}
:::note
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
If you are using ClickHouse Cloud, you can expose metrics to Prometheus using the [Prometheus Integration](/en/integrations/prometheus).
:::
ClickHouse can expose its own metrics for scraping from Prometheus:

View File

@ -65,6 +65,34 @@ sudo rm -f /etc/yum.repos.d/clickhouse.repo
After that follow the [install guide](../getting-started/install.md#from-rpm-packages)
### You Can't Run Docker Container
You are running a simple `docker run clickhouse/clickhouse-server` and it crashes with a stack trace similar to following:
```
$ docker run -it clickhouse/clickhouse-server
........
2024.11.06 21:04:48.912036 [ 1 ] {} <Information> SentryWriter: Sending crash reports is disabled
Poco::Exception. Code: 1000, e.code() = 0, System exception: cannot start thread, Stack trace (when copying this message, always include the lines below):
0. Poco::ThreadImpl::startImpl(Poco::SharedPtr<Poco::Runnable, Poco::ReferenceCounter, Poco::ReleasePolicy<Poco::Runnable>>) @ 0x00000000157c7b34
1. Poco::Thread::start(Poco::Runnable&) @ 0x00000000157c8a0e
2. BaseDaemon::initializeTerminationAndSignalProcessing() @ 0x000000000d267a14
3. BaseDaemon::initialize(Poco::Util::Application&) @ 0x000000000d2652cb
4. DB::Server::initialize(Poco::Util::Application&) @ 0x000000000d128b38
5. Poco::Util::Application::run() @ 0x000000001581cfda
6. DB::Server::run() @ 0x000000000d1288f0
7. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000015825e27
8. mainEntryClickHouseServer(int, char**) @ 0x000000000d125b38
9. main @ 0x0000000007ea4eee
10. ? @ 0x00007f67ff946d90
11. ? @ 0x00007f67ff946e40
12. _start @ 0x00000000062e802e
(version 24.10.1.2812 (official build))
```
The reason is an old docker daemon with version lower than `20.10.10`. A way to fix it either upgrading it, or running `docker run [--privileged | --security-opt seccomp=unconfined]`. The latter has security implications.
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:

View File

@ -19,7 +19,7 @@ Columns:
- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted.
- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values:
- `0` — The row describes a partial revoke.
- `1` — The row describes a grant.
- `0` — The row describes a grant.
- `1` — The row describes a partial revoke.
- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#granting-privilege-syntax).

View File

@ -17,7 +17,7 @@ anyLast(column) [RESPECT NULLS]
- `column`: The column name.
:::note
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not.
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the last value passed, regardless of whether it is `NULL` or not.
:::
**Returned value**

View File

@ -58,10 +58,10 @@ SELECT json FROM test;
└───────────────────────────────────┘
```
Using CAST from 'String':
Using CAST from `String`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json;
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json;
```
```text
@ -70,7 +70,47 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json
└────────────────────────────────────────────────┘
```
CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later.
Using CAST from `Tuple`:
```sql
SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
Using CAST from `Map`:
```sql
SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
Using CAST from deprecated `Object('json')`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
└────────────────────────────────────────────────┘
```
:::note
CAST from `Tuple`/`Map`/`Object('json')` to `JSON` is implemented via serializing the column into `String` column containing JSON objects and deserializing it back to `JSON` type column.
:::
CAST between `JSON` types with different arguments will be supported later.
## Reading JSON paths as subcolumns
@ -630,6 +670,28 @@ SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-publi
└─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘
```
## ALTER MODIFY COLUMN to JSON type
It's possible to alter an existing table and change the type of the column to the new `JSON` type. Right now only alter from `String` type is supported.
**Example**
```sql
CREATE TABLE test (json String) ENGINE=MergeTree ORDeR BY tuple();
INSERT INTO test VALUES ('{"a" : 42}'), ('{"a" : 43, "b" : "Hello"}'), ('{"a" : 44, "b" : [1, 2, 3]}')), ('{"c" : "2020-01-01"}');
ALTER TABLE test MODIFY COLUMN json JSON;
SELECT json, json.a, json.b, json.c FROM test;
```
```text
┌─json─────────────────────────┬─json.a─┬─json.b──┬─json.c─────┐
│ {"a":"42"} │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ {"a":"43","b":"Hello"} │ 43 │ Hello │ ᴺᵁᴸᴸ │
│ {"a":"44","b":["1","2","3"]} │ 44 │ [1,2,3] │ ᴺᵁᴸᴸ │
│ {"c":"2020-01-01"} │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 │
└──────────────────────────────┴────────┴─────────┴────────────┘
```
## Tips for better usage of the JSON type
Before creating `JSON` column and loading data into it, consider the following tips:

View File

@ -55,7 +55,7 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
## Materialized View
``` sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] [TO[db.]name] [ENGINE = engine] [POPULATE]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
[COMMENT 'comment']

View File

@ -117,6 +117,7 @@ GRANT SELECT ON db*.* TO john -- correct
GRANT SELECT ON *.my_table TO john -- wrong
GRANT SELECT ON foo*bar TO john -- wrong
GRANT SELECT ON *suffix TO john -- wrong
GRANT SELECT(foo) ON db.table* TO john -- wrong
```
## Privileges
@ -242,10 +243,13 @@ Hierarchy of privileges:
- `HDFS`
- `HIVE`
- `JDBC`
- `KAFKA`
- `MONGO`
- `MYSQL`
- `NATS`
- `ODBC`
- `POSTGRES`
- `RABBITMQ`
- `REDIS`
- `REMOTE`
- `S3`
@ -524,10 +528,13 @@ Allows using external data sources. Applies to [table engines](../../engines/tab
- `HDFS`. Level: `GLOBAL`
- `HIVE`. Level: `GLOBAL`
- `JDBC`. Level: `GLOBAL`
- `KAFKA`. Level: `GLOBAL`
- `MONGO`. Level: `GLOBAL`
- `MYSQL`. Level: `GLOBAL`
- `NATS`. Level: `GLOBAL`
- `ODBC`. Level: `GLOBAL`
- `POSTGRES`. Level: `GLOBAL`
- `RABBITMQ`. Level: `GLOBAL`
- `REDIS`. Level: `GLOBAL`
- `REMOTE`. Level: `GLOBAL`
- `S3`. Level: `GLOBAL`

View File

@ -291,7 +291,7 @@ All missed values of `expr` column will be filled sequentially and other columns
To fill multiple columns, add `WITH FILL` modifier with optional parameters after each field name in `ORDER BY` section.
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr] [STALENESS const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr] [STALENESS numeric_expr]
[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]]
```
@ -300,6 +300,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
When `STALENESS const_numeric_expr` is defined, the query will generate rows until the difference from the previous row in the original data exceeds `const_numeric_expr`.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeat previous value. Omitted list will result in including all allowed columns.
Example of a query without `WITH FILL`:
@ -497,6 +498,64 @@ Result:
└────────────┴────────────┴──────────┘
```
Example of a query without `STALENESS`:
``` sql
SELECT number as key, 5 * number value, 'original' AS source
FROM numbers(16) WHERE key % 5 == 0
ORDER BY key WITH FILL;
```
Result:
``` text
┌─key─┬─value─┬─source───┐
1. │ 0 │ 0 │ original │
2. │ 1 │ 0 │ │
3. │ 2 │ 0 │ │
4. │ 3 │ 0 │ │
5. │ 4 │ 0 │ │
6. │ 5 │ 25 │ original │
7. │ 6 │ 0 │ │
8. │ 7 │ 0 │ │
9. │ 8 │ 0 │ │
10. │ 9 │ 0 │ │
11. │ 10 │ 50 │ original │
12. │ 11 │ 0 │ │
13. │ 12 │ 0 │ │
14. │ 13 │ 0 │ │
15. │ 14 │ 0 │ │
16. │ 15 │ 75 │ original │
└─────┴───────┴──────────┘
```
Same query after applying `STALENESS 3`:
``` sql
SELECT number as key, 5 * number value, 'original' AS source
FROM numbers(16) WHERE key % 5 == 0
ORDER BY key WITH FILL STALENESS 3;
```
Result:
``` text
┌─key─┬─value─┬─source───┐
1. │ 0 │ 0 │ original │
2. │ 1 │ 0 │ │
3. │ 2 │ 0 │ │
4. │ 5 │ 25 │ original │
5. │ 6 │ 0 │ │
6. │ 7 │ 0 │ │
7. │ 10 │ 50 │ original │
8. │ 11 │ 0 │ │
9. │ 12 │ 0 │ │
10. │ 15 │ 75 │ original │
11. │ 16 │ 0 │ │
12. │ 17 │ 0 │ │
└─────┴───────┴──────────┘
```
Example of a query without `INTERPOLATE`:
``` sql

View File

@ -95,7 +95,7 @@ sudo yum install -y clickhouse-server clickhouse-client
sudo systemctl enable clickhouse-server
sudo systemctl start clickhouse-server
sudo systemctl status clickhouse-server
clickhouse-client # илм "clickhouse-client --password" если установлен пароль
clickhouse-client # или "clickhouse-client --password" если установлен пароль
```
Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`.

View File

@ -39,7 +39,7 @@ SELECT a, b, c FROM (SELECT ...)
## Материализованные представления {#materialized}
``` sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] [TO[db.]name] [ENGINE = engine] [POPULATE]
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
AS SELECT ...
```

View File

@ -192,14 +192,23 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
- `addressToSymbol`
- `demangle`
- [SOURCES](#grant-sources)
- `AZURE`
- `FILE`
- `URL`
- `REMOTE`
- `MYSQL`
- `ODBC`
- `JDBC`
- `HDFS`
- `HIVE`
- `JDBC`
- `KAFKA`
- `MONGO`
- `MYSQL`
- `NATS`
- `ODBC`
- `POSTGRES`
- `RABBITMQ`
- `REDIS`
- `REMOTE`
- `S3`
- `SQLITE`
- `URL`
- [dictGet](#grant-dictget)
Примеры того, как трактуется данная иерархия:
@ -461,14 +470,23 @@ GRANT INSERT(x,y) ON db.table TO john
Разрешает использовать внешние источники данных. Применяется к [движкам таблиц](../../engines/table-engines/index.md) и [табличным функциям](../table-functions/index.md#table-functions).
- `SOURCES`. Уровень: `GROUP`
- `AZURE`. Уровень: `GLOBAL`
- `FILE`. Уровень: `GLOBAL`
- `URL`. Уровень: `GLOBAL`
- `REMOTE`. Уровень: `GLOBAL`
- `MYSQL`. Уровень: `GLOBAL`
- `ODBC`. Уровень: `GLOBAL`
- `JDBC`. Уровень: `GLOBAL`
- `HDFS`. Уровень: `GLOBAL`
- `HIVE`. Уровень: `GLOBAL`
- `JDBC`. Уровень: `GLOBAL`
- `KAFKA`. Уровень: `GLOBAL`
- `MONGO`. Уровень: `GLOBAL`
- `MYSQL`. Уровень: `GLOBAL`
- `NATS`. Уровень: `GLOBAL`
- `ODBC`. Уровень: `GLOBAL`
- `POSTGRES`. Уровень: `GLOBAL`
- `RABBITMQ`. Уровень: `GLOBAL`
- `REDIS`. Уровень: `GLOBAL`
- `REMOTE`. Уровень: `GLOBAL`
- `S3`. Уровень: `GLOBAL`
- `SQLITE`. Уровень: `GLOBAL`
- `URL`. Уровень: `GLOBAL`
Привилегия `SOURCES` разрешает использование всех источников. Также вы можете присвоить привилегию для каждого источника отдельно. Для использования источников необходимы дополнительные привилегии.

View File

@ -39,7 +39,7 @@ SELECT a, b, c FROM (SELECT ...)
## Materialized {#materialized}
``` sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
物化视图存储由相应的[SELECT](../../../sql-reference/statements/select/index.md)管理.

View File

@ -170,14 +170,23 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
- `addressToSymbol`
- `demangle`
- [SOURCES](#grant-sources)
- `AZURE`
- `FILE`
- `URL`
- `REMOTE`
- `YSQL`
- `ODBC`
- `JDBC`
- `HDFS`
- `HIVE`
- `JDBC`
- `KAFKA`
- `MONGO`
- `MYSQL`
- `NATS`
- `ODBC`
- `POSTGRES`
- `RABBITMQ`
- `REDIS`
- `REMOTE`
- `S3`
- `SQLITE`
- `URL`
- [dictGet](#grant-dictget)
如何对待该层级的示例:
@ -428,14 +437,23 @@ GRANT INSERT(x,y) ON db.table TO john
允许在 [table engines](../../engines/table-engines/index.md) 和 [table functions](../../sql-reference/table-functions/index.md#table-functions)中使用外部数据源。
- `SOURCES`. 级别: `GROUP`
- `AZURE`. 级别: `GLOBAL`
- `FILE`. 级别: `GLOBAL`
- `URL`. 级别: `GLOBAL`
- `REMOTE`. 级别: `GLOBAL`
- `YSQL`. 级别: `GLOBAL`
- `ODBC`. 级别: `GLOBAL`
- `JDBC`. 级别: `GLOBAL`
- `HDFS`. 级别: `GLOBAL`
- `HIVE`. 级别: `GLOBAL`
- `JDBC`. 级别: `GLOBAL`
- `KAFKA`. 级别: `GLOBAL`
- `MONGO`. 级别: `GLOBAL`
- `MYSQL`. 级别: `GLOBAL`
- `NATS`. 级别: `GLOBAL`
- `ODBC`. 级别: `GLOBAL`
- `POSTGRES`. 级别: `GLOBAL`
- `RABBITMQ`. 级别: `GLOBAL`
- `REDIS`. 级别: `GLOBAL`
- `REMOTE`. 级别: `GLOBAL`
- `S3`. 级别: `GLOBAL`
- `SQLITE`. 级别: `GLOBAL`
- `URL`. 级别: `GLOBAL`
`SOURCES` 权限允许使用所有数据源。当然也可以单独对每个数据源进行授权。要使用数据源时,还需要额外的权限。

View File

@ -192,6 +192,10 @@ void Client::parseConnectionsCredentials(Poco::Util::AbstractConfiguration & con
history_file = home_path + "/" + history_file.substr(1);
config.setString("history_file", history_file);
}
if (config.has(prefix + ".history_max_entries"))
{
config.setUInt("history_max_entries", history_max_entries);
}
if (config.has(prefix + ".accept-invalid-certificate"))
config.setBool("accept-invalid-certificate", config.getBool(prefix + ".accept-invalid-certificate"));
}

View File

@ -236,6 +236,7 @@ void DisksApp::runInteractiveReplxx()
ReplxxLineReader lr(
suggest,
history_file,
history_max_entries,
/* multiline= */ false,
/* ignore_shell_suspend= */ false,
query_extenders,
@ -398,6 +399,8 @@ void DisksApp::initializeHistoryFile()
throw;
}
}
history_max_entries = config().getUInt("history-max-entries", 1000000);
}
void DisksApp::init(const std::vector<String> & common_arguments)

View File

@ -62,6 +62,8 @@ private:
// Fields responsible for the REPL work
String history_file;
UInt32 history_max_entries = 0; /// Maximum number of entries in the history file. Needs to be initialized to 0 since we don't have a proper constructor. Worry not, actual value is set within the initializeHistoryFile method.
LineReader::Suggest suggest;
static LineReader::Patterns query_extenders;
static LineReader::Patterns query_delimiters;

View File

@ -243,6 +243,8 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
}
}
history_max_entries = config().getUInt("history-max-entries", 1000000);
String default_log_level;
if (config().has("query"))
/// We don't want to see any information log in query mode, unless it was set explicitly
@ -319,6 +321,7 @@ void KeeperClient::runInteractiveReplxx()
ReplxxLineReader lr(
suggest,
history_file,
history_max_entries,
/* multiline= */ false,
/* ignore_shell_suspend= */ false,
query_extenders,

View File

@ -59,6 +59,8 @@ protected:
std::vector<String> getCompletions(const String & prefix) const;
String history_file;
UInt32 history_max_entries; /// Maximum number of entries in the history file.
LineReader::Suggest suggest;
zkutil::ZooKeeperArgs zk_args;

View File

@ -1353,9 +1353,11 @@ try
}
FailPointInjection::enableFromGlobalConfig(config());
#endif
memory_worker.start();
#if defined(OS_LINUX)
int default_oom_score = 0;
#if !defined(NDEBUG)

View File

@ -608,7 +608,7 @@ AuthResult AccessControl::authenticate(const Credentials & credentials, const Po
}
catch (...)
{
tryLogCurrentException(getLogger(), "from: " + address.toString() + ", user: " + credentials.getUserName() + ": Authentication failed");
tryLogCurrentException(getLogger(), "from: " + address.toString() + ", user: " + credentials.getUserName() + ": Authentication failed", LogsLevel::information);
WriteBufferFromOwnString message;
message << credentials.getUserName() << ": Authentication failed: password is incorrect, or there is no user with such name.";
@ -622,8 +622,9 @@ AuthResult AccessControl::authenticate(const Credentials & credentials, const Po
<< "and deleting this file will reset the password.\n"
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed.\n\n";
/// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons,
/// only the log will show the exact reason.
/// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons.
/// Only the log ((*), above) will show the exact reason. Note that (*) logs at information level instead of the default error level as
/// authentication failures are not an unusual event.
throw Exception(PreformattedMessage{message.str(),
"{}: Authentication failed: password is incorrect, or there is no user with such name",
std::vector<std::string>{credentials.getUserName()}},

View File

@ -243,6 +243,9 @@ enum class AccessType : uint8_t
M(S3, "", GLOBAL, SOURCES) \
M(HIVE, "", GLOBAL, SOURCES) \
M(AZURE, "", GLOBAL, SOURCES) \
M(KAFKA, "", GLOBAL, SOURCES) \
M(NATS, "", GLOBAL, SOURCES) \
M(RABBITMQ, "", GLOBAL, SOURCES) \
M(SOURCES, "", GROUP, ALL) \
\
M(CLUSTER, "", GLOBAL, ALL) /* ON CLUSTER queries */ \

View File

@ -52,7 +52,10 @@ namespace
{AccessType::HDFS, "HDFS"},
{AccessType::S3, "S3"},
{AccessType::HIVE, "Hive"},
{AccessType::AZURE, "AzureBlobStorage"}
{AccessType::AZURE, "AzureBlobStorage"},
{AccessType::KAFKA, "Kafka"},
{AccessType::NATS, "NATS"},
{AccessType::RABBITMQ, "RabbitMQ"}
};

View File

@ -15,6 +15,9 @@ public:
explicit Credentials() = default;
explicit Credentials(const String & user_name_);
Credentials(const Credentials &) = default;
Credentials(Credentials &&) = default;
virtual ~Credentials() = default;
const String & getUserName() const;

View File

@ -387,7 +387,7 @@ template <typename Value, bool return_float, bool interpolated>
using FuncQuantileExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantileExactWeighted,
std::conditional_t<interpolated, NameQuantileExactWeightedInterpolated, NameQuantileExactWeighted>,
true,
std::conditional_t<return_float, Float64, void>,
false,
@ -396,7 +396,7 @@ template <typename Value, bool return_float, bool interpolated>
using FuncQuantilesExactWeighted = AggregateFunctionQuantile<
Value,
QuantileExactWeighted<Value, interpolated>,
NameQuantilesExactWeighted,
std::conditional_t<interpolated, NameQuantilesExactWeightedInterpolated, NameQuantilesExactWeighted>,
true,
std::conditional_t<return_float, Float64, void>,
true,

View File

@ -1,2 +1,2 @@
clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS})
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_aggregate_functions)
target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_aggregate_functions dbms)

View File

@ -498,6 +498,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context);
if (order_by_element.getFillStep())
sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context);
if (order_by_element.getFillStaleness())
sort_node->getFillStaleness() = buildExpression(order_by_element.getFillStaleness(), context);
list_node->getNodes().push_back(std::move(sort_node));
}

View File

@ -437,8 +437,13 @@ ProjectionName QueryAnalyzer::calculateWindowProjectionName(const QueryTreeNodeP
return buffer.str();
}
ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(const QueryTreeNodePtr & sort_column_node, const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name, const ProjectionName & fill_to_expression_projection_name, const ProjectionName & fill_step_expression_projection_name)
ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(
const QueryTreeNodePtr & sort_column_node,
const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name,
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name,
const ProjectionName & fill_staleness_expression_projection_name)
{
auto & sort_node_typed = sort_column_node->as<SortNode &>();
@ -468,6 +473,9 @@ ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(const QueryTreeN
if (sort_node_typed.hasFillStep())
sort_column_projection_name_buffer << " STEP " << fill_step_expression_projection_name;
if (sort_node_typed.hasFillStaleness())
sort_column_projection_name_buffer << " STALENESS " << fill_staleness_expression_projection_name;
}
return sort_column_projection_name_buffer.str();
@ -3998,6 +4006,7 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
ProjectionNames fill_from_expression_projection_names;
ProjectionNames fill_to_expression_projection_names;
ProjectionNames fill_step_expression_projection_names;
ProjectionNames fill_staleness_expression_projection_names;
auto & sort_node_list_typed = sort_node_list->as<ListNode &>();
for (auto & node : sort_node_list_typed.getNodes())
@ -4088,11 +4097,38 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
fill_step_expression_projection_names_size);
}
if (sort_node.hasFillStaleness())
{
fill_staleness_expression_projection_names = resolveExpressionNode(sort_node.getFillStaleness(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
const auto * constant_node = sort_node.getFillStaleness()->as<ConstantNode>();
if (!constant_node)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Sort FILL STALENESS expression must be constant with numeric or interval type. Actual {}. In scope {}",
sort_node.getFillStaleness()->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
bool is_number = isColumnedAsNumber(constant_node->getResultType());
bool is_interval = WhichDataType(constant_node->getResultType()).isInterval();
if (!is_number && !is_interval)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Sort FILL STALENESS expression must be constant with numeric or interval type. Actual {}. In scope {}",
sort_node.getFillStaleness()->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
size_t fill_staleness_expression_projection_names_size = fill_staleness_expression_projection_names.size();
if (fill_staleness_expression_projection_names_size != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Sort FILL STALENESS expression expected 1 projection name. Actual {}",
fill_staleness_expression_projection_names_size);
}
auto sort_column_projection_name = calculateSortColumnProjectionName(node,
sort_expression_projection_names[0],
fill_from_expression_projection_names.empty() ? "" : fill_from_expression_projection_names.front(),
fill_to_expression_projection_names.empty() ? "" : fill_to_expression_projection_names.front(),
fill_step_expression_projection_names.empty() ? "" : fill_step_expression_projection_names.front());
fill_step_expression_projection_names.empty() ? "" : fill_step_expression_projection_names.front(),
fill_staleness_expression_projection_names.empty() ? "" : fill_staleness_expression_projection_names.front());
result_projection_names.push_back(std::move(sort_column_projection_name));
@ -4100,6 +4136,7 @@ ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_
fill_from_expression_projection_names.clear();
fill_to_expression_projection_names.clear();
fill_step_expression_projection_names.clear();
fill_staleness_expression_projection_names.clear();
}
return result_projection_names;

View File

@ -140,7 +140,8 @@ private:
const ProjectionName & sort_expression_projection_name,
const ProjectionName & fill_from_expression_projection_name,
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name);
const ProjectionName & fill_step_expression_projection_name,
const ProjectionName & fill_staleness_expression_projection_name);
QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);

View File

@ -69,6 +69,12 @@ void SortNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si
buffer << '\n' << std::string(indent + 2, ' ') << "FILL STEP\n";
getFillStep()->dumpTreeImpl(buffer, format_state, indent + 4);
}
if (hasFillStaleness())
{
buffer << '\n' << std::string(indent + 2, ' ') << "FILL STALENESS\n";
getFillStaleness()->dumpTreeImpl(buffer, format_state, indent + 4);
}
}
bool SortNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const
@ -132,6 +138,8 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const
result->setFillTo(getFillTo()->toAST(options));
if (hasFillStep())
result->setFillStep(getFillStep()->toAST(options));
if (hasFillStaleness())
result->setFillStaleness(getFillStaleness()->toAST(options));
return result;
}

View File

@ -105,6 +105,24 @@ public:
return children[fill_step_child_index];
}
/// Returns true if sort node has fill staleness, false otherwise
bool hasFillStaleness() const
{
return children[fill_staleness_child_index] != nullptr;
}
/// Get fill staleness
const QueryTreeNodePtr & getFillStaleness() const
{
return children[fill_staleness_child_index];
}
/// Get fill staleness
QueryTreeNodePtr & getFillStaleness()
{
return children[fill_staleness_child_index];
}
/// Get collator
const std::shared_ptr<Collator> & getCollator() const
{
@ -144,7 +162,8 @@ private:
static constexpr size_t fill_from_child_index = 1;
static constexpr size_t fill_to_child_index = 2;
static constexpr size_t fill_step_child_index = 3;
static constexpr size_t children_size = fill_step_child_index + 1;
static constexpr size_t fill_staleness_child_index = 4;
static constexpr size_t children_size = fill_staleness_child_index + 1;
SortDirection sort_direction = SortDirection::ASCENDING;
std::optional<SortDirection> nulls_sort_direction;

View File

@ -0,0 +1,135 @@
#include <Backups/BackupConcurrencyCheck.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CONCURRENT_ACCESS_NOT_SUPPORTED;
}
BackupConcurrencyCheck::BackupConcurrencyCheck(
const UUID & backup_or_restore_uuid_,
bool is_restore_,
bool on_cluster_,
bool allow_concurrency_,
BackupConcurrencyCounters & counters_)
: is_restore(is_restore_), backup_or_restore_uuid(backup_or_restore_uuid_), on_cluster(on_cluster_), counters(counters_)
{
std::lock_guard lock{counters.mutex};
if (!allow_concurrency_)
{
bool found_concurrent_operation = false;
if (is_restore)
{
size_t num_local_restores = counters.local_restores;
size_t num_on_cluster_restores = counters.on_cluster_restores.size();
if (on_cluster)
{
if (!counters.on_cluster_restores.contains(backup_or_restore_uuid))
++num_on_cluster_restores;
}
else
{
++num_local_restores;
}
found_concurrent_operation = (num_local_restores + num_on_cluster_restores > 1);
}
else
{
size_t num_local_backups = counters.local_backups;
size_t num_on_cluster_backups = counters.on_cluster_backups.size();
if (on_cluster)
{
if (!counters.on_cluster_backups.contains(backup_or_restore_uuid))
++num_on_cluster_backups;
}
else
{
++num_local_backups;
}
found_concurrent_operation = (num_local_backups + num_on_cluster_backups > 1);
}
if (found_concurrent_operation)
throwConcurrentOperationNotAllowed(is_restore);
}
if (on_cluster)
{
if (is_restore)
++counters.on_cluster_restores[backup_or_restore_uuid];
else
++counters.on_cluster_backups[backup_or_restore_uuid];
}
else
{
if (is_restore)
++counters.local_restores;
else
++counters.local_backups;
}
}
BackupConcurrencyCheck::~BackupConcurrencyCheck()
{
std::lock_guard lock{counters.mutex};
if (on_cluster)
{
if (is_restore)
{
auto it = counters.on_cluster_restores.find(backup_or_restore_uuid);
if (it != counters.on_cluster_restores.end())
{
if (!--it->second)
counters.on_cluster_restores.erase(it);
}
}
else
{
auto it = counters.on_cluster_backups.find(backup_or_restore_uuid);
if (it != counters.on_cluster_backups.end())
{
if (!--it->second)
counters.on_cluster_backups.erase(it);
}
}
}
else
{
if (is_restore)
--counters.local_restores;
else
--counters.local_backups;
}
}
void BackupConcurrencyCheck::throwConcurrentOperationNotAllowed(bool is_restore)
{
throw Exception(
ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
"Concurrent {} are not allowed, turn on setting '{}'",
is_restore ? "restores" : "backups",
is_restore ? "allow_concurrent_restores" : "allow_concurrent_backups");
}
BackupConcurrencyCounters::BackupConcurrencyCounters() = default;
BackupConcurrencyCounters::~BackupConcurrencyCounters()
{
if (local_backups > 0 || local_restores > 0 || !on_cluster_backups.empty() || !on_cluster_restores.empty())
LOG_ERROR(getLogger(__PRETTY_FUNCTION__), "Some backups or restores are processing");
}
}

View File

@ -0,0 +1,55 @@
#pragma once
#include <Core/UUID.h>
#include <base/scope_guard.h>
#include <mutex>
#include <unordered_map>
namespace DB
{
class BackupConcurrencyCounters;
/// Local checker for concurrent BACKUP or RESTORE operations.
/// This class is used by implementations of IBackupCoordination and IRestoreCoordination
/// to throw an exception if concurrent backups or restores are not allowed.
class BackupConcurrencyCheck
{
public:
/// Checks concurrency of a BACKUP operation or a RESTORE operation.
/// Keep a constructed instance of BackupConcurrencyCheck until the operation is done.
BackupConcurrencyCheck(
const UUID & backup_or_restore_uuid_,
bool is_restore_,
bool on_cluster_,
bool allow_concurrency_,
BackupConcurrencyCounters & counters_);
~BackupConcurrencyCheck();
[[noreturn]] static void throwConcurrentOperationNotAllowed(bool is_restore);
private:
const bool is_restore;
const UUID backup_or_restore_uuid;
const bool on_cluster;
BackupConcurrencyCounters & counters;
};
class BackupConcurrencyCounters
{
public:
BackupConcurrencyCounters();
~BackupConcurrencyCounters();
private:
friend class BackupConcurrencyCheck;
size_t local_backups TSA_GUARDED_BY(mutex) = 0;
size_t local_restores TSA_GUARDED_BY(mutex) = 0;
std::unordered_map<UUID /* backup_uuid */, size_t /* num_refs */> on_cluster_backups TSA_GUARDED_BY(mutex);
std::unordered_map<UUID /* restore_uuid */, size_t /* num_refs */> on_cluster_restores TSA_GUARDED_BY(mutex);
std::mutex mutex;
};
}

View File

@ -0,0 +1,64 @@
#include <Backups/BackupCoordinationCleaner.h>
namespace DB
{
BackupCoordinationCleaner::BackupCoordinationCleaner(const String & zookeeper_path_, const WithRetries & with_retries_, LoggerPtr log_)
: zookeeper_path(zookeeper_path_), with_retries(with_retries_), log(log_)
{
}
void BackupCoordinationCleaner::cleanup()
{
tryRemoveAllNodes(/* throw_if_error = */ true, /* retries_kind = */ WithRetries::kNormal);
}
bool BackupCoordinationCleaner::tryCleanupAfterError() noexcept
{
return tryRemoveAllNodes(/* throw_if_error = */ false, /* retries_kind = */ WithRetries::kNormal);
}
bool BackupCoordinationCleaner::tryRemoveAllNodes(bool throw_if_error, WithRetries::Kind retries_kind)
{
{
std::lock_guard lock{mutex};
if (cleanup_result.succeeded)
return true;
if (cleanup_result.exception)
{
if (throw_if_error)
std::rethrow_exception(cleanup_result.exception);
return false;
}
}
try
{
LOG_TRACE(log, "Removing nodes from ZooKeeper");
auto holder = with_retries.createRetriesControlHolder("removeAllNodes", retries_kind);
holder.retries_ctl.retryLoop([&, &zookeeper = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zookeeper);
zookeeper->removeRecursive(zookeeper_path);
});
std::lock_guard lock{mutex};
cleanup_result.succeeded = true;
return true;
}
catch (...)
{
LOG_TRACE(log, "Caught exception while removing nodes from ZooKeeper for this restore: {}",
getCurrentExceptionMessage(/* with_stacktrace= */ false, /* check_embedded_stacktrace= */ true));
std::lock_guard lock{mutex};
cleanup_result.exception = std::current_exception();
if (throw_if_error)
throw;
return false;
}
}
}

View File

@ -0,0 +1,40 @@
#pragma once
#include <Backups/WithRetries.h>
namespace DB
{
/// Removes all the nodes from ZooKeeper used to coordinate a BACKUP ON CLUSTER operation or
/// a RESTORE ON CLUSTER operation (successful or not).
/// This class is used by BackupCoordinationOnCluster and RestoreCoordinationOnCluster to cleanup.
class BackupCoordinationCleaner
{
public:
BackupCoordinationCleaner(const String & zookeeper_path_, const WithRetries & with_retries_, LoggerPtr log_);
void cleanup();
bool tryCleanupAfterError() noexcept;
private:
bool tryRemoveAllNodes(bool throw_if_error, WithRetries::Kind retries_kind);
const String zookeeper_path;
/// A reference to a field of the parent object which is either BackupCoordinationOnCluster or RestoreCoordinationOnCluster.
const WithRetries & with_retries;
const LoggerPtr log;
struct CleanupResult
{
bool succeeded = false;
std::exception_ptr exception;
};
CleanupResult cleanup_result TSA_GUARDED_BY(mutex);
std::mutex mutex;
};
}

View File

@ -1,5 +1,7 @@
#include <Backups/BackupCoordinationLocal.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <fmt/format.h>
@ -8,27 +10,20 @@
namespace DB
{
BackupCoordinationLocal::BackupCoordinationLocal(bool plain_backup_)
: log(getLogger("BackupCoordinationLocal")), file_infos(plain_backup_)
BackupCoordinationLocal::BackupCoordinationLocal(
const UUID & backup_uuid_,
bool is_plain_backup_,
bool allow_concurrent_backup_,
BackupConcurrencyCounters & concurrency_counters_)
: log(getLogger("BackupCoordinationLocal"))
, concurrency_check(backup_uuid_, /* is_restore = */ false, /* on_cluster = */ false, allow_concurrent_backup_, concurrency_counters_)
, file_infos(is_plain_backup_)
{
}
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStage(const String &, const String &)
{
}
void BackupCoordinationLocal::setError(const Exception &)
{
}
Strings BackupCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
ZooKeeperRetriesInfo BackupCoordinationLocal::getOnClusterInitializationKeeperRetriesInfo() const
{
return {};
}
@ -135,15 +130,4 @@ bool BackupCoordinationLocal::startWritingFile(size_t data_file_index)
return writing_files.emplace(data_file_index).second;
}
bool BackupCoordinationLocal::hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const
{
if (num_active_backups > 1)
{
LOG_WARNING(log, "Found concurrent backups: num_active_backups={}", num_active_backups);
return true;
}
return false;
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupConcurrencyCheck.h>
#include <Backups/BackupCoordinationFileInfos.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
@ -21,13 +22,21 @@ namespace DB
class BackupCoordinationLocal : public IBackupCoordination
{
public:
explicit BackupCoordinationLocal(bool plain_backup_);
explicit BackupCoordinationLocal(
const UUID & backup_uuid_,
bool is_plain_backup_,
bool allow_concurrent_backup_,
BackupConcurrencyCounters & concurrency_counters_);
~BackupCoordinationLocal() override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
Strings setStage(const String &, const String &, bool) override { return {}; }
void setBackupQueryWasSentToOtherHosts() override {}
bool trySetError(std::exception_ptr) override { return true; }
void finish() override {}
bool tryFinishAfterError() noexcept override { return true; }
void waitForOtherHostsToFinish() override {}
bool tryWaitForOtherHostsToFinishAfterError() noexcept override { return true; }
void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
@ -54,17 +63,18 @@ public:
BackupFileInfos getFileInfosForAllHosts() const override;
bool startWritingFile(size_t data_file_index) override;
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const override;
private:
LoggerPtr const log;
BackupConcurrencyCheck concurrency_check;
BackupCoordinationReplicatedTables TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
BackupCoordinationReplicatedAccess TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
BackupCoordinationReplicatedSQLObjects TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
BackupCoordinationFileInfos TSA_GUARDED_BY(file_infos_mutex) file_infos;
BackupCoordinationReplicatedTables replicated_tables TSA_GUARDED_BY(replicated_tables_mutex);
BackupCoordinationReplicatedAccess replicated_access TSA_GUARDED_BY(replicated_access_mutex);
BackupCoordinationReplicatedSQLObjects replicated_sql_objects TSA_GUARDED_BY(replicated_sql_objects_mutex);
BackupCoordinationFileInfos file_infos TSA_GUARDED_BY(file_infos_mutex);
BackupCoordinationKeeperMapTables keeper_map_tables TSA_GUARDED_BY(keeper_map_tables_mutex);
std::unordered_set<size_t> TSA_GUARDED_BY(writing_files_mutex) writing_files;
std::unordered_set<size_t> writing_files TSA_GUARDED_BY(writing_files_mutex);
mutable std::mutex replicated_tables_mutex;
mutable std::mutex replicated_access_mutex;

View File

@ -1,7 +1,4 @@
#include <Backups/BackupCoordinationRemote.h>
#include <base/hex.h>
#include <boost/algorithm/string/split.hpp>
#include <Backups/BackupCoordinationOnCluster.h>
#include <Access/Common/AccessEntityType.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
@ -26,8 +23,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace Stage = BackupCoordinationStage;
namespace
{
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
@ -149,144 +144,152 @@ namespace
};
}
size_t BackupCoordinationRemote::findCurrentHostIndex(const Strings & all_hosts, const String & current_host)
Strings BackupCoordinationOnCluster::excludeInitiator(const Strings & all_hosts)
{
Strings all_hosts_without_initiator = all_hosts;
bool has_initiator = (std::erase(all_hosts_without_initiator, kInitiator) > 0);
chassert(has_initiator);
return all_hosts_without_initiator;
}
size_t BackupCoordinationOnCluster::findCurrentHostIndex(const String & current_host, const Strings & all_hosts)
{
auto it = std::find(all_hosts.begin(), all_hosts.end(), current_host);
if (it == all_hosts.end())
return 0;
return all_hosts.size();
return it - all_hosts.begin();
}
BackupCoordinationRemote::BackupCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
BackupCoordinationOnCluster::BackupCoordinationOnCluster(
const UUID & backup_uuid_,
bool is_plain_backup_,
const String & root_zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool plain_backup_,
bool is_internal_,
const Strings & all_hosts_,
bool allow_concurrent_backup_,
BackupConcurrencyCounters & concurrency_counters_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
QueryStatusPtr process_list_element_)
: root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + toString(backup_uuid_))
, keeper_settings(keeper_settings_)
, backup_uuid(backup_uuid_)
, all_hosts(all_hosts_)
, all_hosts_without_initiator(excludeInitiator(all_hosts))
, current_host(current_host_)
, current_host_index(findCurrentHostIndex(all_hosts, current_host))
, plain_backup(plain_backup_)
, is_internal(is_internal_)
, log(getLogger("BackupCoordinationRemote"))
, with_retries(
log,
get_zookeeper_,
keeper_settings,
process_list_element_,
[my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal]
(WithRetries::FaultyKeeper & zk)
{
/// Recreate this ephemeral node to signal that we are alive.
if (my_is_internal)
{
String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
/// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
zk->tryRemove(alive_node_path);
zk->createAncestors(alive_node_path);
zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
}
})
, current_host_index(findCurrentHostIndex(current_host, all_hosts))
, plain_backup(is_plain_backup_)
, log(getLogger("BackupCoordinationOnCluster"))
, with_retries(log, get_zookeeper_, keeper_settings, process_list_element_, [root_zookeeper_path_](Coordination::ZooKeeperWithFaultInjection::Ptr zk) { zk->sync(root_zookeeper_path_); })
, concurrency_check(backup_uuid_, /* is_restore = */ false, /* on_cluster = */ true, allow_concurrent_backup_, concurrency_counters_)
, stage_sync(/* is_restore = */ false, fs::path{zookeeper_path} / "stage", current_host, all_hosts, allow_concurrent_backup_, with_retries, schedule_, process_list_element_, log)
, cleaner(zookeeper_path, with_retries, log)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path,
with_retries,
log);
}
BackupCoordinationRemote::~BackupCoordinationRemote()
BackupCoordinationOnCluster::~BackupCoordinationOnCluster()
{
try
{
if (!is_internal)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
tryFinishImpl();
}
void BackupCoordinationRemote::createRootNodes()
void BackupCoordinationOnCluster::createRootNodes()
{
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
auto holder = with_retries.createRetriesControlHolder("createRootNodes", WithRetries::kInitialization);
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
zk->createAncestors(zookeeper_path);
Coordination::Requests ops;
Coordination::Responses responses;
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_part_names", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_mutations", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_data_paths", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/keeper_map_tables", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/file_infos", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/writing_files", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zk->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zk->createIfNotExists(zookeeper_path + "/repl_access", "");
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects", "");
zk->createIfNotExists(zookeeper_path + "/keeper_map_tables", "");
zk->createIfNotExists(zookeeper_path + "/file_infos", "");
zk->createIfNotExists(zookeeper_path + "/writing_files", "");
});
}
void BackupCoordinationRemote::removeAllNodes()
Strings BackupCoordinationOnCluster::setStage(const String & new_stage, const String & message, bool sync)
{
auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
stage_sync.setStage(new_stage, message);
if (!sync)
return {};
return stage_sync.waitForHostsToReachStage(new_stage, all_hosts_without_initiator);
}
void BackupCoordinationOnCluster::setBackupQueryWasSentToOtherHosts()
{
backup_query_was_sent_to_other_hosts = true;
}
bool BackupCoordinationOnCluster::trySetError(std::exception_ptr exception)
{
return stage_sync.trySetError(exception);
}
void BackupCoordinationOnCluster::finish()
{
bool other_hosts_also_finished = false;
stage_sync.finish(other_hosts_also_finished);
if ((current_host == kInitiator) && (other_hosts_also_finished || !backup_query_was_sent_to_other_hosts))
cleaner.cleanup();
}
bool BackupCoordinationOnCluster::tryFinishAfterError() noexcept
{
return tryFinishImpl();
}
bool BackupCoordinationOnCluster::tryFinishImpl() noexcept
{
bool other_hosts_also_finished = false;
if (!stage_sync.tryFinishAfterError(other_hosts_also_finished))
return false;
if ((current_host == kInitiator) && (other_hosts_also_finished || !backup_query_was_sent_to_other_hosts))
{
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
with_retries.renewZooKeeper(zk);
zk->removeRecursive(zookeeper_path);
});
if (!cleaner.tryCleanupAfterError())
return false;
}
return true;
}
void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
void BackupCoordinationOnCluster::waitForOtherHostsToFinish()
{
if (is_internal)
stage_sync->set(current_host, new_stage, message);
else
stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
if ((current_host != kInitiator) || !backup_query_was_sent_to_other_hosts)
return;
stage_sync.waitForOtherHostsToFinish();
}
void BackupCoordinationRemote::setError(const Exception & exception)
bool BackupCoordinationOnCluster::tryWaitForOtherHostsToFinishAfterError() noexcept
{
stage_sync->setError(current_host, exception);
if (current_host != kInitiator)
return false;
if (!backup_query_was_sent_to_other_hosts)
return true;
return stage_sync.tryWaitForOtherHostsToFinishAfterError();
}
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait)
ZooKeeperRetriesInfo BackupCoordinationOnCluster::getOnClusterInitializationKeeperRetriesInfo() const
{
return stage_sync->wait(all_hosts, stage_to_wait);
return ZooKeeperRetriesInfo{keeper_settings.max_retries_while_initializing,
static_cast<UInt64>(keeper_settings.retry_initial_backoff_ms.count()),
static_cast<UInt64>(keeper_settings.retry_max_backoff_ms.count())};
}
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name)
void BackupCoordinationOnCluster::serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name)
{
{
auto holder = with_retries.createRetriesControlHolder(logging_name + "::create");
@ -301,7 +304,7 @@ void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String &
if (value.empty())
return;
size_t max_part_size = keeper_settings.keeper_value_max_size;
size_t max_part_size = keeper_settings.value_max_size;
if (!max_part_size)
max_part_size = value.size();
@ -324,7 +327,7 @@ void BackupCoordinationRemote::serializeToMultipleZooKeeperNodes(const String &
}
}
String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const String & path, const String & logging_name) const
String BackupCoordinationOnCluster::deserializeFromMultipleZooKeeperNodes(const String & path, const String & logging_name) const
{
Strings part_names;
@ -357,7 +360,7 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str
}
void BackupCoordinationRemote::addReplicatedPartNames(
void BackupCoordinationOnCluster::addReplicatedPartNames(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
@ -381,14 +384,14 @@ void BackupCoordinationRemote::addReplicatedPartNames(
});
}
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
Strings BackupCoordinationOnCluster::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{replicated_tables_mutex};
prepareReplicatedTables();
return replicated_tables->getPartNames(table_zk_path, replica_name);
}
void BackupCoordinationRemote::addReplicatedMutations(
void BackupCoordinationOnCluster::addReplicatedMutations(
const String & table_zk_path,
const String & table_name_for_logs,
const String & replica_name,
@ -412,7 +415,7 @@ void BackupCoordinationRemote::addReplicatedMutations(
});
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_zk_path, const String & replica_name) const
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationOnCluster::getReplicatedMutations(const String & table_zk_path, const String & replica_name) const
{
std::lock_guard lock{replicated_tables_mutex};
prepareReplicatedTables();
@ -420,7 +423,7 @@ std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getRepl
}
void BackupCoordinationRemote::addReplicatedDataPath(
void BackupCoordinationOnCluster::addReplicatedDataPath(
const String & table_zk_path, const String & data_path)
{
{
@ -441,7 +444,7 @@ void BackupCoordinationRemote::addReplicatedDataPath(
});
}
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_zk_path) const
Strings BackupCoordinationOnCluster::getReplicatedDataPaths(const String & table_zk_path) const
{
std::lock_guard lock{replicated_tables_mutex};
prepareReplicatedTables();
@ -449,7 +452,7 @@ Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_zk
}
void BackupCoordinationRemote::prepareReplicatedTables() const
void BackupCoordinationOnCluster::prepareReplicatedTables() const
{
if (replicated_tables)
return;
@ -536,7 +539,7 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
replicated_tables->addDataPath(std::move(data_paths));
}
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
void BackupCoordinationOnCluster::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
{
std::lock_guard lock{replicated_access_mutex};
@ -558,14 +561,14 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
});
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
Strings BackupCoordinationOnCluster::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{replicated_access_mutex};
prepareReplicatedAccess();
return replicated_access->getFilePaths(access_zk_path, access_entity_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedAccess() const
void BackupCoordinationOnCluster::prepareReplicatedAccess() const
{
if (replicated_access)
return;
@ -601,7 +604,7 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
replicated_access->addFilePath(std::move(file_path));
}
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
void BackupCoordinationOnCluster::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
{
std::lock_guard lock{replicated_sql_objects_mutex};
@ -631,14 +634,14 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
});
}
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
Strings BackupCoordinationOnCluster::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{replicated_sql_objects_mutex};
prepareReplicatedSQLObjects();
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
void BackupCoordinationOnCluster::prepareReplicatedSQLObjects() const
{
if (replicated_sql_objects)
return;
@ -674,7 +677,7 @@ void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
replicated_sql_objects->addDirectory(std::move(directory));
}
void BackupCoordinationRemote::addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup)
void BackupCoordinationOnCluster::addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup)
{
{
std::lock_guard lock{keeper_map_tables_mutex};
@ -695,7 +698,7 @@ void BackupCoordinationRemote::addKeeperMapTable(const String & table_zookeeper_
});
}
void BackupCoordinationRemote::prepareKeeperMapTables() const
void BackupCoordinationOnCluster::prepareKeeperMapTables() const
{
if (keeper_map_tables)
return;
@ -740,7 +743,7 @@ void BackupCoordinationRemote::prepareKeeperMapTables() const
}
String BackupCoordinationRemote::getKeeperMapDataPath(const String & table_zookeeper_root_path) const
String BackupCoordinationOnCluster::getKeeperMapDataPath(const String & table_zookeeper_root_path) const
{
std::lock_guard lock(keeper_map_tables_mutex);
prepareKeeperMapTables();
@ -748,7 +751,7 @@ String BackupCoordinationRemote::getKeeperMapDataPath(const String & table_zooke
}
void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
void BackupCoordinationOnCluster::addFileInfos(BackupFileInfos && file_infos_)
{
{
std::lock_guard lock{file_infos_mutex};
@ -761,21 +764,21 @@ void BackupCoordinationRemote::addFileInfos(BackupFileInfos && file_infos_)
serializeToMultipleZooKeeperNodes(zookeeper_path + "/file_infos/" + current_host, file_infos_str, "addFileInfos");
}
BackupFileInfos BackupCoordinationRemote::getFileInfos() const
BackupFileInfos BackupCoordinationOnCluster::getFileInfos() const
{
std::lock_guard lock{file_infos_mutex};
prepareFileInfos();
return file_infos->getFileInfos(current_host);
}
BackupFileInfos BackupCoordinationRemote::getFileInfosForAllHosts() const
BackupFileInfos BackupCoordinationOnCluster::getFileInfosForAllHosts() const
{
std::lock_guard lock{file_infos_mutex};
prepareFileInfos();
return file_infos->getFileInfosForAllHosts();
}
void BackupCoordinationRemote::prepareFileInfos() const
void BackupCoordinationOnCluster::prepareFileInfos() const
{
if (file_infos)
return;
@ -801,7 +804,7 @@ void BackupCoordinationRemote::prepareFileInfos() const
}
}
bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
bool BackupCoordinationOnCluster::startWritingFile(size_t data_file_index)
{
{
/// Check if this host is already writing this file.
@ -842,66 +845,4 @@ bool BackupCoordinationRemote::startWritingFile(size_t data_file_index)
}
}
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const
{
/// If its internal concurrency will be checked for the base backup
if (is_internal)
return false;
std::string backup_stage_path = zookeeper_path + "/stage";
bool result = false;
auto holder = with_retries.createRetriesControlHolder("getAllArchiveSuffixes");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
if (!zk->exists(root_zookeeper_path))
zk->createAncestors(root_zookeeper_path);
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
zk->get(root_zookeeper_path, &stat);
Strings existing_backup_paths = zk->getChildren(root_zookeeper_path);
for (const auto & existing_backup_path : existing_backup_paths)
{
if (startsWith(existing_backup_path, "restore-"))
continue;
String existing_backup_uuid = existing_backup_path;
existing_backup_uuid.erase(0, String("backup-").size());
if (existing_backup_uuid == toString(backup_uuid))
continue;
String status;
if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
{
/// Check if some other backup is in progress
if (status == Stage::SCHEDULED_TO_START)
{
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
result = true;
return;
}
}
}
zk->createIfNotExists(backup_stage_path, "");
auto code = zk->trySet(backup_stage_path, Stage::SCHEDULED_TO_START, stat.version);
if (code == Coordination::Error::ZOK)
break;
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
throw zkutil::KeeperException::fromPath(code, backup_stage_path);
}
});
return result;
}
}

View File

@ -1,6 +1,8 @@
#pragma once
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupConcurrencyCheck.h>
#include <Backups/BackupCoordinationCleaner.h>
#include <Backups/BackupCoordinationFileInfos.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedSQLObjects.h>
@ -13,32 +15,35 @@
namespace DB
{
/// We try to store data to zookeeper several times due to possible version conflicts.
constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10;
/// Implementation of the IBackupCoordination interface performing coordination via ZooKeeper. It's necessary for "BACKUP ON CLUSTER".
class BackupCoordinationRemote : public IBackupCoordination
class BackupCoordinationOnCluster : public IBackupCoordination
{
public:
using BackupKeeperSettings = WithRetries::KeeperSettings;
/// Empty string as the current host is used to mark the initiator of a BACKUP ON CLUSTER query.
static const constexpr std::string_view kInitiator;
BackupCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
BackupCoordinationOnCluster(
const UUID & backup_uuid_,
bool is_plain_backup_,
const String & root_zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool plain_backup_,
bool is_internal_,
const Strings & all_hosts_,
bool allow_concurrent_backup_,
BackupConcurrencyCounters & concurrency_counters_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
QueryStatusPtr process_list_element_);
~BackupCoordinationRemote() override;
~BackupCoordinationOnCluster() override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
Strings setStage(const String & new_stage, const String & message, bool sync) override;
void setBackupQueryWasSentToOtherHosts() override;
bool trySetError(std::exception_ptr exception) override;
void finish() override;
bool tryFinishAfterError() noexcept override;
void waitForOtherHostsToFinish() override;
bool tryWaitForOtherHostsToFinishAfterError() noexcept override;
void addReplicatedPartNames(
const String & table_zk_path,
@ -73,13 +78,14 @@ public:
BackupFileInfos getFileInfosForAllHosts() const override;
bool startWritingFile(size_t data_file_index) override;
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const override;
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);
static Strings excludeInitiator(const Strings & all_hosts);
static size_t findCurrentHostIndex(const String & current_host, const Strings & all_hosts);
private:
void createRootNodes();
void removeAllNodes();
bool tryFinishImpl() noexcept;
void serializeToMultipleZooKeeperNodes(const String & path, const String & value, const String & logging_name);
String deserializeFromMultipleZooKeeperNodes(const String & path, const String & logging_name) const;
@ -96,26 +102,27 @@ private:
const String root_zookeeper_path;
const String zookeeper_path;
const BackupKeeperSettings keeper_settings;
const String backup_uuid;
const UUID backup_uuid;
const Strings all_hosts;
const Strings all_hosts_without_initiator;
const String current_host;
const size_t current_host_index;
const bool plain_backup;
const bool is_internal;
LoggerPtr const log;
/// The order of these two fields matters, because stage_sync holds a reference to with_retries object
mutable WithRetries with_retries;
std::optional<BackupCoordinationStageSync> stage_sync;
const WithRetries with_retries;
BackupConcurrencyCheck concurrency_check;
BackupCoordinationStageSync stage_sync;
BackupCoordinationCleaner cleaner;
std::atomic<bool> backup_query_was_sent_to_other_hosts = false;
mutable std::optional<BackupCoordinationReplicatedTables> TSA_GUARDED_BY(replicated_tables_mutex) replicated_tables;
mutable std::optional<BackupCoordinationReplicatedAccess> TSA_GUARDED_BY(replicated_access_mutex) replicated_access;
mutable std::optional<BackupCoordinationReplicatedSQLObjects> TSA_GUARDED_BY(replicated_sql_objects_mutex) replicated_sql_objects;
mutable std::optional<BackupCoordinationFileInfos> TSA_GUARDED_BY(file_infos_mutex) file_infos;
mutable std::optional<BackupCoordinationReplicatedTables> replicated_tables TSA_GUARDED_BY(replicated_tables_mutex);
mutable std::optional<BackupCoordinationReplicatedAccess> replicated_access TSA_GUARDED_BY(replicated_access_mutex);
mutable std::optional<BackupCoordinationReplicatedSQLObjects> replicated_sql_objects TSA_GUARDED_BY(replicated_sql_objects_mutex);
mutable std::optional<BackupCoordinationFileInfos> file_infos TSA_GUARDED_BY(file_infos_mutex);
mutable std::optional<BackupCoordinationKeeperMapTables> keeper_map_tables TSA_GUARDED_BY(keeper_map_tables_mutex);
std::unordered_set<size_t> TSA_GUARDED_BY(writing_files_mutex) writing_files;
std::unordered_set<size_t> writing_files TSA_GUARDED_BY(writing_files_mutex);
mutable std::mutex zookeeper_mutex;
mutable std::mutex replicated_tables_mutex;
mutable std::mutex replicated_access_mutex;
mutable std::mutex replicated_sql_objects_mutex;

View File

@ -8,10 +8,6 @@ namespace DB
namespace BackupCoordinationStage
{
/// This stage is set after concurrency check so ensure we dont start other backup/restores
/// when concurrent backup/restores are not allowed
constexpr const char * SCHEDULED_TO_START = "scheduled to start";
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * GATHERING_METADATA = "gathering metadata";
@ -46,10 +42,6 @@ namespace BackupCoordinationStage
/// Coordination stage meaning that a host finished its work.
constexpr const char * COMPLETED = "completed";
/// Coordination stage meaning that backup/restore has failed due to an error
/// Check '/error' for the error message
constexpr const char * ERROR = "error";
}
}

File diff suppressed because it is too large Load Diff

View File

@ -10,33 +10,193 @@ class BackupCoordinationStageSync
{
public:
BackupCoordinationStageSync(
const String & root_zookeeper_path_,
WithRetries & with_retries_,
bool is_restore_, /// true if this is a RESTORE ON CLUSTER command, false if this is a BACKUP ON CLUSTER command
const String & zookeeper_path_, /// path to the "stage" folder in ZooKeeper
const String & current_host_, /// the current host, or an empty string if it's the initiator of the BACKUP/RESTORE ON CLUSTER command
const Strings & all_hosts_, /// all the hosts (including the initiator and the current host) performing the BACKUP/RESTORE ON CLUSTER command
bool allow_concurrency_, /// whether it's allowed to have concurrent backups or restores.
const WithRetries & with_retries_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
QueryStatusPtr process_list_element_,
LoggerPtr log_);
~BackupCoordinationStageSync();
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
void set(const String & current_host, const String & new_stage, const String & message, const bool & all_hosts = false);
void setError(const String & current_host, const Exception & exception);
void setStage(const String & stage, const String & stage_result = {});
/// Sets the stage of the current host and waits until all hosts come to the same stage.
/// The function returns the messages all hosts set when they come to the required stage.
Strings wait(const Strings & all_hosts, const String & stage_to_wait);
/// Waits until all the specified hosts come to the specified stage.
/// The function returns the results which specified hosts set when they came to the required stage.
/// If it doesn't happen before the timeout then the function will stop waiting and throw an exception.
Strings waitForHostsToReachStage(const String & stage_to_wait, const Strings & hosts, std::optional<std::chrono::milliseconds> timeout = {}) const;
/// Almost the same as setAndWait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout);
/// Waits until all the other hosts finish their work.
/// Stops waiting and throws an exception if another host encounters an error or if some host gets cancelled.
void waitForOtherHostsToFinish() const;
/// Lets other host know that the current host has finished its work.
void finish(bool & other_hosts_also_finished);
/// Lets other hosts know that the current host has encountered an error.
bool trySetError(std::exception_ptr exception) noexcept;
/// Waits until all the other hosts finish their work (as a part of error-handling process).
/// Doesn't stops waiting if some host encounters an error or gets cancelled.
bool tryWaitForOtherHostsToFinishAfterError() const noexcept;
/// Lets other host know that the current host has finished its work (as a part of error-handling process).
bool tryFinishAfterError(bool & other_hosts_also_finished) noexcept;
/// Returns a printable name of a specific host. For empty host the function returns "initiator".
static String getHostDesc(const String & host);
static String getHostsDesc(const Strings & hosts);
private:
/// Initializes the original state. It will be updated then with readCurrentState().
void initializeState();
/// Creates the root node in ZooKeeper.
void createRootNodes();
struct State;
State readCurrentState(WithRetries::RetriesControlHolder & retries_control_holder, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
/// Atomically creates both 'start' and 'alive' nodes and also checks that there is no concurrent backup or restore if `allow_concurrency` is false.
void createStartAndAliveNodes();
void createStartAndAliveNodes(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
/// Deserialize the version of a node stored in the 'start' node.
int parseStartNode(const String & start_node_contents, const String & host) const;
String zookeeper_path;
/// A reference to the field of parent object - BackupCoordinationRemote or RestoreCoordinationRemote
WithRetries & with_retries;
LoggerPtr log;
/// Recreates the 'alive' node if it doesn't exist. It's an ephemeral node so it's removed automatically after disconnections.
void createAliveNode(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
/// Checks that there is no concurrent backup or restore if `allow_concurrency` is false.
void checkConcurrency(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
/// Watching thread periodically reads the current state from ZooKeeper and recreates the 'alive' node.
void startWatchingThread();
void stopWatchingThread();
void watchingThread();
/// Reads the current state from ZooKeeper without throwing exceptions.
void readCurrentState(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
String getStageNodePath(const String & stage) const;
/// Lets other hosts know that the current host has encountered an error.
bool trySetError(const Exception & exception);
void setError(const Exception & exception);
/// Deserializes an error stored in the error node.
static std::pair<std::exception_ptr, String> parseErrorNode(const String & error_node_contents);
/// Reset the `connected` flag for each host.
void resetConnectedFlag();
/// Checks if the current query is cancelled, and if so then the function sets the `cancelled` flag in the current state.
void checkIfQueryCancelled();
/// Checks if the current state contains an error, and if so then the function passes this error to the query status
/// to cancel the current BACKUP or RESTORE command.
void cancelQueryIfError();
/// Checks if some host was disconnected for too long, and if so then the function generates an error and pass it to the query status
/// to cancel the current BACKUP or RESTORE command.
void cancelQueryIfDisconnectedTooLong();
/// Used by waitForHostsToReachStage() to check if everything is ready to return.
bool checkIfHostsReachStage(const Strings & hosts, const String & stage_to_wait, bool time_is_out, std::optional<std::chrono::milliseconds> timeout, Strings & results) const TSA_REQUIRES(mutex);
/// Creates the 'finish' node.
bool tryFinishImpl();
bool tryFinishImpl(bool & other_hosts_also_finished, bool throw_if_error, WithRetries::Kind retries_kind);
void createFinishNodeAndRemoveAliveNode(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
/// Returns the version used by the initiator.
int getInitiatorVersion() const;
/// Waits until all the other hosts finish their work.
bool tryWaitForOtherHostsToFinishImpl(const String & reason, bool throw_if_error, std::optional<std::chrono::seconds> timeout) const;
bool checkIfOtherHostsFinish(const String & reason, bool throw_if_error, bool time_is_out, std::optional<std::chrono::milliseconds> timeout) const TSA_REQUIRES(mutex);
const bool is_restore;
const String operation_name;
const String current_host;
const String current_host_desc;
const Strings all_hosts;
const bool allow_concurrency;
/// A reference to a field of the parent object which is either BackupCoordinationOnCluster or RestoreCoordinationOnCluster.
const WithRetries & with_retries;
const ThreadPoolCallbackRunnerUnsafe<void> schedule;
const QueryStatusPtr process_list_element;
const LoggerPtr log;
const std::chrono::seconds failure_after_host_disconnected_for_seconds;
const std::chrono::seconds finish_timeout_after_error;
const std::chrono::milliseconds sync_period_ms;
const size_t max_attempts_after_bad_version;
/// Paths in ZooKeeper.
const std::filesystem::path zookeeper_path;
const String root_zookeeper_path;
const String operation_node_path;
const String operation_node_name;
const String stage_node_path;
const String start_node_path;
const String finish_node_path;
const String num_hosts_node_path;
const String alive_node_path;
const String alive_tracker_node_path;
const String error_node_path;
std::shared_ptr<Poco::Event> zk_nodes_changed;
/// We store list of previously found ZooKeeper nodes to show better logging messages.
Strings zk_nodes;
/// Information about one host read from ZooKeeper.
struct HostInfo
{
String host;
bool started = false;
bool connected = false;
bool finished = false;
int version = 1;
std::map<String /* stage */, String /* result */> stages = {}; /// std::map because we need to compare states
std::exception_ptr exception = nullptr;
std::chrono::time_point<std::chrono::system_clock> last_connection_time = {};
std::chrono::time_point<std::chrono::steady_clock> last_connection_time_monotonic = {};
bool operator ==(const HostInfo & other) const;
bool operator !=(const HostInfo & other) const;
};
/// Information about all the host participating in the current BACKUP or RESTORE operation.
struct State
{
std::map<String /* host */, HostInfo> hosts; /// std::map because we need to compare states
std::optional<String> host_with_error;
bool cancelled = false;
bool operator ==(const State & other) const;
bool operator !=(const State & other) const;
};
State state TSA_GUARDED_BY(mutex);
mutable std::condition_variable state_changed;
std::future<void> watching_thread_future;
std::atomic<bool> should_stop_watching_thread = false;
struct FinishResult
{
bool succeeded = false;
std::exception_ptr exception;
bool other_hosts_also_finished = false;
};
FinishResult finish_result TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};
}

View File

@ -102,7 +102,6 @@ BackupEntriesCollector::BackupEntriesCollector(
, read_settings(read_settings_)
, context(context_)
, process_list_element(context->getProcessListElement())
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, collect_metadata_timeout(context->getConfigRef().getUInt64(
"backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000)))
, attempts_to_collect_metadata_before_sleep(context->getConfigRef().getUInt("backups.attempts_to_collect_metadata_before_sleep", 2))
@ -176,21 +175,7 @@ Strings BackupEntriesCollector::setStage(const String & new_stage, const String
checkIsQueryCancelled();
current_stage = new_stage;
backup_coordination->setStage(new_stage, message);
if (new_stage == Stage::formatGatheringMetadata(0))
{
return backup_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
}
if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, collect_metadata_end_time);
return backup_coordination->waitForStage(
new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
return backup_coordination->waitForStage(new_stage);
return backup_coordination->setStage(new_stage, message, /* sync = */ true);
}
void BackupEntriesCollector::checkIsQueryCancelled() const

View File

@ -111,10 +111,6 @@ private:
ContextPtr context;
QueryStatusPtr process_list_element;
/// The time a BACKUP ON CLUSTER or RESTORE ON CLUSTER command will wait until all the nodes receive the BACKUP (or RESTORE) query and start working.
/// This setting is similar to `distributed_ddl_task_timeout`.
const std::chrono::milliseconds on_cluster_first_sync_timeout;
/// The time a BACKUP command will try to collect the metadata of tables & databases.
const std::chrono::milliseconds collect_metadata_timeout;

View File

@ -5,6 +5,7 @@
namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class SeekableReadBuffer;
@ -63,9 +64,13 @@ public:
virtual void copyFile(const String & destination, const String & source, size_t size) = 0;
/// Removes a file written to the backup, if it still exists.
virtual void removeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0;
/// Removes the backup folder if it's empty or contains empty subfolders.
virtual void removeEmptyDirectories() = 0;
virtual const ReadSettings & getReadSettings() const = 0;
virtual const WriteSettings & getWriteSettings() const = 0;
virtual size_t getWriteBufferSize() const = 0;

View File

@ -81,6 +81,7 @@ public:
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
void removeEmptyDirectories() override {}
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;

View File

@ -91,16 +91,36 @@ std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_nam
void BackupWriterDisk::removeFile(const String & file_name)
{
disk->removeFileIfExists(root_path / file_name);
if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}
void BackupWriterDisk::removeFiles(const Strings & file_names)
{
for (const auto & file_name : file_names)
disk->removeFileIfExists(root_path / file_name);
if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}
void BackupWriterDisk::removeEmptyDirectories()
{
removeEmptyDirectoriesImpl(root_path);
}
void BackupWriterDisk::removeEmptyDirectoriesImpl(const fs::path & current_dir)
{
if (!disk->existsDirectory(current_dir))
return;
if (disk->isDirectoryEmpty(current_dir))
{
disk->removeDirectory(current_dir);
return;
}
/// Backups are not too deep, so recursion is good enough here.
for (auto it = disk->iterateDirectory(current_dir); it->isValid(); it->next())
removeEmptyDirectoriesImpl(current_dir / it->name());
if (disk->isDirectoryEmpty(current_dir))
disk->removeDirectory(current_dir);
}
void BackupWriterDisk::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,

View File

@ -50,9 +50,11 @@ public:
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
void removeEmptyDirectories() override;
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeEmptyDirectoriesImpl(const std::filesystem::path & current_dir);
const DiskPtr disk;
const std::filesystem::path root_path;

View File

@ -106,16 +106,36 @@ std::unique_ptr<WriteBuffer> BackupWriterFile::writeFile(const String & file_nam
void BackupWriterFile::removeFile(const String & file_name)
{
(void)fs::remove(root_path / file_name);
if (fs::is_directory(root_path) && fs::is_empty(root_path))
(void)fs::remove(root_path);
}
void BackupWriterFile::removeFiles(const Strings & file_names)
{
for (const auto & file_name : file_names)
(void)fs::remove(root_path / file_name);
if (fs::is_directory(root_path) && fs::is_empty(root_path))
(void)fs::remove(root_path);
}
void BackupWriterFile::removeEmptyDirectories()
{
removeEmptyDirectoriesImpl(root_path);
}
void BackupWriterFile::removeEmptyDirectoriesImpl(const fs::path & current_dir)
{
if (!fs::is_directory(current_dir))
return;
if (fs::is_empty(current_dir))
{
(void)fs::remove(current_dir);
return;
}
/// Backups are not too deep, so recursion is good enough here.
for (const auto & it : std::filesystem::directory_iterator{current_dir})
removeEmptyDirectoriesImpl(it.path());
if (fs::is_empty(current_dir))
(void)fs::remove(current_dir);
}
void BackupWriterFile::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,

View File

@ -42,9 +42,11 @@ public:
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
void removeEmptyDirectories() override;
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeEmptyDirectoriesImpl(const std::filesystem::path & current_dir);
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;

View File

@ -74,6 +74,7 @@ public:
void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
void removeEmptyDirectories() override {}
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;

View File

@ -147,11 +147,11 @@ BackupImpl::BackupImpl(
BackupImpl::~BackupImpl()
{
if ((open_mode == OpenMode::WRITE) && !is_internal_backup && !writing_finalized && !std::uncaught_exceptions() && !std::current_exception())
if ((open_mode == OpenMode::WRITE) && !writing_finalized && !corrupted)
{
/// It is suspicious to destroy BackupImpl without finalization while writing a backup when there is no exception.
LOG_ERROR(log, "BackupImpl is not finalized when destructor is called. Stack trace: {}", StackTrace().toString());
chassert(false && "BackupImpl is not finalized when destructor is called.");
LOG_ERROR(log, "BackupImpl is not finalized or marked as corrupted when destructor is called. Stack trace: {}", StackTrace().toString());
chassert(false, "BackupImpl is not finalized or marked as corrupted when destructor is called.");
}
try
@ -196,9 +196,6 @@ void BackupImpl::open()
if (open_mode == OpenMode::READ)
readBackupMetadata();
if ((open_mode == OpenMode::WRITE) && base_backup_info)
base_backup_uuid = getBaseBackupUnlocked()->getUUID();
}
void BackupImpl::close()
@ -280,6 +277,8 @@ std::shared_ptr<const IBackup> BackupImpl::getBaseBackupUnlocked() const
toString(base_backup->getUUID()),
(base_backup_uuid ? toString(*base_backup_uuid) : ""));
}
base_backup_uuid = base_backup->getUUID();
}
return base_backup;
}
@ -369,7 +368,7 @@ void BackupImpl::writeBackupMetadata()
if (base_backup_in_use)
{
*out << "<base_backup>" << xml << base_backup_info->toString() << "</base_backup>";
*out << "<base_backup_uuid>" << toString(*base_backup_uuid) << "</base_backup_uuid>";
*out << "<base_backup_uuid>" << getBaseBackupUnlocked()->getUUID() << "</base_backup_uuid>";
}
}
@ -594,9 +593,6 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const
void BackupImpl::removeLockFile()
{
if (is_internal_backup)
return; /// Internal backup must not remove the lock file (it's still used by the initiator).
if (checkLockFile(false))
writer->removeFile(lock_file_name);
}
@ -989,8 +985,11 @@ void BackupImpl::finalizeWriting()
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for writing");
if (corrupted)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup can't be finalized after an error happened");
if (writing_finalized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is already finalized");
return;
if (!is_internal_backup)
{
@ -1015,20 +1014,58 @@ void BackupImpl::setCompressedSize()
}
void BackupImpl::tryRemoveAllFiles()
bool BackupImpl::setIsCorrupted() noexcept
{
if (open_mode != OpenMode::WRITE)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for writing");
if (is_internal_backup)
return;
try
{
LOG_INFO(log, "Removing all files of backup {}", backup_name_for_logging);
std::lock_guard lock{mutex};
if (open_mode != OpenMode::WRITE)
{
LOG_ERROR(log, "Backup is not opened for writing. Stack trace: {}", StackTrace().toString());
chassert(false, "Backup is not opened for writing when setIsCorrupted() is called");
return false;
}
if (writing_finalized)
{
LOG_WARNING(log, "An error happened after the backup was completed successfully, the backup must be correct!");
return false;
}
if (corrupted)
return true;
LOG_WARNING(log, "An error happened, the backup won't be completed");
closeArchive(/* finalize= */ false);
corrupted = true;
return true;
}
catch (...)
{
DB::tryLogCurrentException(log, "Caught exception while setting that the backup was corrupted");
return false;
}
}
bool BackupImpl::tryRemoveAllFiles() noexcept
{
try
{
std::lock_guard lock{mutex};
if (!corrupted)
{
LOG_ERROR(log, "Backup is not set as corrupted. Stack trace: {}", StackTrace().toString());
chassert(false, "Backup is not set as corrupted when tryRemoveAllFiles() is called");
return false;
}
LOG_INFO(log, "Removing all files of backup {}", backup_name_for_logging);
Strings files_to_remove;
if (use_archive)
{
files_to_remove.push_back(archive_params.archive_name);
@ -1041,14 +1078,17 @@ void BackupImpl::tryRemoveAllFiles()
}
if (!checkLockFile(false))
return;
return false;
writer->removeFiles(files_to_remove);
removeLockFile();
writer->removeEmptyDirectories();
return true;
}
catch (...)
{
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
DB::tryLogCurrentException(log, "Caught exception while removing files of a corrupted backup");
return false;
}
}

View File

@ -86,7 +86,8 @@ public:
void writeFile(const BackupFileInfo & info, BackupEntryPtr entry) override;
bool supportsWritingInMultipleThreads() const override { return !use_archive; }
void finalizeWriting() override;
void tryRemoveAllFiles() override;
bool setIsCorrupted() noexcept override;
bool tryRemoveAllFiles() noexcept override;
private:
void open();
@ -146,13 +147,14 @@ private:
int version;
mutable std::optional<BackupInfo> base_backup_info;
mutable std::shared_ptr<const IBackup> base_backup;
std::optional<UUID> base_backup_uuid;
mutable std::optional<UUID> base_backup_uuid;
std::shared_ptr<IArchiveReader> archive_reader;
std::shared_ptr<IArchiveWriter> archive_writer;
String lock_file_name;
std::atomic<bool> lock_file_before_first_file_checked = false;
bool writing_finalized = false;
bool corrupted = false;
bool deduplicate_files = true;
bool use_same_s3_credentials_for_base_backup = false;
bool use_same_password_for_base_backup = false;

View File

@ -0,0 +1,58 @@
#include <Backups/BackupKeeperSettings.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
namespace DB
{
namespace Setting
{
extern const SettingsUInt64 backup_restore_keeper_max_retries;
extern const SettingsUInt64 backup_restore_keeper_retry_initial_backoff_ms;
extern const SettingsUInt64 backup_restore_keeper_retry_max_backoff_ms;
extern const SettingsUInt64 backup_restore_failure_after_host_disconnected_for_seconds;
extern const SettingsUInt64 backup_restore_keeper_max_retries_while_initializing;
extern const SettingsUInt64 backup_restore_keeper_max_retries_while_handling_error;
extern const SettingsUInt64 backup_restore_finish_timeout_after_error_sec;
extern const SettingsUInt64 backup_restore_keeper_value_max_size;
extern const SettingsUInt64 backup_restore_batch_size_for_keeper_multi;
extern const SettingsUInt64 backup_restore_batch_size_for_keeper_multiread;
extern const SettingsFloat backup_restore_keeper_fault_injection_probability;
extern const SettingsUInt64 backup_restore_keeper_fault_injection_seed;
}
BackupKeeperSettings BackupKeeperSettings::fromContext(const ContextPtr & context)
{
BackupKeeperSettings keeper_settings;
const auto & settings = context->getSettingsRef();
const auto & config = context->getConfigRef();
keeper_settings.max_retries = settings[Setting::backup_restore_keeper_max_retries];
keeper_settings.retry_initial_backoff_ms = std::chrono::milliseconds{settings[Setting::backup_restore_keeper_retry_initial_backoff_ms]};
keeper_settings.retry_max_backoff_ms = std::chrono::milliseconds{settings[Setting::backup_restore_keeper_retry_max_backoff_ms]};
keeper_settings.failure_after_host_disconnected_for_seconds = std::chrono::seconds{settings[Setting::backup_restore_failure_after_host_disconnected_for_seconds]};
keeper_settings.max_retries_while_initializing = settings[Setting::backup_restore_keeper_max_retries_while_initializing];
keeper_settings.max_retries_while_handling_error = settings[Setting::backup_restore_keeper_max_retries_while_handling_error];
keeper_settings.finish_timeout_after_error = std::chrono::seconds(settings[Setting::backup_restore_finish_timeout_after_error_sec]);
if (config.has("backups.sync_period_ms"))
keeper_settings.sync_period_ms = std::chrono::milliseconds{config.getUInt64("backups.sync_period_ms")};
if (config.has("backups.max_attempts_after_bad_version"))
keeper_settings.max_attempts_after_bad_version = config.getUInt64("backups.max_attempts_after_bad_version");
keeper_settings.value_max_size = settings[Setting::backup_restore_keeper_value_max_size];
keeper_settings.batch_size_for_multi = settings[Setting::backup_restore_batch_size_for_keeper_multi];
keeper_settings.batch_size_for_multiread = settings[Setting::backup_restore_batch_size_for_keeper_multiread];
keeper_settings.fault_injection_probability = settings[Setting::backup_restore_keeper_fault_injection_probability];
keeper_settings.fault_injection_seed = settings[Setting::backup_restore_keeper_fault_injection_seed];
return keeper_settings;
}
}

View File

@ -0,0 +1,64 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Settings for [Zoo]Keeper-related works during BACKUP or RESTORE.
struct BackupKeeperSettings
{
/// Maximum number of retries in the middle of a BACKUP ON CLUSTER or RESTORE ON CLUSTER operation.
/// Should be big enough so the whole operation won't be cancelled in the middle of it because of a temporary ZooKeeper failure.
UInt64 max_retries{1000};
/// Initial backoff timeout for ZooKeeper operations during backup or restore.
std::chrono::milliseconds retry_initial_backoff_ms{100};
/// Max backoff timeout for ZooKeeper operations during backup or restore.
std::chrono::milliseconds retry_max_backoff_ms{5000};
/// If a host during BACKUP ON CLUSTER or RESTORE ON CLUSTER doesn't recreate its 'alive' node in ZooKeeper
/// for this amount of time then the whole backup or restore is considered as failed.
/// Should be bigger than any reasonable time for a host to reconnect to ZooKeeper after a failure.
/// Set to zero to disable (if it's zero and some host crashed then BACKUP ON CLUSTER or RESTORE ON CLUSTER will be waiting
/// for the crashed host forever until the operation is explicitly cancelled with KILL QUERY).
std::chrono::seconds failure_after_host_disconnected_for_seconds{3600};
/// Maximum number of retries during the initialization of a BACKUP ON CLUSTER or RESTORE ON CLUSTER operation.
/// Shouldn't be too big because if the operation is going to fail then it's better if it fails faster.
UInt64 max_retries_while_initializing{20};
/// Maximum number of retries while handling an error of a BACKUP ON CLUSTER or RESTORE ON CLUSTER operation.
/// Shouldn't be too big because those retries are just for cleanup after the operation has failed already.
UInt64 max_retries_while_handling_error{20};
/// How long the initiator should wait for other host to handle the 'error' node and finish their work.
std::chrono::seconds finish_timeout_after_error{180};
/// How often the "stage" folder in ZooKeeper must be scanned in a background thread to track changes done by other hosts.
std::chrono::milliseconds sync_period_ms{5000};
/// Number of attempts after getting error ZBADVERSION from ZooKeeper.
size_t max_attempts_after_bad_version{10};
/// Maximum size of data of a ZooKeeper's node during backup.
UInt64 value_max_size{1048576};
/// Maximum size of a batch for a multi request.
UInt64 batch_size_for_multi{1000};
/// Maximum size of a batch for a multiread request.
UInt64 batch_size_for_multiread{10000};
/// Approximate probability of failure for a keeper request during backup or restore. Valid value is in interval [0.0f, 1.0f].
Float64 fault_injection_probability{0};
/// Seed for `fault_injection_probability`: 0 - random seed, otherwise the setting value.
UInt64 fault_injection_seed{0};
static BackupKeeperSettings fromContext(const ContextPtr & context);
};
}

View File

@ -74,6 +74,17 @@ BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query)
return res;
}
bool BackupSettings::isAsync(const ASTBackupQuery & query)
{
if (query.settings)
{
const auto * field = query.settings->as<const ASTSetQuery &>().changes.tryGet("async");
if (field)
return field->safeGet<bool>();
}
return false; /// `async` is false by default.
}
void BackupSettings::copySettingsToQuery(ASTBackupQuery & query) const
{
auto query_settings = std::make_shared<ASTSetQuery>();

View File

@ -101,6 +101,8 @@ struct BackupSettings
static BackupSettings fromBackupQuery(const ASTBackupQuery & query);
void copySettingsToQuery(ASTBackupQuery & query) const;
static bool isAsync(const ASTBackupQuery & query);
struct Util
{
static std::vector<Strings> clusterHostIDsFromAST(const IAST & ast);

File diff suppressed because it is too large Load Diff

View File

@ -23,6 +23,7 @@ using BackupMutablePtr = std::shared_ptr<IBackup>;
using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
class BackupConcurrencyCounters;
using DataRestoreTasks = std::vector<std::function<void()>>;
struct ReadSettings;
class BackupLog;
@ -31,6 +32,10 @@ using ThreadGroupPtr = std::shared_ptr<ThreadGroup>;
class QueryStatus;
using QueryStatusPtr = std::shared_ptr<QueryStatus>;
class ProcessList;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
class AccessRightsElements;
struct ZooKeeperRetriesInfo;
/// Manager of backups and restores: executes backups and restores' threads in the background.
@ -47,18 +52,18 @@ public:
/// Starts executing a BACKUP or RESTORE query. Returns ID of the operation.
/// For asynchronous operations the function throws no exceptions on failure usually,
/// call getInfo() on a returned operation id to check for errors.
BackupOperationID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
std::pair<BackupOperationID, BackupStatus> start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Waits until the specified backup or restore operation finishes or stops.
/// The function returns immediately if the operation is already finished.
void wait(const BackupOperationID & backup_or_restore_id, bool rethrow_exception = true);
BackupStatus wait(const BackupOperationID & backup_or_restore_id, bool rethrow_exception = true);
/// Waits until all running backup and restore operations finish or stop.
void waitAll();
/// Cancels the specified backup or restore operation.
/// The function does nothing if this operation has already finished.
void cancel(const BackupOperationID & backup_or_restore_id, bool wait_ = true);
BackupStatus cancel(const BackupOperationID & backup_or_restore_id, bool wait_ = true);
/// Cancels all running backup and restore operations.
void cancelAll(bool wait_ = true);
@ -67,26 +72,32 @@ public:
std::vector<BackupOperationInfo> getAllInfos() const;
private:
BackupOperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
std::pair<BackupOperationID, BackupStatus> startMakingBackup(const ASTPtr & query, const ContextPtr & context);
struct BackupStarter;
BackupMutablePtr openBackupForWriting(const BackupInfo & backup_info, const BackupSettings & backup_settings, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context) const;
void doBackup(
BackupMutablePtr & backup,
BackupMutablePtr backup,
const std::shared_ptr<ASTBackupQuery> & backup_query,
const BackupOperationID & backup_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
const BackupSettings & backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context);
ContextMutablePtr context,
bool on_cluster,
const ClusterPtr & cluster);
/// Builds file infos for specified backup entries.
void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr<IBackupCoordination> backup_coordination, QueryStatusPtr process_list_element);
/// Write backup entries to an opened backup.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const BackupOperationID & backup_id, std::shared_ptr<IBackupCoordination> backup_coordination, bool internal, QueryStatusPtr process_list_element);
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const BackupOperationID & backup_id, std::shared_ptr<IBackupCoordination> backup_coordination, bool is_internal_backup, QueryStatusPtr process_list_element);
BackupOperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
std::pair<BackupOperationID, BackupStatus> startRestoring(const ASTPtr & query, ContextMutablePtr context);
struct RestoreStarter;
BackupPtr openBackupForReading(const BackupInfo & backup_info, const RestoreSettings & restore_settings, const ContextPtr & context) const;
void doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
@ -95,7 +106,17 @@ private:
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context);
ContextMutablePtr context,
bool on_cluster,
const ClusterPtr & cluster);
std::shared_ptr<IBackupCoordination> makeBackupCoordination(bool on_cluster, const BackupSettings & backup_settings, const ContextPtr & context) const;
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(bool on_cluster, const RestoreSettings & restore_settings, const ContextPtr & context) const;
/// Sends a BACKUP or RESTORE query to other hosts.
void sendQueryToOtherHosts(const ASTBackupQuery & backup_or_restore_query, const ClusterPtr & cluster,
size_t only_shard_num, size_t only_replica_num, ContextMutablePtr context, const AccessRightsElements & access_to_check,
const ZooKeeperRetriesInfo & retries_info) const;
/// Run data restoring tasks which insert data to tables.
void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool, QueryStatusPtr process_list_element);
@ -139,6 +160,8 @@ private:
std::shared_ptr<BackupLog> backup_log;
ProcessList & process_list;
std::unique_ptr<BackupConcurrencyCounters> concurrency_counters;
};
}

View File

@ -121,8 +121,13 @@ public:
/// Finalizes writing the backup, should be called after all entries have been successfully written.
virtual void finalizeWriting() = 0;
/// Try to remove all files copied to the backup. Used after an exception or it the backup was cancelled.
virtual void tryRemoveAllFiles() = 0;
/// Sets that a non-retriable error happened while the backup was being written which means that
/// the backup is most likely corrupted and it can't be finalized.
/// This function is called while handling an exception or if the backup was cancelled.
virtual bool setIsCorrupted() noexcept = 0;
/// Try to remove all files copied to the backup. Could be used after setIsCorrupted().
virtual bool tryRemoveAllFiles() noexcept = 0;
};
using BackupPtr = std::shared_ptr<const IBackup>;

View File

@ -5,26 +5,44 @@
namespace DB
{
class Exception;
struct BackupFileInfo;
using BackupFileInfos = std::vector<BackupFileInfo>;
enum class AccessEntityType : uint8_t;
enum class UserDefinedSQLObjectType : uint8_t;
struct ZooKeeperRetriesInfo;
/// Replicas use this class to coordinate what they're writing to a backup while executing BACKUP ON CLUSTER.
/// There are two implementation of this interface: BackupCoordinationLocal and BackupCoordinationRemote.
/// There are two implementation of this interface: BackupCoordinationLocal and BackupCoordinationOnCluster.
/// BackupCoordinationLocal is used while executing BACKUP without ON CLUSTER and performs coordination in memory.
/// BackupCoordinationRemote is used while executing BACKUP with ON CLUSTER and performs coordination via ZooKeeper.
/// BackupCoordinationOnCluster is used while executing BACKUP with ON CLUSTER and performs coordination via ZooKeeper.
class IBackupCoordination
{
public:
virtual ~IBackupCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual Strings setStage(const String & new_stage, const String & message, bool sync) = 0;
/// Sets that the backup query was sent to other hosts.
/// Function waitForOtherHostsToFinish() will check that to find out if it should really wait or not.
virtual void setBackupQueryWasSentToOtherHosts() = 0;
/// Lets other hosts know that the current host has encountered an error.
virtual bool trySetError(std::exception_ptr exception) = 0;
/// Lets other hosts know that the current host has finished its work.
virtual void finish() = 0;
/// Lets other hosts know that the current host has finished its work (as a part of error-handling process).
virtual bool tryFinishAfterError() noexcept = 0;
/// Waits until all the other hosts finish their work.
/// Stops waiting and throws an exception if another host encounters an error or if some host gets cancelled.
virtual void waitForOtherHostsToFinish() = 0;
/// Waits until all the other hosts finish their work (as a part of error-handling process).
/// Doesn't stops waiting if some host encounters an error or gets cancelled.
virtual bool tryWaitForOtherHostsToFinishAfterError() noexcept = 0;
struct PartNameAndChecksum
{
@ -87,9 +105,7 @@ public:
/// Starts writing a specified file, the function returns false if that file is already being written concurrently.
virtual bool startWritingFile(size_t data_file_index) = 0;
/// This function is used to check if concurrent backups are running
/// other than the backup passed to the function
virtual bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const = 0;
virtual ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const = 0;
};
}

View File

@ -5,26 +5,42 @@
namespace DB
{
class Exception;
enum class UserDefinedSQLObjectType : uint8_t;
class ASTCreateQuery;
struct ZooKeeperRetriesInfo;
/// Replicas use this class to coordinate what they're reading from a backup while executing RESTORE ON CLUSTER.
/// There are two implementation of this interface: RestoreCoordinationLocal and RestoreCoordinationRemote.
/// There are two implementation of this interface: RestoreCoordinationLocal and RestoreCoordinationOnCluster.
/// RestoreCoordinationLocal is used while executing RESTORE without ON CLUSTER and performs coordination in memory.
/// RestoreCoordinationRemote is used while executing RESTORE with ON CLUSTER and performs coordination via ZooKeeper.
/// RestoreCoordinationOnCluster is used while executing RESTORE with ON CLUSTER and performs coordination via ZooKeeper.
class IRestoreCoordination
{
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual Strings setStage(const String & new_stage, const String & message, bool sync) = 0;
static constexpr const char * kErrorStatus = "error";
/// Sets that the restore query was sent to other hosts.
/// Function waitForOtherHostsToFinish() will check that to find out if it should really wait or not.
virtual void setRestoreQueryWasSentToOtherHosts() = 0;
/// Lets other hosts know that the current host has encountered an error.
virtual bool trySetError(std::exception_ptr exception) = 0;
/// Lets other hosts know that the current host has finished its work.
virtual void finish() = 0;
/// Lets other hosts know that the current host has finished its work (as a part of error-handling process).
virtual bool tryFinishAfterError() noexcept = 0;
/// Waits until all the other hosts finish their work.
/// Stops waiting and throws an exception if another host encounters an error or if some host gets cancelled.
virtual void waitForOtherHostsToFinish() = 0;
/// Waits until all the other hosts finish their work (as a part of error-handling process).
/// Doesn't stops waiting if some host encounters an error or gets cancelled.
virtual bool tryWaitForOtherHostsToFinishAfterError() noexcept = 0;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0;
@ -49,9 +65,7 @@ public:
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
virtual void generateUUIDForTable(ASTCreateQuery & create_query) = 0;
/// This function is used to check if concurrent restores are running
/// other than the restore passed to the function
virtual bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const = 0;
virtual ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const = 0;
};
}

View File

@ -1,32 +1,24 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Common/logger_useful.h>
namespace DB
{
RestoreCoordinationLocal::RestoreCoordinationLocal() : log(getLogger("RestoreCoordinationLocal"))
RestoreCoordinationLocal::RestoreCoordinationLocal(
const UUID & restore_uuid, bool allow_concurrent_restore_, BackupConcurrencyCounters & concurrency_counters_)
: log(getLogger("RestoreCoordinationLocal"))
, concurrency_check(restore_uuid, /* is_restore = */ true, /* on_cluster = */ false, allow_concurrent_restore_, concurrency_counters_)
{
}
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStage(const String &, const String &)
{
}
void RestoreCoordinationLocal::setError(const Exception &)
{
}
Strings RestoreCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings RestoreCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
ZooKeeperRetriesInfo RestoreCoordinationLocal::getOnClusterInitializationKeeperRetriesInfo() const
{
return {};
}
@ -63,7 +55,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
{
String query_str = serializeAST(create_query);
auto find_in_map = [&]
auto find_in_map = [&]() TSA_REQUIRES(mutex)
{
auto it = create_query_uuids.find(query_str);
if (it != create_query_uuids.end())
@ -91,14 +83,4 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
}
}
bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const
{
if (num_active_restores > 1)
{
LOG_WARNING(log, "Found concurrent backups: num_active_restores={}", num_active_restores);
return true;
}
return false;
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupConcurrencyCheck.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Common/Logger.h>
#include <mutex>
@ -12,19 +13,20 @@ namespace DB
{
class ASTCreateQuery;
/// Implementation of the IRestoreCoordination interface performing coordination in memory.
class RestoreCoordinationLocal : public IRestoreCoordination
{
public:
RestoreCoordinationLocal();
RestoreCoordinationLocal(const UUID & restore_uuid_, bool allow_concurrent_restore_, BackupConcurrencyCounters & concurrency_counters_);
~RestoreCoordinationLocal() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
Strings setStage(const String &, const String &, bool) override { return {}; }
void setRestoreQueryWasSentToOtherHosts() override {}
bool trySetError(std::exception_ptr) override { return true; }
void finish() override {}
bool tryFinishAfterError() noexcept override { return true; }
void waitForOtherHostsToFinish() override {}
bool tryWaitForOtherHostsToFinishAfterError() noexcept override { return true; }
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -49,15 +51,16 @@ public:
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
void generateUUIDForTable(ASTCreateQuery & create_query) override;
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const override;
private:
LoggerPtr const log;
BackupConcurrencyCheck concurrency_check;
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
std::unordered_map<String, CreateQueryUUIDs> create_query_uuids;
std::unordered_set<String /* root_zk_path */> acquired_data_in_keeper_map_tables;
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases TSA_GUARDED_BY(mutex);
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables TSA_GUARDED_BY(mutex);
std::unordered_map<String, CreateQueryUUIDs> create_query_uuids TSA_GUARDED_BY(mutex);
std::unordered_set<String /* root_zk_path */> acquired_data_in_keeper_map_tables TSA_GUARDED_BY(mutex);
mutable std::mutex mutex;
};

View File

@ -0,0 +1,318 @@
#include <Backups/BackupCoordinationOnCluster.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Backups/RestoreCoordinationOnCluster.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Parsers/formatAST.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
namespace DB
{
RestoreCoordinationOnCluster::RestoreCoordinationOnCluster(
const UUID & restore_uuid_,
const String & root_zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_,
const BackupKeeperSettings & keeper_settings_,
const String & current_host_,
const Strings & all_hosts_,
bool allow_concurrent_restore_,
BackupConcurrencyCounters & concurrency_counters_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
QueryStatusPtr process_list_element_)
: root_zookeeper_path(root_zookeeper_path_)
, keeper_settings(keeper_settings_)
, restore_uuid(restore_uuid_)
, zookeeper_path(root_zookeeper_path_ + "/restore-" + toString(restore_uuid_))
, all_hosts(all_hosts_)
, all_hosts_without_initiator(BackupCoordinationOnCluster::excludeInitiator(all_hosts))
, current_host(current_host_)
, current_host_index(BackupCoordinationOnCluster::findCurrentHostIndex(current_host, all_hosts))
, log(getLogger("RestoreCoordinationOnCluster"))
, with_retries(log, get_zookeeper_, keeper_settings, process_list_element_, [root_zookeeper_path_](Coordination::ZooKeeperWithFaultInjection::Ptr zk) { zk->sync(root_zookeeper_path_); })
, concurrency_check(restore_uuid_, /* is_restore = */ true, /* on_cluster = */ true, allow_concurrent_restore_, concurrency_counters_)
, stage_sync(/* is_restore = */ true, fs::path{zookeeper_path} / "stage", current_host, all_hosts, allow_concurrent_restore_, with_retries, schedule_, process_list_element_, log)
, cleaner(zookeeper_path, with_retries, log)
{
createRootNodes();
}
RestoreCoordinationOnCluster::~RestoreCoordinationOnCluster()
{
tryFinishImpl();
}
void RestoreCoordinationOnCluster::createRootNodes()
{
auto holder = with_retries.createRetriesControlHolder("createRootNodes", WithRetries::kInitialization);
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_sql_objects_acquired", "");
zk->createIfNotExists(zookeeper_path + "/keeper_map_tables", "");
zk->createIfNotExists(zookeeper_path + "/table_uuids", "");
});
}
Strings RestoreCoordinationOnCluster::setStage(const String & new_stage, const String & message, bool sync)
{
stage_sync.setStage(new_stage, message);
if (!sync)
return {};
return stage_sync.waitForHostsToReachStage(new_stage, all_hosts_without_initiator);
}
void RestoreCoordinationOnCluster::setRestoreQueryWasSentToOtherHosts()
{
restore_query_was_sent_to_other_hosts = true;
}
bool RestoreCoordinationOnCluster::trySetError(std::exception_ptr exception)
{
return stage_sync.trySetError(exception);
}
void RestoreCoordinationOnCluster::finish()
{
bool other_hosts_also_finished = false;
stage_sync.finish(other_hosts_also_finished);
if ((current_host == kInitiator) && (other_hosts_also_finished || !restore_query_was_sent_to_other_hosts))
cleaner.cleanup();
}
bool RestoreCoordinationOnCluster::tryFinishAfterError() noexcept
{
return tryFinishImpl();
}
bool RestoreCoordinationOnCluster::tryFinishImpl() noexcept
{
bool other_hosts_also_finished = false;
if (!stage_sync.tryFinishAfterError(other_hosts_also_finished))
return false;
if ((current_host == kInitiator) && (other_hosts_also_finished || !restore_query_was_sent_to_other_hosts))
{
if (!cleaner.tryCleanupAfterError())
return false;
}
return true;
}
void RestoreCoordinationOnCluster::waitForOtherHostsToFinish()
{
if ((current_host != kInitiator) || !restore_query_was_sent_to_other_hosts)
return;
stage_sync.waitForOtherHostsToFinish();
}
bool RestoreCoordinationOnCluster::tryWaitForOtherHostsToFinishAfterError() noexcept
{
if (current_host != kInitiator)
return false;
if (!restore_query_was_sent_to_other_hosts)
return true;
return stage_sync.tryWaitForOtherHostsToFinishAfterError();
}
ZooKeeperRetriesInfo RestoreCoordinationOnCluster::getOnClusterInitializationKeeperRetriesInfo() const
{
return ZooKeeperRetriesInfo{keeper_settings.max_retries_while_initializing,
static_cast<UInt64>(keeper_settings.retry_initial_backoff_ms.count()),
static_cast<UInt64>(keeper_settings.retry_max_backoff_ms.count())};
}
bool RestoreCoordinationOnCluster::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireCreatingTableInReplicatedDatabase");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationOnCluster::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataIntoReplicatedTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationOnCluster::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedAccessStorage");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationOnCluster::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedSQLObjects");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path);
zk->createIfNotExists(path, "");
path += "/";
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
path += "functions";
break;
}
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationOnCluster::acquireInsertingDataForKeeperMap(const String & root_zk_path, const String & table_unique_id)
{
bool lock_acquired = false;
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataForKeeperMap");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
/// we need to remove leading '/' from root_zk_path
auto normalized_root_zk_path = root_zk_path.substr(1);
std::string restore_lock_path = fs::path(zookeeper_path) / "keeper_map_tables" / escapeForFileName(normalized_root_zk_path);
zk->createAncestors(restore_lock_path);
auto code = zk->tryCreate(restore_lock_path, table_unique_id, zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZOK)
{
lock_acquired = true;
return;
}
if (code == Coordination::Error::ZNODEEXISTS)
lock_acquired = table_unique_id == zk->get(restore_lock_path);
else
zkutil::KeeperException::fromPath(code, restore_lock_path);
});
return lock_acquired;
}
void RestoreCoordinationOnCluster::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
String new_uuids_str = new_uuids.toString();
auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/table_uuids/" + escapeForFileName(query_str);
Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
{
new_uuids.copyToQuery(create_query);
return;
}
if (res == Coordination::Error::ZNODEEXISTS)
{
CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
return;
}
zkutil::KeeperException::fromPath(res, path);
});
}
}

View File

@ -1,6 +1,8 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupConcurrencyCheck.h>
#include <Backups/BackupCoordinationCleaner.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Backups/WithRetries.h>
@ -9,28 +11,33 @@ namespace DB
{
/// Implementation of the IRestoreCoordination interface performing coordination via ZooKeeper. It's necessary for "RESTORE ON CLUSTER".
class RestoreCoordinationRemote : public IRestoreCoordination
class RestoreCoordinationOnCluster : public IRestoreCoordination
{
public:
using RestoreKeeperSettings = WithRetries::KeeperSettings;
/// Empty string as the current host is used to mark the initiator of a RESTORE ON CLUSTER query.
static const constexpr std::string_view kInitiator;
RestoreCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
RestoreCoordinationOnCluster(
const UUID & restore_uuid_,
const String & root_zookeeper_path_,
const RestoreKeeperSettings & keeper_settings_,
const String & restore_uuid_,
const Strings & all_hosts_,
zkutil::GetZooKeeper get_zookeeper_,
const BackupKeeperSettings & keeper_settings_,
const String & current_host_,
bool is_internal_,
const Strings & all_hosts_,
bool allow_concurrent_restore_,
BackupConcurrencyCounters & concurrency_counters_,
ThreadPoolCallbackRunnerUnsafe<void> schedule_,
QueryStatusPtr process_list_element_);
~RestoreCoordinationRemote() override;
~RestoreCoordinationOnCluster() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
Strings setStage(const String & new_stage, const String & message, bool sync) override;
void setRestoreQueryWasSentToOtherHosts() override;
bool trySetError(std::exception_ptr exception) override;
void finish() override;
bool tryFinishAfterError() noexcept override;
void waitForOtherHostsToFinish() override;
bool tryWaitForOtherHostsToFinishAfterError() noexcept override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -55,27 +62,27 @@ public:
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
void generateUUIDForTable(ASTCreateQuery & create_query) override;
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
ZooKeeperRetriesInfo getOnClusterInitializationKeeperRetriesInfo() const override;
private:
void createRootNodes();
void removeAllNodes();
bool tryFinishImpl() noexcept;
/// get_zookeeper will provide a zookeeper client without any fault injection
const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path;
const RestoreKeeperSettings keeper_settings;
const String restore_uuid;
const BackupKeeperSettings keeper_settings;
const UUID restore_uuid;
const String zookeeper_path;
const Strings all_hosts;
const Strings all_hosts_without_initiator;
const String current_host;
const size_t current_host_index;
const bool is_internal;
LoggerPtr const log;
mutable WithRetries with_retries;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
const WithRetries with_retries;
BackupConcurrencyCheck concurrency_check;
BackupCoordinationStageSync stage_sync;
BackupCoordinationCleaner cleaner;
std::atomic<bool> restore_query_was_sent_to_other_hosts = false;
};
}

View File

@ -1,379 +0,0 @@
#include <Backups/BackupCoordinationRemote.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/RestoreCoordinationRemote.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Parsers/formatAST.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
namespace DB
{
namespace Stage = BackupCoordinationStage;
RestoreCoordinationRemote::RestoreCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const RestoreKeeperSettings & keeper_settings_,
const String & restore_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_,
QueryStatusPtr process_list_element_)
: get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_)
, keeper_settings(keeper_settings_)
, restore_uuid(restore_uuid_)
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
, all_hosts(all_hosts_)
, current_host(current_host_)
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_)
, log(getLogger("RestoreCoordinationRemote"))
, with_retries(
log,
get_zookeeper_,
keeper_settings,
process_list_element_,
[my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal]
(WithRetries::FaultyKeeper & zk)
{
/// Recreate this ephemeral node to signal that we are alive.
if (my_is_internal)
{
String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host;
/// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically.
zk->tryRemove(alive_node_path);
zk->createAncestors(alive_node_path);
zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral);
}
})
{
createRootNodes();
stage_sync.emplace(
zookeeper_path,
with_retries,
log);
}
RestoreCoordinationRemote::~RestoreCoordinationRemote()
{
try
{
if (!is_internal)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void RestoreCoordinationRemote::createRootNodes()
{
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
zk->createAncestors(zookeeper_path);
Coordination::Requests ops;
Coordination::Responses responses;
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_databases_tables_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_tables_data_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access_storages_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/keeper_map_tables", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/table_uuids", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
});
}
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
{
if (is_internal)
stage_sync->set(current_host, new_stage, message);
else
stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
}
void RestoreCoordinationRemote::setError(const Exception & exception)
{
stage_sync->setError(current_host, exception);
}
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait)
{
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireCreatingTableInReplicatedDatabase");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataIntoReplicatedTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedAccessStorage");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zk->tryCreate(path, toString(current_host_index), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type)
{
bool result = false;
auto holder = with_retries.createRetriesControlHolder("acquireReplicatedSQLObjects");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/repl_sql_objects_acquired/" + escapeForFileName(loader_zk_path);
zk->createIfNotExists(path, "");
path += "/";
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
path += "functions";
break;
}
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException::fromPath(code, path);
if (code == Coordination::Error::ZOK)
{
result = true;
return;
}
/// We need to check who created that node
result = zk->get(path) == toString(current_host_index);
});
return result;
}
bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String & root_zk_path, const String & table_unique_id)
{
bool lock_acquired = false;
auto holder = with_retries.createRetriesControlHolder("acquireInsertingDataForKeeperMap");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
/// we need to remove leading '/' from root_zk_path
auto normalized_root_zk_path = root_zk_path.substr(1);
std::string restore_lock_path = fs::path(zookeeper_path) / "keeper_map_tables" / escapeForFileName(normalized_root_zk_path);
zk->createAncestors(restore_lock_path);
auto code = zk->tryCreate(restore_lock_path, table_unique_id, zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZOK)
{
lock_acquired = true;
return;
}
if (code == Coordination::Error::ZNODEEXISTS)
lock_acquired = table_unique_id == zk->get(restore_lock_path);
else
zkutil::KeeperException::fromPath(code, restore_lock_path);
});
return lock_acquired;
}
void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
String new_uuids_str = new_uuids.toString();
auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/table_uuids/" + escapeForFileName(query_str);
Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
{
new_uuids.copyToQuery(create_query);
return;
}
if (res == Coordination::Error::ZNODEEXISTS)
{
CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
return;
}
zkutil::KeeperException::fromPath(res, path);
});
}
void RestoreCoordinationRemote::removeAllNodes()
{
/// Usually this function is called by the initiator when a restore operation is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still restoring something. Removing all the nodes will remove the parent node of the restore coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
/// of their restore work before that.
auto holder = with_retries.createRetriesControlHolder("removeAllNodes");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
zk->removeRecursive(zookeeper_path);
});
}
bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const
{
/// If its internal concurrency will be checked for the base restore
if (is_internal)
return false;
bool result = false;
std::string path = zookeeper_path + "/stage";
auto holder = with_retries.createRetriesControlHolder("createRootNodes");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
if (! zk->exists(root_zookeeper_path))
zk->createAncestors(root_zookeeper_path);
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
zk->get(root_zookeeper_path, &stat);
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
for (const auto & existing_restore_path : existing_restore_paths)
{
if (startsWith(existing_restore_path, "backup-"))
continue;
String existing_restore_uuid = existing_restore_path;
existing_restore_uuid.erase(0, String("restore-").size());
if (existing_restore_uuid == toString(restore_uuid))
continue;
String status;
if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status))
{
/// Check if some other restore is in progress
if (status == Stage::SCHEDULED_TO_START)
{
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
result = true;
return;
}
}
}
zk->createIfNotExists(path, "");
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version);
if (code == Coordination::Error::ZOK)
break;
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
throw zkutil::KeeperException::fromPath(code, path);
}
});
return result;
}
}

View File

@ -100,7 +100,6 @@ RestorerFromBackup::RestorerFromBackup(
, context(context_)
, process_list_element(context->getProcessListElement())
, after_task_callback(after_task_callback_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
, log(getLogger("RestorerFromBackup"))
, tables_dependencies("RestorerFromBackup")
@ -119,12 +118,14 @@ RestorerFromBackup::~RestorerFromBackup()
}
}
void RestorerFromBackup::run(Mode mode)
void RestorerFromBackup::run(Mode mode_)
{
/// run() can be called onle once.
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
mode = mode_;
/// Find other hosts working along with us to execute this ON CLUSTER query.
all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
@ -139,6 +140,7 @@ void RestorerFromBackup::run(Mode mode)
setStage(Stage::FINDING_TABLES_IN_BACKUP);
findDatabasesAndTablesInBackup();
waitFutures();
logNumberOfDatabasesAndTablesToRestore();
/// Check access rights.
setStage(Stage::CHECKING_ACCESS_RIGHTS);
@ -228,20 +230,8 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa
if (restore_coordination)
{
restore_coordination->setStage(new_stage, message);
/// The initiator of a RESTORE ON CLUSTER query waits for other hosts to complete their work (see waitForStage(Stage::COMPLETED) in BackupsWorker::doRestore),
/// but other hosts shouldn't wait for each others' completion. (That's simply unnecessary and also
/// the initiator may start cleaning up (e.g. removing restore-coordination ZooKeeper nodes) once all other hosts are in Stage::COMPLETED.)
bool need_wait = (new_stage != Stage::COMPLETED);
if (need_wait)
{
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(new_stage);
}
/// There is no need to sync Stage::COMPLETED with other hosts because it's the last stage.
restore_coordination->setStage(new_stage, message, /* sync = */ (new_stage != Stage::COMPLETED));
}
}
@ -384,8 +374,12 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup()
}
}
}
}
LOG_INFO(log, "Will restore {} databases and {} tables", getNumDatabases(), getNumTables());
void RestorerFromBackup::logNumberOfDatabasesAndTablesToRestore() const
{
std::string_view action = (mode == CHECK_ACCESS_ONLY) ? "check access rights for restoring" : "restore";
LOG_INFO(log, "Will {} {} databases and {} tables", action, getNumDatabases(), getNumTables());
}
void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional<ASTs> & partitions)

View File

@ -53,7 +53,7 @@ public:
using DataRestoreTasks = std::vector<DataRestoreTask>;
/// Restores the metadata of databases and tables and returns tasks to restore the data of tables.
void run(Mode mode);
void run(Mode mode_);
BackupPtr getBackup() const { return backup; }
const RestoreSettings & getRestoreSettings() const { return restore_settings; }
@ -80,10 +80,10 @@ private:
ContextMutablePtr context;
QueryStatusPtr process_list_element;
std::function<void()> after_task_callback;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds create_table_timeout;
LoggerPtr log;
Mode mode = Mode::RESTORE;
Strings all_hosts;
DDLRenamingMap renaming_map;
std::vector<std::filesystem::path> root_paths_in_backup;
@ -97,6 +97,7 @@ private:
void findDatabaseInBackupImpl(const String & database_name_in_backup, const std::set<DatabaseAndTableName> & except_table_names);
void findEverythingInBackup(const std::set<String> & except_database_names, const std::set<DatabaseAndTableName> & except_table_names);
void logNumberOfDatabasesAndTablesToRestore() const;
size_t getNumDatabases() const;
size_t getNumTables() const;

View File

@ -1,57 +1,34 @@
#include <Backups/WithRetries.h>
#include <Core/Settings.h>
#include <mutex>
namespace DB
{
namespace Setting
{
extern const SettingsUInt64 backup_restore_keeper_max_retries;
extern const SettingsUInt64 backup_restore_keeper_retry_initial_backoff_ms;
extern const SettingsUInt64 backup_restore_keeper_retry_max_backoff_ms;
extern const SettingsUInt64 backup_restore_batch_size_for_keeper_multiread;
extern const SettingsFloat backup_restore_keeper_fault_injection_probability;
extern const SettingsUInt64 backup_restore_keeper_fault_injection_seed;
extern const SettingsUInt64 backup_restore_keeper_value_max_size;
extern const SettingsUInt64 backup_restore_batch_size_for_keeper_multi;
}
WithRetries::KeeperSettings WithRetries::KeeperSettings::fromContext(ContextPtr context)
{
return
{
.keeper_max_retries = context->getSettingsRef()[Setting::backup_restore_keeper_max_retries],
.keeper_retry_initial_backoff_ms = context->getSettingsRef()[Setting::backup_restore_keeper_retry_initial_backoff_ms],
.keeper_retry_max_backoff_ms = context->getSettingsRef()[Setting::backup_restore_keeper_retry_max_backoff_ms],
.batch_size_for_keeper_multiread = context->getSettingsRef()[Setting::backup_restore_batch_size_for_keeper_multiread],
.keeper_fault_injection_probability = context->getSettingsRef()[Setting::backup_restore_keeper_fault_injection_probability],
.keeper_fault_injection_seed = context->getSettingsRef()[Setting::backup_restore_keeper_fault_injection_seed],
.keeper_value_max_size = context->getSettingsRef()[Setting::backup_restore_keeper_value_max_size],
.batch_size_for_keeper_multi = context->getSettingsRef()[Setting::backup_restore_batch_size_for_keeper_multi],
};
}
WithRetries::WithRetries(
LoggerPtr log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, QueryStatusPtr process_list_element_, RenewerCallback callback_)
LoggerPtr log_, zkutil::GetZooKeeper get_zookeeper_, const BackupKeeperSettings & settings_, QueryStatusPtr process_list_element_, RenewerCallback callback_)
: log(log_)
, get_zookeeper(get_zookeeper_)
, settings(settings_)
, process_list_element(process_list_element_)
, callback(callback_)
, global_zookeeper_retries_info(
settings.keeper_max_retries, settings.keeper_retry_initial_backoff_ms, settings.keeper_retry_max_backoff_ms)
{}
WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name)
: info(parent->global_zookeeper_retries_info)
, retries_ctl(name, parent->log, info, parent->process_list_element)
WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name, Kind kind)
: info( (kind == kInitialization) ? parent->settings.max_retries_while_initializing
: (kind == kErrorHandling) ? parent->settings.max_retries_while_handling_error
: parent->settings.max_retries,
parent->settings.retry_initial_backoff_ms.count(),
parent->settings.retry_max_backoff_ms.count())
/// We don't use process_list_element while handling an error because the error handling can't be cancellable.
, retries_ctl(name, parent->log, info, (kind == kErrorHandling) ? nullptr : parent->process_list_element)
, faulty_zookeeper(parent->getFaultyZooKeeper())
{}
WithRetries::RetriesControlHolder WithRetries::createRetriesControlHolder(const String & name)
WithRetries::RetriesControlHolder WithRetries::createRetriesControlHolder(const String & name, Kind kind) const
{
return RetriesControlHolder(this, name);
return RetriesControlHolder(this, name, kind);
}
void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
@ -62,8 +39,8 @@ void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
{
zookeeper = get_zookeeper();
my_faulty_zookeeper->setKeeper(zookeeper);
callback(my_faulty_zookeeper);
if (callback)
callback(my_faulty_zookeeper);
}
else
{
@ -71,7 +48,7 @@ void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
}
}
const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const
const BackupKeeperSettings & WithRetries::getKeeperSettings() const
{
return settings;
}
@ -88,8 +65,8 @@ WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition
/// when the same object is used from multiple threads.
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.keeper_fault_injection_probability,
settings.keeper_fault_injection_seed,
settings.fault_injection_probability,
settings.fault_injection_seed,
current_zookeeper,
log->name(),
log);

View File

@ -1,9 +1,11 @@
#pragma once
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Backups/BackupKeeperSettings.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Common/ZooKeeper/ZooKeeperWithFaultInjection.h>
namespace DB
{
@ -15,20 +17,13 @@ class WithRetries
{
public:
using FaultyKeeper = Coordination::ZooKeeperWithFaultInjection::Ptr;
using RenewerCallback = std::function<void(FaultyKeeper &)>;
using RenewerCallback = std::function<void(FaultyKeeper)>;
struct KeeperSettings
enum Kind
{
UInt64 keeper_max_retries{0};
UInt64 keeper_retry_initial_backoff_ms{0};
UInt64 keeper_retry_max_backoff_ms{0};
UInt64 batch_size_for_keeper_multiread{10000};
Float64 keeper_fault_injection_probability{0};
UInt64 keeper_fault_injection_seed{42};
UInt64 keeper_value_max_size{1048576};
UInt64 batch_size_for_keeper_multi{1000};
static KeeperSettings fromContext(ContextPtr context);
kNormal,
kInitialization,
kErrorHandling,
};
/// For simplicity a separate ZooKeeperRetriesInfo and a faulty [Zoo]Keeper client
@ -48,23 +43,23 @@ public:
private:
friend class WithRetries;
RetriesControlHolder(const WithRetries * parent, const String & name);
RetriesControlHolder(const WithRetries * parent, const String & name, Kind kind);
};
RetriesControlHolder createRetriesControlHolder(const String & name);
WithRetries(LoggerPtr log, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings, QueryStatusPtr process_list_element_, RenewerCallback callback);
RetriesControlHolder createRetriesControlHolder(const String & name, Kind kind = Kind::kNormal) const;
WithRetries(LoggerPtr log, zkutil::GetZooKeeper get_zookeeper_, const BackupKeeperSettings & settings, QueryStatusPtr process_list_element_, RenewerCallback callback = {});
/// Used to re-establish new connection inside a retry loop.
void renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const;
const KeeperSettings & getKeeperSettings() const;
const BackupKeeperSettings & getKeeperSettings() const;
private:
/// This will provide a special wrapper which is useful for testing
FaultyKeeper getFaultyZooKeeper() const;
LoggerPtr log;
zkutil::GetZooKeeper get_zookeeper;
KeeperSettings settings;
BackupKeeperSettings settings;
QueryStatusPtr process_list_element;
/// This callback is called each time when a new [Zoo]Keeper session is created.
@ -76,7 +71,6 @@ private:
/// it could lead just to a failed backup which could possibly be successful
/// if there were a little bit more retries.
RenewerCallback callback;
ZooKeeperRetriesInfo global_zookeeper_retries_info;
/// This is needed only to protect zookeeper object
mutable std::mutex zookeeper_mutex;

View File

@ -167,7 +167,8 @@ void ClientApplicationBase::init(int argc, char ** argv)
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
("query_id", po::value<std::string>(), "query_id")
("history_file", po::value<std::string>(), "path to history file")
("history_file", po::value<std::string>(), "Path to a file containing command history.")
("history_max_entries", po::value<UInt32>()->default_value(1000000), "Maximum number of entries in the history file.")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off")
@ -350,6 +351,8 @@ void ClientApplicationBase::init(int argc, char ** argv)
getClientConfiguration().setBool("highlight", options["highlight"].as<bool>());
if (options.count("history_file"))
getClientConfiguration().setString("history_file", options["history_file"].as<std::string>());
if (options.count("history_max_entries"))
getClientConfiguration().setUInt("history_max_entries", options["history_max_entries"].as<UInt32>());
if (options.count("interactive"))
getClientConfiguration().setBool("interactive", true);
if (options.count("pager"))

View File

@ -2665,6 +2665,8 @@ void ClientBase::runInteractive()
}
}
history_max_entries = getClientConfiguration().getUInt("history_max_entries");
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G", "\\G;"};
char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?";
@ -2677,6 +2679,7 @@ void ClientBase::runInteractive()
ReplxxLineReader lr(
*suggest,
history_file,
history_max_entries,
getClientConfiguration().has("multiline"),
getClientConfiguration().getBool("ignore_shell_suspend", true),
query_extenders,

View File

@ -328,6 +328,7 @@ protected:
String home_path;
String history_file; /// Path to a file containing command history.
UInt32 history_max_entries; /// Maximum number of entries in the history file.
String current_profile;

View File

@ -293,6 +293,7 @@ void ReplxxLineReader::setLastIsDelimiter(bool flag)
ReplxxLineReader::ReplxxLineReader(
Suggest & suggest,
const String & history_file_path_,
UInt32 history_max_entries_,
bool multiline_,
bool ignore_shell_suspend,
Patterns extenders_,
@ -313,6 +314,8 @@ ReplxxLineReader::ReplxxLineReader(
{
using Replxx = replxx::Replxx;
rx.set_max_history_size(static_cast<int>(history_max_entries_));
if (!history_file_path.empty())
{
history_file_fd = open(history_file_path.c_str(), O_RDWR);

View File

@ -14,6 +14,7 @@ public:
(
Suggest & suggest,
const String & history_file_path,
UInt32 history_max_entries,
bool multiline,
bool ignore_shell_suspend,
Patterns extenders_,

Some files were not shown because too many files have changed in this diff Show More