Merge branch 'master' into fix-osx-setsockopt-errors
Commit: de710209a7
@ -102,6 +102,17 @@ if (ENABLE_FUZZING)
|
||||
set (ENABLE_PROTOBUF 1)
|
||||
endif()
|
||||
|
||||
option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF)
|
||||
|
||||
if (ENABLE_WOBOQ_CODEBROWSER)
|
||||
set (ENABLE_EMBEDDED_COMPILER 0)
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories")
|
||||
# woboq codebrowser uses clang tooling, and they could add default system
|
||||
# clang includes, and later clang will warn for those added by itself
|
||||
# includes.
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories")
|
||||
endif()
|
||||
|
||||
# Global libraries
|
||||
# See:
|
||||
# - default_libs.cmake
|
||||
@ -259,8 +270,8 @@ endif ()
|
||||
option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT})
|
||||
|
||||
if (ENABLE_BUILD_PATH_MAPPING)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${PROJECT_SOURCE_DIR}=.")
|
||||
endif ()
|
||||
|
||||
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
|
||||
@ -557,7 +568,7 @@ if (NATIVE_BUILD_TARGETS
|
||||
)
|
||||
message (STATUS "Building native targets...")
|
||||
|
||||
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
|
||||
set (NATIVE_BUILD_DIR "${PROJECT_BINARY_DIR}/native")
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
|
||||
@ -571,7 +582,7 @@ if (NATIVE_BUILD_TARGETS
|
||||
# Avoid overriding .cargo/config.toml with native toolchain.
|
||||
"-DENABLE_RUST=OFF"
|
||||
"-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}"
|
||||
${CMAKE_SOURCE_DIR}
|
||||
${PROJECT_SOURCE_DIR}
|
||||
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
|
||||
COMMAND_ECHO STDOUT)
|
||||
|
||||
|
@ -5,11 +5,11 @@ if (NOT TARGET check)
|
||||
if (CMAKE_CONFIGURATION_TYPES)
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure --build-config "$<CONFIGURATION>"
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
else ()
|
||||
add_custom_target (check COMMAND ${CMAKE_CTEST_COMMAND}
|
||||
--force-new-ctest-process --output-on-failure
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
@ -5,14 +5,14 @@ if (Git_FOUND)
|
||||
# Commit hash + whether the building workspace was dirty or not
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" rev-parse HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_HASH
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Branch name
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_BRANCH
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
@ -20,14 +20,14 @@ if (Git_FOUND)
|
||||
SET(ENV{TZ} "UTC")
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_DATE
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
# Subject of the commit
|
||||
execute_process(COMMAND
|
||||
"${GIT_EXECUTABLE}" log -1 --format=%s
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
|
||||
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
@ -35,7 +35,7 @@ if (Git_FOUND)
|
||||
|
||||
execute_process(
|
||||
COMMAND ${GIT_EXECUTABLE} status
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
else()
|
||||
message(STATUS "Git could not be found.")
|
||||
endif()
|
||||
|
@ -7,6 +7,6 @@ message (STATUS "compiler CXX = ${CMAKE_CXX_COMPILER} ${FULL_CXX_FLAGS}")
|
||||
message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}")
|
||||
|
||||
# Reproducible builds
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
|
||||
string (REPLACE "${CMAKE_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_C_FLAGS_NORMALIZED "${FULL_C_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_CXX_FLAGS_NORMALIZED "${FULL_CXX_FLAGS}")
|
||||
string (REPLACE "${PROJECT_SOURCE_DIR}" "." FULL_EXE_LINKER_FLAGS_NORMALIZED "${FULL_EXE_LINKER_FLAGS}")
|
||||
|
@ -29,14 +29,14 @@ if (SANITIZE)
|
||||
|
||||
# Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries.
|
||||
# Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay.
|
||||
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
|
||||
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/msan_suppressions.txt")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
|
||||
|
||||
elseif (SANITIZE STREQUAL "thread")
|
||||
set (TSAN_FLAGS "-fsanitize=thread")
|
||||
if (COMPILER_CLANG)
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/tsan_suppressions.txt")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
|
||||
@ -54,7 +54,7 @@ if (SANITIZE)
|
||||
set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
|
||||
endif()
|
||||
if (COMPILER_CLANG)
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
|
||||
|
@ -1,4 +1,4 @@
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/autogenerated_versions.txt)
|
||||
include(${PROJECT_SOURCE_DIR}/cmake/autogenerated_versions.txt)
|
||||
|
||||
set(VERSION_EXTRA "" CACHE STRING "")
|
||||
set(VERSION_TWEAK "" CACHE STRING "")
|
||||
|
@ -6,7 +6,7 @@ if (NOT ENABLE_AVRO)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(AVROCPP_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/avro/lang/c++")
|
||||
set(AVROCPP_ROOT_DIR "${PROJECT_SOURCE_DIR}/contrib/avro/lang/c++")
|
||||
set(AVROCPP_INCLUDE_DIR "${AVROCPP_ROOT_DIR}/api")
|
||||
set(AVROCPP_SOURCE_DIR "${AVROCPP_ROOT_DIR}/impl")
|
||||
|
||||
|
@ -103,11 +103,19 @@ set (SRCS_CONTEXT
|
||||
)
|
||||
|
||||
if (ARCH_AARCH64)
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S"
|
||||
)
|
||||
if (OS_DARWIN)
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_macho_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_macho_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_macho_gas.S"
|
||||
)
|
||||
else()
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S"
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S"
|
||||
)
|
||||
endif()
|
||||
elseif (ARCH_PPC64LE)
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/jump_ppc64_sysv_elf_gas.S"
|
||||
|
@ -18,7 +18,7 @@ endif()
|
||||
# Need to use C++17 since the compilation is not possible with C++20 currently.
|
||||
set (CMAKE_CXX_STANDARD 17)
|
||||
|
||||
set(CASS_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/cassandra)
|
||||
set(CASS_ROOT_DIR ${PROJECT_SOURCE_DIR}/contrib/cassandra)
|
||||
set(CASS_SRC_DIR "${CASS_ROOT_DIR}/src")
|
||||
set(CASS_INCLUDE_DIR "${CASS_ROOT_DIR}/include")
|
||||
|
||||
|
@ -26,7 +26,7 @@ endif ()
|
||||
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
|
||||
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
|
||||
# as the library that's built using embedded tzdata is also specific to OS_LINUX
|
||||
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
||||
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
||||
# remove existing copies so that its generated fresh on each build.
|
||||
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
|
||||
|
||||
|
@ -1,15 +1,30 @@
|
||||
set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest")
|
||||
set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/googletest")
|
||||
|
||||
add_library(_gtest "${SRC_DIR}/src/gtest-all.cc")
|
||||
add_library(_gtest "${SRC_DIR}/googletest/src/gtest-all.cc")
|
||||
set_target_properties(_gtest PROPERTIES VERSION "1.0.0")
|
||||
target_compile_definitions (_gtest PUBLIC GTEST_HAS_POSIX_RE=0)
|
||||
target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/include")
|
||||
target_include_directories(_gtest PRIVATE "${SRC_DIR}")
|
||||
target_include_directories(_gtest SYSTEM PUBLIC "${SRC_DIR}/googletest/include")
|
||||
target_include_directories(_gtest PRIVATE "${SRC_DIR}/googletest")
|
||||
|
||||
add_library(_gtest_main "${SRC_DIR}/src/gtest_main.cc")
|
||||
add_library(_gtest_main "${SRC_DIR}/googletest/src/gtest_main.cc")
|
||||
set_target_properties(_gtest_main PROPERTIES VERSION "1.0.0")
|
||||
target_link_libraries(_gtest_main PUBLIC _gtest)
|
||||
|
||||
add_library(_gtest_all INTERFACE)
|
||||
target_link_libraries(_gtest_all INTERFACE _gtest _gtest_main)
|
||||
add_library(ch_contrib::gtest_all ALIAS _gtest_all)
|
||||
|
||||
|
||||
add_library(_gmock "${SRC_DIR}/googlemock/src/gmock-all.cc")
|
||||
set_target_properties(_gmock PROPERTIES VERSION "1.0.0")
|
||||
target_compile_definitions (_gmock PUBLIC GTEST_HAS_POSIX_RE=0)
|
||||
target_include_directories(_gmock SYSTEM PUBLIC "${SRC_DIR}/googlemock/include" "${SRC_DIR}/googletest/include")
|
||||
target_include_directories(_gmock PRIVATE "${SRC_DIR}/googlemock")
|
||||
|
||||
add_library(_gmock_main "${SRC_DIR}/googlemock/src/gmock_main.cc")
|
||||
set_target_properties(_gmock_main PROPERTIES VERSION "1.0.0")
|
||||
target_link_libraries(_gmock_main PUBLIC _gmock)
|
||||
|
||||
add_library(_gmock_all INTERFACE)
|
||||
target_link_libraries(_gmock_all INTERFACE _gmock _gmock_main)
|
||||
add_library(ch_contrib::gmock_all ALIAS _gmock_all)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# This file is a modified version of contrib/libuv/CMakeLists.txt
|
||||
|
||||
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv")
|
||||
set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv")
|
||||
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/libuv")
|
||||
set (BINARY_DIR "${PROJECT_BINARY_DIR}/contrib/libuv")
|
||||
|
||||
set(uv_sources
|
||||
src/fs-poll.c
|
||||
|
@ -15,7 +15,7 @@ endif()
|
||||
|
||||
# This is the LGPL libmariadb project.
|
||||
|
||||
set(CC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c)
|
||||
set(CC_SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/mariadb-connector-c)
|
||||
set(CC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
set(WITH_SSL ON)
|
||||
|
@ -1,4 +1,4 @@
|
||||
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")
|
||||
set (SOURCE_DIR "${PROJECT_SOURCE_DIR}/contrib/snappy")
|
||||
|
||||
if (ARCH_S390X)
|
||||
set (SNAPPY_IS_BIG_ENDIAN 1)
|
||||
|
@ -1,4 +1,4 @@
|
||||
set (SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/zlib-ng)
|
||||
set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/contrib/zlib-ng)
|
||||
|
||||
add_definitions(-DZLIB_COMPAT)
|
||||
add_definitions(-DWITH_GZFILEOP)
|
||||
|
@ -15,7 +15,7 @@ nproc=$(($(nproc) + 2)) # increase parallelism
|
||||
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
|
||||
|
||||
mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
|
||||
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
|
||||
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}"
|
||||
mkdir -p "$HTML_RESULT_DIRECTORY"
|
||||
echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
|
||||
/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
|
||||
|
@ -19,8 +19,8 @@ Kafka lets you:
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
name1 [type1] [ALIAS expr1],
|
||||
name2 [type2] [ALIAS expr2],
|
||||
...
|
||||
) ENGINE = Kafka()
|
||||
SETTINGS
|
||||
|
@ -13,8 +13,8 @@ The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data th
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
|
||||
```
|
||||
|
docs/en/getting-started/example-datasets/reddit-comments.md (new file, 636 lines)
@ -0,0 +1,636 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/reddit-comments
|
||||
sidebar_label: Reddit comments
|
||||
---
|
||||
|
||||
# Reddit comments dataset
|
||||
|
||||
This dataset contains publicly available comments on Reddit from December 2005 through March 2023, comprising over 7B rows of data. The raw data is in JSON format in compressed files and the rows look like the following:
|
||||
|
||||
```json
|
||||
{"controversiality":0,"body":"A look at Vietnam and Mexico exposes the myth of market liberalisation.","subreddit_id":"t5_6","link_id":"t3_17863","stickied":false,"subreddit":"reddit.com","score":2,"ups":2,"author_flair_css_class":null,"created_utc":1134365188,"author_flair_text":null,"author":"frjo","id":"c13","edited":false,"parent_id":"t3_17863","gilded":0,"distinguished":null,"retrieved_on":1473738411}
|
||||
{"created_utc":1134365725,"author_flair_css_class":null,"score":1,"ups":1,"subreddit":"reddit.com","stickied":false,"link_id":"t3_17866","subreddit_id":"t5_6","controversiality":0,"body":"The site states \"What can I use it for? Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more...\", just like any other new breeed of sites that want us to store everything we have on the web. And they even guarantee multiple levels of security and encryption etc. But what prevents these web site operators fom accessing and/or stealing Meeting notes, Reports, technical specs Sign-up sheets, proposals and much more, for competitive or personal gains...? I am pretty sure that most of them are honest, but what's there to prevent me from setting up a good useful site and stealing all your data? Call me paranoid - I am.","retrieved_on":1473738411,"distinguished":null,"gilded":0,"id":"c14","edited":false,"parent_id":"t3_17866","author":"zse7zse","author_flair_text":null}
|
||||
{"gilded":0,"distinguished":null,"retrieved_on":1473738411,"author":"[deleted]","author_flair_text":null,"edited":false,"id":"c15","parent_id":"t3_17869","subreddit":"reddit.com","score":0,"ups":0,"created_utc":1134366848,"author_flair_css_class":null,"body":"Jython related topics by Frank Wierzbicki","controversiality":0,"subreddit_id":"t5_6","stickied":false,"link_id":"t3_17869"}
|
||||
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"[deleted]","edited":false,"parent_id":"t3_17870","id":"c16","subreddit":"reddit.com","created_utc":1134367660,"author_flair_css_class":null,"score":1,"ups":1,"body":"[deleted]","controversiality":0,"stickied":false,"link_id":"t3_17870","subreddit_id":"t5_6"}
|
||||
{"gilded":0,"retrieved_on":1473738411,"distinguished":null,"author_flair_text":null,"author":"rjoseph","edited":false,"id":"c17","parent_id":"t3_17817","subreddit":"reddit.com","author_flair_css_class":null,"created_utc":1134367754,"score":1,"ups":1,"body":"Saft is by far the best extension you could tak onto your Safari","controversiality":0,"link_id":"t3_17817","stickied":false,"subreddit_id":"t5_6"}
|
||||
```
|
||||
|
||||
A shoutout to Percona for the [motivation behind ingesting this dataset](https://www.percona.com/blog/big-data-set-reddit-comments-analyzing-clickhouse/), which we have downloaded and stored in an S3 bucket.
|
||||
|
||||
:::note
|
||||
The following commands were executed on ClickHouse Cloud. To run this on your own cluster, replace `default` in the `s3Cluster` function call with the name of your cluster. If you do not have a cluster, then replace the `s3Cluster` function with the `s3` function.
|
||||
:::
|
||||
|
||||
1. Let's create a table for the Reddit data:
|
||||
|
||||
```sql
|
||||
CREATE TABLE reddit
|
||||
(
|
||||
subreddit LowCardinality(String),
|
||||
subreddit_id LowCardinality(String),
|
||||
subreddit_type Enum('public' = 1, 'restricted' = 2, 'user' = 3, 'archived' = 4, 'gold_restricted' = 5, 'private' = 6),
|
||||
author LowCardinality(String),
|
||||
body String CODEC(ZSTD(6)),
|
||||
created_date Date DEFAULT toDate(created_utc),
|
||||
created_utc DateTime,
|
||||
retrieved_on DateTime,
|
||||
id String,
|
||||
parent_id String,
|
||||
link_id String,
|
||||
score Int32,
|
||||
total_awards_received UInt16,
|
||||
controversiality UInt8,
|
||||
gilded UInt8,
|
||||
collapsed_because_crowd_control UInt8,
|
||||
collapsed_reason Enum('' = 0, 'comment score below threshold' = 1, 'may be sensitive content' = 2, 'potentially toxic' = 3, 'potentially toxic content' = 4),
|
||||
distinguished Enum('' = 0, 'moderator' = 1, 'admin' = 2, 'special' = 3),
|
||||
removal_reason Enum('' = 0, 'legal' = 1),
|
||||
author_created_utc DateTime,
|
||||
author_fullname LowCardinality(String),
|
||||
author_patreon_flair UInt8,
|
||||
author_premium UInt8,
|
||||
can_gild UInt8,
|
||||
can_mod_post UInt8,
|
||||
collapsed UInt8,
|
||||
is_submitter UInt8,
|
||||
_edited String,
|
||||
locked UInt8,
|
||||
quarantined UInt8,
|
||||
no_follow UInt8,
|
||||
send_replies UInt8,
|
||||
stickied UInt8,
|
||||
author_flair_text LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (subreddit, created_date, author);
|
||||
```
|
||||
|
||||
:::note
|
||||
The names of the files in S3 start with `RC_YYYY-MM` where `YYYY-MM` goes from `2005-12` to `2023-02`. The compression changes a couple of times though, so the file extensions are not consistent. For example:
|
||||
|
||||
- the file names are initially `RC_2005-12.bz2` to `RC_2017-11.bz2`
|
||||
- then they look like `RC_2017-12.xz` to `RC_2018-09.xz`
|
||||
- and finally `RC_2018-10.zst` to `RC_2023-02.zst`
|
||||
:::
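As a rough sanity check of the format, something like the following previews a few rows of the oldest file directly (the exact `RC_2005-12.bz2` object path is assumed to sit under the same `original/` prefix used below; the compression method is detected from the file extension):

```sql
SELECT subreddit, author, body
FROM s3(
    'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2005-12.bz2',
    'JSONEachRow'
)
LIMIT 5;
```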
|
||||
|
||||
2. We are going to start with one month of data, but if you want to simply insert every row, skip ahead to step 8 below. The following file has 86M records from December 2017:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
|
||||
'JSONEachRow'
|
||||
);
|
||||
```
|
||||
|
||||
If you do not have a cluster, use `s3` instead of `s3Cluster`:
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3(
|
||||
'https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/reddit/original/RC_2017-12.xz',
|
||||
'JSONEachRow'
|
||||
);
|
||||
```
|
||||
|
||||
3. It will take a while depending on your resources, but when it's done verify it worked:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableQuantity(count())
|
||||
FROM reddit;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─formatReadableQuantity(count())─┐
|
||||
│ 85.97 million │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
4. Let's see how many unique subreddits there were in December of 2017:
|
||||
|
||||
```sql
|
||||
SELECT uniqExact(subreddit)
|
||||
FROM reddit;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─uniqExact(subreddit)─┐
|
||||
│ 91613 │
|
||||
└──────────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 1.572 sec. Processed 85.97 million rows, 367.43 MB (54.71 million rows/s., 233.80 MB/s.)
|
||||
```
|
||||
|
||||
5. This query returns the top 20 subreddits (in terms of number of comments):
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
count() AS c
|
||||
FROM reddit
|
||||
GROUP BY subreddit
|
||||
ORDER BY c DESC
|
||||
LIMIT 20;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─subreddit───────┬───────c─┐
|
||||
│ AskReddit │ 5245881 │
|
||||
│ politics │ 1753120 │
|
||||
│ nfl │ 1220266 │
|
||||
│ nba │ 960388 │
|
||||
│ The_Donald │ 931857 │
|
||||
│ news │ 796617 │
|
||||
│ worldnews │ 765709 │
|
||||
│ CFB │ 710360 │
|
||||
│ gaming │ 602761 │
|
||||
│ movies │ 601966 │
|
||||
│ soccer │ 590628 │
|
||||
│ Bitcoin │ 583783 │
|
||||
│ pics │ 563408 │
|
||||
│ StarWars │ 562514 │
|
||||
│ funny │ 547563 │
|
||||
│ leagueoflegends │ 517213 │
|
||||
│ teenagers │ 492020 │
|
||||
│ DestinyTheGame │ 477377 │
|
||||
│ todayilearned │ 472650 │
|
||||
│ videos │ 450581 │
|
||||
└─────────────────┴─────────┘
|
||||
|
||||
20 rows in set. Elapsed: 0.368 sec. Processed 85.97 million rows, 367.43 MB (233.34 million rows/s., 997.25 MB/s.)
|
||||
```
|
||||
|
||||
6. Here are the top 10 authors in December of 2017, in terms of number of comments posted:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
author,
|
||||
count() AS c
|
||||
FROM reddit
|
||||
GROUP BY author
|
||||
ORDER BY c DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─author──────────┬───────c─┐
|
||||
│ [deleted] │ 5913324 │
|
||||
│ AutoModerator │ 784886 │
|
||||
│ ImagesOfNetwork │ 83241 │
|
||||
│ BitcoinAllBot │ 54484 │
|
||||
│ imguralbumbot │ 45822 │
|
||||
│ RPBot │ 29337 │
|
||||
│ WikiTextBot │ 25982 │
|
||||
│ Concise_AMA_Bot │ 19974 │
|
||||
│ MTGCardFetcher │ 19103 │
|
||||
│ TotesMessenger │ 19057 │
|
||||
└─────────────────┴─────────┘
|
||||
|
||||
10 rows in set. Elapsed: 8.143 sec. Processed 85.97 million rows, 711.05 MB (10.56 million rows/s., 87.32 MB/s.)
|
||||
```
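Most of the top "authors" are bots or deleted accounts. A rough variation that skips the two largest of those (just the obvious entries from the table above):

```sql
SELECT
    author,
    count() AS c
FROM reddit
WHERE author NOT IN ('[deleted]', 'AutoModerator')
GROUP BY author
ORDER BY c DESC
LIMIT 10;
```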
|
||||
|
||||
7. We already inserted some data, but we will start over:
|
||||
|
||||
```sql
|
||||
TRUNCATE TABLE reddit;
|
||||
```
|
||||
|
||||
8. This is a fun dataset and it looks like we can find some great information, so let's go ahead and insert the entire dataset from 2005 to 2023. When you're ready, run this command to insert all the rows. (It takes a while - up to 17 hours!)
|
||||
|
||||
```sql
|
||||
INSERT INTO reddit
|
||||
SELECT *
|
||||
FROM s3Cluster(
|
||||
'default',
|
||||
'https://clickhouse-public-datasets.s3.amazonaws.com/reddit/original/RC*',
|
||||
'JSONEachRow'
|
||||
)
|
||||
SETTINGS zstd_window_log_max = 31;
|
||||
```
|
||||
|
||||
The response looks like:
|
||||
|
||||
```response
|
||||
0 rows in set. Elapsed: 61187.839 sec. Processed 6.74 billion rows, 2.06 TB (110.17 thousand rows/s., 33.68 MB/s.)
|
||||
```
|
||||
|
||||
9. Let's see how many rows were inserted and how much disk space the table is using:
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(rows) AS count,
|
||||
formatReadableQuantity(count),
|
||||
formatReadableSize(sum(bytes)) AS disk_size,
|
||||
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
|
||||
FROM system.parts
|
||||
WHERE (table = 'reddit') AND active
|
||||
```
|
||||
|
||||
Notice that the compressed size on disk is about 1/3 of the uncompressed size:
|
||||
|
||||
```response
|
||||
┌──────count─┬─formatReadableQuantity(sum(rows))─┬─disk_size──┬─uncompressed_size─┐
|
||||
│ 6739503568 │ 6.74 billion │ 501.10 GiB │ 1.51 TiB │
|
||||
└────────────┴───────────────────────────────────┴────────────┴───────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.010 sec.
|
||||
```
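For a per-column view of where that space goes, a sketch like this (assuming the table lives in the current database) reads the sizes from `system.columns`:

```sql
SELECT
    name,
    formatReadableSize(data_compressed_bytes) AS compressed,
    formatReadableSize(data_uncompressed_bytes) AS uncompressed
FROM system.columns
WHERE table = 'reddit'
ORDER BY data_compressed_bytes DESC;
```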
|
||||
|
||||
10. The following query shows how many comments, authors and subreddits we have for each month:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfMonth(created_utc) AS firstOfMonth,
|
||||
count() AS c,
|
||||
bar(c, 0, 50000000, 25) AS bar_count,
|
||||
uniq(author) AS authors,
|
||||
bar(authors, 0, 5000000, 25) AS bar_authors,
|
||||
uniq(subreddit) AS subreddits,
|
||||
bar(subreddits, 0, 100000, 25) AS bar_subreddits
|
||||
FROM reddit
|
||||
GROUP BY firstOfMonth
|
||||
ORDER BY firstOfMonth ASC;
|
||||
```
|
||||
|
||||
This is a substantial query that has to process all 6.74 billion rows, but we still get an impressive response time (about 3 minutes):
|
||||
|
||||
```response
|
||||
┌─firstOfMonth─┬─────────c─┬─bar_count─────────────────┬─authors─┬─bar_authors───────────────┬─subreddits─┬─bar_subreddits────────────┐
|
||||
│ 2005-12-01 │ 1075 │ │ 394 │ │ 1 │ │
|
||||
│ 2006-01-01 │ 3666 │ │ 791 │ │ 2 │ │
|
||||
│ 2006-02-01 │ 9095 │ │ 1464 │ │ 18 │ │
|
||||
│ 2006-03-01 │ 13859 │ │ 1958 │ │ 15 │ │
|
||||
│ 2006-04-01 │ 19090 │ │ 2334 │ │ 21 │ │
|
||||
│ 2006-05-01 │ 26859 │ │ 2698 │ │ 21 │ │
|
||||
│ 2006-06-01 │ 29163 │ │ 3043 │ │ 19 │ │
|
||||
│ 2006-07-01 │ 37031 │ │ 3532 │ │ 22 │ │
|
||||
│ 2006-08-01 │ 50559 │ │ 4750 │ │ 24 │ │
|
||||
│ 2006-09-01 │ 50675 │ │ 4908 │ │ 21 │ │
|
||||
│ 2006-10-01 │ 54148 │ │ 5654 │ │ 31 │ │
|
||||
│ 2006-11-01 │ 62021 │ │ 6490 │ │ 23 │ │
|
||||
│ 2006-12-01 │ 61018 │ │ 6707 │ │ 24 │ │
|
||||
│ 2007-01-01 │ 81341 │ │ 7931 │ │ 23 │ │
|
||||
│ 2007-02-01 │ 95634 │ │ 9020 │ │ 21 │ │
|
||||
│ 2007-03-01 │ 112444 │ │ 10842 │ │ 23 │ │
|
||||
│ 2007-04-01 │ 126773 │ │ 10701 │ │ 26 │ │
|
||||
│ 2007-05-01 │ 170097 │ │ 11365 │ │ 25 │ │
|
||||
│ 2007-06-01 │ 178800 │ │ 11267 │ │ 22 │ │
|
||||
│ 2007-07-01 │ 203319 │ │ 12482 │ │ 25 │ │
|
||||
│ 2007-08-01 │ 225111 │ │ 14124 │ │ 30 │ │
|
||||
│ 2007-09-01 │ 259497 │ ▏ │ 15416 │ │ 33 │ │
|
||||
│ 2007-10-01 │ 274170 │ ▏ │ 15302 │ │ 36 │ │
|
||||
│ 2007-11-01 │ 372983 │ ▏ │ 15134 │ │ 43 │ │
|
||||
│ 2007-12-01 │ 363390 │ ▏ │ 15915 │ │ 31 │ │
|
||||
│ 2008-01-01 │ 452990 │ ▏ │ 18857 │ │ 126 │ │
|
||||
│ 2008-02-01 │ 441768 │ ▏ │ 18266 │ │ 173 │ │
|
||||
│ 2008-03-01 │ 463728 │ ▏ │ 18947 │ │ 292 │ │
|
||||
│ 2008-04-01 │ 468317 │ ▏ │ 18590 │ │ 323 │ │
|
||||
│ 2008-05-01 │ 536380 │ ▎ │ 20861 │ │ 375 │ │
|
||||
│ 2008-06-01 │ 577684 │ ▎ │ 22557 │ │ 575 │ ▏ │
|
||||
│ 2008-07-01 │ 592610 │ ▎ │ 23123 │ │ 657 │ ▏ │
|
||||
│ 2008-08-01 │ 595959 │ ▎ │ 23729 │ │ 707 │ ▏ │
|
||||
│ 2008-09-01 │ 680892 │ ▎ │ 26374 │ ▏ │ 801 │ ▏ │
|
||||
│ 2008-10-01 │ 789874 │ ▍ │ 28970 │ ▏ │ 893 │ ▏ │
|
||||
│ 2008-11-01 │ 792310 │ ▍ │ 30272 │ ▏ │ 1024 │ ▎ │
|
||||
│ 2008-12-01 │ 850359 │ ▍ │ 34073 │ ▏ │ 1103 │ ▎ │
|
||||
│ 2009-01-01 │ 1051649 │ ▌ │ 38978 │ ▏ │ 1316 │ ▎ │
|
||||
│ 2009-02-01 │ 944711 │ ▍ │ 43390 │ ▏ │ 1132 │ ▎ │
|
||||
│ 2009-03-01 │ 1048643 │ ▌ │ 46516 │ ▏ │ 1203 │ ▎ │
|
||||
│ 2009-04-01 │ 1094599 │ ▌ │ 48284 │ ▏ │ 1334 │ ▎ │
|
||||
│ 2009-05-01 │ 1201257 │ ▌ │ 52512 │ ▎ │ 1395 │ ▎ │
|
||||
│ 2009-06-01 │ 1258750 │ ▋ │ 57728 │ ▎ │ 1473 │ ▎ │
|
||||
│ 2009-07-01 │ 1470290 │ ▋ │ 60098 │ ▎ │ 1686 │ ▍ │
|
||||
│ 2009-08-01 │ 1750688 │ ▉ │ 67347 │ ▎ │ 1777 │ ▍ │
|
||||
│ 2009-09-01 │ 2032276 │ █ │ 78051 │ ▍ │ 1784 │ ▍ │
|
||||
│ 2009-10-01 │ 2242017 │ █ │ 93409 │ ▍ │ 2071 │ ▌ │
|
||||
│ 2009-11-01 │ 2207444 │ █ │ 95940 │ ▍ │ 2141 │ ▌ │
|
||||
│ 2009-12-01 │ 2560510 │ █▎ │ 104239 │ ▌ │ 2141 │ ▌ │
|
||||
│ 2010-01-01 │ 2884096 │ █▍ │ 114314 │ ▌ │ 2313 │ ▌ │
|
||||
│ 2010-02-01 │ 2687779 │ █▎ │ 115683 │ ▌ │ 2522 │ ▋ │
|
||||
│ 2010-03-01 │ 3228254 │ █▌ │ 125775 │ ▋ │ 2890 │ ▋ │
|
||||
│ 2010-04-01 │ 3209898 │ █▌ │ 128936 │ ▋ │ 3170 │ ▊ │
|
||||
│ 2010-05-01 │ 3267363 │ █▋ │ 131851 │ ▋ │ 3166 │ ▊ │
|
||||
│ 2010-06-01 │ 3532867 │ █▊ │ 139522 │ ▋ │ 3301 │ ▊ │
|
||||
│ 2010-07-01 │ 4032737 │ ██ │ 153451 │ ▊ │ 3662 │ ▉ │
|
||||
│ 2010-08-01 │ 4247982 │ ██ │ 164071 │ ▊ │ 3653 │ ▉ │
|
||||
│ 2010-09-01 │ 4704069 │ ██▎ │ 186613 │ ▉ │ 4009 │ █ │
|
||||
│ 2010-10-01 │ 5032368 │ ██▌ │ 203800 │ █ │ 4154 │ █ │
|
||||
│ 2010-11-01 │ 5689002 │ ██▊ │ 226134 │ █▏ │ 4383 │ █ │
|
||||
│ 2010-12-01 │ 5972642 │ ██▉ │ 245824 │ █▏ │ 4692 │ █▏ │
|
||||
│ 2011-01-01 │ 6603329 │ ███▎ │ 270025 │ █▎ │ 5141 │ █▎ │
|
||||
│ 2011-02-01 │ 6363114 │ ███▏ │ 277593 │ █▍ │ 5202 │ █▎ │
|
||||
│ 2011-03-01 │ 7556165 │ ███▊ │ 314748 │ █▌ │ 5445 │ █▎ │
|
||||
│ 2011-04-01 │ 7571398 │ ███▊ │ 329920 │ █▋ │ 6128 │ █▌ │
|
||||
│ 2011-05-01 │ 8803949 │ ████▍ │ 365013 │ █▊ │ 6834 │ █▋ │
|
||||
│ 2011-06-01 │ 9766511 │ ████▉ │ 393945 │ █▉ │ 7519 │ █▉ │
|
||||
│ 2011-07-01 │ 10557466 │ █████▎ │ 424235 │ ██ │ 8293 │ ██ │
|
||||
│ 2011-08-01 │ 12316144 │ ██████▏ │ 475326 │ ██▍ │ 9657 │ ██▍ │
|
||||
│ 2011-09-01 │ 12150412 │ ██████ │ 503142 │ ██▌ │ 10278 │ ██▌ │
|
||||
│ 2011-10-01 │ 13470278 │ ██████▋ │ 548801 │ ██▋ │ 10922 │ ██▋ │
|
||||
│ 2011-11-01 │ 13621533 │ ██████▊ │ 574435 │ ██▊ │ 11572 │ ██▉ │
|
||||
│ 2011-12-01 │ 14509469 │ ███████▎ │ 622849 │ ███ │ 12335 │ ███ │
|
||||
│ 2012-01-01 │ 16350205 │ ████████▏ │ 696110 │ ███▍ │ 14281 │ ███▌ │
|
||||
│ 2012-02-01 │ 16015695 │ ████████ │ 722892 │ ███▌ │ 14949 │ ███▋ │
|
||||
│ 2012-03-01 │ 17881943 │ ████████▉ │ 789664 │ ███▉ │ 15795 │ ███▉ │
|
||||
│ 2012-04-01 │ 19044534 │ █████████▌ │ 842491 │ ████▏ │ 16440 │ ████ │
|
||||
│ 2012-05-01 │ 20388260 │ ██████████▏ │ 886176 │ ████▍ │ 16974 │ ████▏ │
|
||||
│ 2012-06-01 │ 21897913 │ ██████████▉ │ 946798 │ ████▋ │ 17952 │ ████▍ │
|
||||
│ 2012-07-01 │ 24087517 │ ████████████ │ 1018636 │ █████ │ 19069 │ ████▊ │
|
||||
│ 2012-08-01 │ 25703326 │ ████████████▊ │ 1094445 │ █████▍ │ 20553 │ █████▏ │
|
||||
│ 2012-09-01 │ 23419524 │ ███████████▋ │ 1088491 │ █████▍ │ 20831 │ █████▏ │
|
||||
│ 2012-10-01 │ 24788236 │ ████████████▍ │ 1131885 │ █████▋ │ 21868 │ █████▍ │
|
||||
│ 2012-11-01 │ 24648302 │ ████████████▎ │ 1167608 │ █████▊ │ 21791 │ █████▍ │
|
||||
│ 2012-12-01 │ 26080276 │ █████████████ │ 1218402 │ ██████ │ 22622 │ █████▋ │
|
||||
│ 2013-01-01 │ 30365867 │ ███████████████▏ │ 1341703 │ ██████▋ │ 24696 │ ██████▏ │
|
||||
│ 2013-02-01 │ 27213960 │ █████████████▌ │ 1304756 │ ██████▌ │ 24514 │ ██████▏ │
|
||||
│ 2013-03-01 │ 30771274 │ ███████████████▍ │ 1391703 │ ██████▉ │ 25730 │ ██████▍ │
|
||||
│ 2013-04-01 │ 33259557 │ ████████████████▋ │ 1485971 │ ███████▍ │ 27294 │ ██████▊ │
|
||||
│ 2013-05-01 │ 33126225 │ ████████████████▌ │ 1506473 │ ███████▌ │ 27299 │ ██████▊ │
|
||||
│ 2013-06-01 │ 32648247 │ ████████████████▎ │ 1506650 │ ███████▌ │ 27450 │ ██████▊ │
|
||||
│ 2013-07-01 │ 34922133 │ █████████████████▍ │ 1561771 │ ███████▊ │ 28294 │ ███████ │
|
||||
│ 2013-08-01 │ 34766579 │ █████████████████▍ │ 1589781 │ ███████▉ │ 28943 │ ███████▏ │
|
||||
│ 2013-09-01 │ 31990369 │ ███████████████▉ │ 1570342 │ ███████▊ │ 29408 │ ███████▎ │
|
||||
│ 2013-10-01 │ 35940040 │ █████████████████▉ │ 1683770 │ ████████▍ │ 30273 │ ███████▌ │
|
||||
│ 2013-11-01 │ 37396497 │ ██████████████████▋ │ 1757467 │ ████████▊ │ 31173 │ ███████▊ │
|
||||
│ 2013-12-01 │ 39810216 │ ███████████████████▉ │ 1846204 │ █████████▏ │ 32326 │ ████████ │
|
||||
│ 2014-01-01 │ 42420655 │ █████████████████████▏ │ 1927229 │ █████████▋ │ 35603 │ ████████▉ │
|
||||
│ 2014-02-01 │ 38703362 │ ███████████████████▎ │ 1874067 │ █████████▎ │ 37007 │ █████████▎ │
|
||||
│ 2014-03-01 │ 42459956 │ █████████████████████▏ │ 1959888 │ █████████▊ │ 37948 │ █████████▍ │
|
||||
│ 2014-04-01 │ 42440735 │ █████████████████████▏ │ 1951369 │ █████████▊ │ 38362 │ █████████▌ │
|
||||
│ 2014-05-01 │ 42514094 │ █████████████████████▎ │ 1970197 │ █████████▊ │ 39078 │ █████████▊ │
|
||||
│ 2014-06-01 │ 41990650 │ ████████████████████▉ │ 1943850 │ █████████▋ │ 38268 │ █████████▌ │
|
||||
│ 2014-07-01 │ 46868899 │ ███████████████████████▍ │ 2059346 │ ██████████▎ │ 40634 │ ██████████▏ │
|
||||
│ 2014-08-01 │ 46990813 │ ███████████████████████▍ │ 2117335 │ ██████████▌ │ 41764 │ ██████████▍ │
|
||||
│ 2014-09-01 │ 44992201 │ ██████████████████████▍ │ 2124708 │ ██████████▌ │ 41890 │ ██████████▍ │
|
||||
│ 2014-10-01 │ 47497520 │ ███████████████████████▋ │ 2206535 │ ███████████ │ 43109 │ ██████████▊ │
|
||||
│ 2014-11-01 │ 46118074 │ ███████████████████████ │ 2239747 │ ███████████▏ │ 43718 │ ██████████▉ │
|
||||
│ 2014-12-01 │ 48807699 │ ████████████████████████▍ │ 2372945 │ ███████████▊ │ 43823 │ ██████████▉ │
|
||||
│ 2015-01-01 │ 53851542 │ █████████████████████████ │ 2499536 │ ████████████▍ │ 47172 │ ███████████▊ │
|
||||
│ 2015-02-01 │ 48342747 │ ████████████████████████▏ │ 2448496 │ ████████████▏ │ 47229 │ ███████████▊ │
|
||||
│ 2015-03-01 │ 54564441 │ █████████████████████████ │ 2550534 │ ████████████▊ │ 48156 │ ████████████ │
|
||||
│ 2015-04-01 │ 55005780 │ █████████████████████████ │ 2609443 │ █████████████ │ 49865 │ ████████████▍ │
|
||||
│ 2015-05-01 │ 54504410 │ █████████████████████████ │ 2585535 │ ████████████▉ │ 50137 │ ████████████▌ │
|
||||
│ 2015-06-01 │ 54258492 │ █████████████████████████ │ 2595129 │ ████████████▉ │ 49598 │ ████████████▍ │
|
||||
│ 2015-07-01 │ 58451788 │ █████████████████████████ │ 2720026 │ █████████████▌ │ 55022 │ █████████████▊ │
|
||||
│ 2015-08-01 │ 58075327 │ █████████████████████████ │ 2743994 │ █████████████▋ │ 55302 │ █████████████▊ │
|
||||
│ 2015-09-01 │ 55574825 │ █████████████████████████ │ 2672793 │ █████████████▎ │ 53960 │ █████████████▍ │
|
||||
│ 2015-10-01 │ 59494045 │ █████████████████████████ │ 2816426 │ ██████████████ │ 70210 │ █████████████████▌ │
|
||||
│ 2015-11-01 │ 57117500 │ █████████████████████████ │ 2847146 │ ██████████████▏ │ 71363 │ █████████████████▊ │
|
||||
│ 2015-12-01 │ 58523312 │ █████████████████████████ │ 2854840 │ ██████████████▎ │ 94559 │ ███████████████████████▋ │
|
||||
│ 2016-01-01 │ 61991732 │ █████████████████████████ │ 2920366 │ ██████████████▌ │ 108438 │ █████████████████████████ │
|
||||
│ 2016-02-01 │ 59189875 │ █████████████████████████ │ 2854683 │ ██████████████▎ │ 109916 │ █████████████████████████ │
|
||||
│ 2016-03-01 │ 63918864 │ █████████████████████████ │ 2969542 │ ██████████████▊ │ 84787 │ █████████████████████▏ │
|
||||
│ 2016-04-01 │ 64271256 │ █████████████████████████ │ 2999086 │ ██████████████▉ │ 61647 │ ███████████████▍ │
|
||||
│ 2016-05-01 │ 65212004 │ █████████████████████████ │ 3034674 │ ███████████████▏ │ 67465 │ ████████████████▊ │
|
||||
│ 2016-06-01 │ 65867743 │ █████████████████████████ │ 3057604 │ ███████████████▎ │ 75170 │ ██████████████████▊ │
|
||||
│ 2016-07-01 │ 66974735 │ █████████████████████████ │ 3199374 │ ███████████████▉ │ 77732 │ ███████████████████▍ │
|
||||
│ 2016-08-01 │ 69654819 │ █████████████████████████ │ 3239957 │ ████████████████▏ │ 63080 │ ███████████████▊ │
|
||||
│ 2016-09-01 │ 67024973 │ █████████████████████████ │ 3190864 │ ███████████████▉ │ 62324 │ ███████████████▌ │
|
||||
│ 2016-10-01 │ 71826553 │ █████████████████████████ │ 3284340 │ ████████████████▍ │ 62549 │ ███████████████▋ │
|
||||
│ 2016-11-01 │ 71022319 │ █████████████████████████ │ 3300822 │ ████████████████▌ │ 69718 │ █████████████████▍ │
|
||||
│ 2016-12-01 │ 72942967 │ █████████████████████████ │ 3430324 │ █████████████████▏ │ 71705 │ █████████████████▉ │
|
||||
│ 2017-01-01 │ 78946585 │ █████████████████████████ │ 3572093 │ █████████████████▊ │ 78198 │ ███████████████████▌ │
|
||||
│ 2017-02-01 │ 70609487 │ █████████████████████████ │ 3421115 │ █████████████████ │ 69823 │ █████████████████▍ │
|
||||
│ 2017-03-01 │ 79723106 │ █████████████████████████ │ 3638122 │ ██████████████████▏ │ 73865 │ ██████████████████▍ │
|
||||
│ 2017-04-01 │ 77478009 │ █████████████████████████ │ 3620591 │ ██████████████████ │ 74387 │ ██████████████████▌ │
|
||||
│ 2017-05-01 │ 79810360 │ █████████████████████████ │ 3650820 │ ██████████████████▎ │ 74356 │ ██████████████████▌ │
|
||||
│ 2017-06-01 │ 79901711 │ █████████████████████████ │ 3737614 │ ██████████████████▋ │ 72114 │ ██████████████████ │
|
||||
│ 2017-07-01 │ 81798725 │ █████████████████████████ │ 3872330 │ ███████████████████▎ │ 76052 │ ███████████████████ │
|
||||
│ 2017-08-01 │ 84658503 │ █████████████████████████ │ 3960093 │ ███████████████████▊ │ 77798 │ ███████████████████▍ │
|
||||
│ 2017-09-01 │ 83165192 │ █████████████████████████ │ 3880501 │ ███████████████████▍ │ 78402 │ ███████████████████▌ │
|
||||
│ 2017-10-01 │ 85828912 │ █████████████████████████ │ 3980335 │ ███████████████████▉ │ 80685 │ ████████████████████▏ │
|
||||
│ 2017-11-01 │ 84965681 │ █████████████████████████ │ 4026749 │ ████████████████████▏ │ 82659 │ ████████████████████▋ │
|
||||
│ 2017-12-01 │ 85973810 │ █████████████████████████ │ 4196354 │ ████████████████████▉ │ 91984 │ ██████████████████████▉ │
|
||||
│ 2018-01-01 │ 91558594 │ █████████████████████████ │ 4364443 │ █████████████████████▊ │ 102577 │ █████████████████████████ │
|
||||
│ 2018-02-01 │ 86467179 │ █████████████████████████ │ 4277899 │ █████████████████████▍ │ 104610 │ █████████████████████████ │
|
||||
│ 2018-03-01 │ 96490262 │ █████████████████████████ │ 4422470 │ ██████████████████████ │ 112559 │ █████████████████████████ │
|
||||
│ 2018-04-01 │ 98101232 │ █████████████████████████ │ 4572434 │ ██████████████████████▊ │ 105284 │ █████████████████████████ │
|
||||
│ 2018-05-01 │ 100109100 │ █████████████████████████ │ 4698908 │ ███████████████████████▍ │ 103910 │ █████████████████████████ │
|
||||
│ 2018-06-01 │ 100009462 │ █████████████████████████ │ 4697426 │ ███████████████████████▍ │ 101107 │ █████████████████████████ │
|
||||
│ 2018-07-01 │ 108151359 │ █████████████████████████ │ 5099492 │ █████████████████████████ │ 106184 │ █████████████████████████ │
|
||||
│ 2018-08-01 │ 107330940 │ █████████████████████████ │ 5084082 │ █████████████████████████ │ 109985 │ █████████████████████████ │
|
||||
│ 2018-09-01 │ 104473929 │ █████████████████████████ │ 5011953 │ █████████████████████████ │ 109710 │ █████████████████████████ │
|
||||
│ 2018-10-01 │ 112346556 │ █████████████████████████ │ 5320405 │ █████████████████████████ │ 112533 │ █████████████████████████ │
|
||||
│ 2018-11-01 │ 112573001 │ █████████████████████████ │ 5353282 │ █████████████████████████ │ 112211 │ █████████████████████████ │
|
||||
│ 2018-12-01 │ 121953600 │ █████████████████████████ │ 5611543 │ █████████████████████████ │ 118291 │ █████████████████████████ │
|
||||
│ 2019-01-01 │ 129386587 │ █████████████████████████ │ 6016687 │ █████████████████████████ │ 125725 │ █████████████████████████ │
|
||||
│ 2019-02-01 │ 120645639 │ █████████████████████████ │ 5974488 │ █████████████████████████ │ 125420 │ █████████████████████████ │
|
||||
│ 2019-03-01 │ 137650471 │ █████████████████████████ │ 6410197 │ █████████████████████████ │ 135924 │ █████████████████████████ │
|
||||
│ 2019-04-01 │ 138473643 │ █████████████████████████ │ 6416384 │ █████████████████████████ │ 139844 │ █████████████████████████ │
|
||||
│ 2019-05-01 │ 142463421 │ █████████████████████████ │ 6574836 │ █████████████████████████ │ 142012 │ █████████████████████████ │
|
||||
│ 2019-06-01 │ 134172939 │ █████████████████████████ │ 6601267 │ █████████████████████████ │ 140997 │ █████████████████████████ │
|
||||
│ 2019-07-01 │ 145965083 │ █████████████████████████ │ 6901822 │ █████████████████████████ │ 147802 │ █████████████████████████ │
|
||||
│ 2019-08-01 │ 146854393 │ █████████████████████████ │ 6993882 │ █████████████████████████ │ 151888 │ █████████████████████████ │
|
||||
│ 2019-09-01 │ 137540219 │ █████████████████████████ │ 7001362 │ █████████████████████████ │ 148839 │ █████████████████████████ │
|
||||
│ 2019-10-01 │ 129771456 │ █████████████████████████ │ 6825690 │ █████████████████████████ │ 144453 │ █████████████████████████ │
|
||||
│ 2019-11-01 │ 107990259 │ █████████████████████████ │ 6368286 │ █████████████████████████ │ 141768 │ █████████████████████████ │
|
||||
│ 2019-12-01 │ 112895934 │ █████████████████████████ │ 6640902 │ █████████████████████████ │ 148277 │ █████████████████████████ │
|
||||
│ 2020-01-01 │ 54354879 │ █████████████████████████ │ 4782339 │ ███████████████████████▉ │ 111658 │ █████████████████████████ │
|
||||
│ 2020-02-01 │ 22696923 │ ███████████▎ │ 3135175 │ ███████████████▋ │ 79521 │ ███████████████████▉ │
|
||||
│ 2020-03-01 │ 3466677 │ █▋ │ 987960 │ ████▉ │ 40901 │ ██████████▏ │
|
||||
└──────────────┴───────────┴───────────────────────────┴─────────┴───────────────────────────┴────────────┴───────────────────────────┘
|
||||
|
||||
172 rows in set. Elapsed: 184.809 sec. Processed 6.74 billion rows, 89.56 GB (36.47 million rows/s., 484.62 MB/s.)
|
||||
```
|
||||
|
||||
11. Here are the top 10 subreddits of 2022:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
count() AS count
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2022
|
||||
GROUP BY subreddit
|
||||
ORDER BY count DESC
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
The response is:
|
||||
|
||||
```response
|
||||
┌─subreddit────────┬───count─┐
|
||||
│ AskReddit │ 3858203 │
|
||||
│ politics │ 1356782 │
|
||||
│ memes │ 1249120 │
|
||||
│ nfl │ 883667 │
|
||||
│ worldnews │ 866065 │
|
||||
│ teenagers │ 777095 │
|
||||
│ AmItheAsshole │ 752720 │
|
||||
│ dankmemes │ 657932 │
|
||||
│ nba │ 514184 │
|
||||
│ unpopularopinion │ 473649 │
|
||||
└──────────────────┴─────────┘
|
||||
|
||||
10 rows in set. Elapsed: 27.824 sec. Processed 6.74 billion rows, 53.26 GB (242.22 million rows/s., 1.91 GB/s.)
|
||||
```
|
||||
|
||||
12. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
subreddit,
|
||||
newcount - oldcount AS diff
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
subreddit,
|
||||
count(*) AS newcount
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2019
|
||||
GROUP BY subreddit
|
||||
)
|
||||
ALL INNER JOIN
|
||||
(
|
||||
SELECT
|
||||
subreddit,
|
||||
count(*) AS oldcount
|
||||
FROM reddit
|
||||
WHERE toYear(created_utc) = 2018
|
||||
GROUP BY subreddit
|
||||
) USING (subreddit)
|
||||
ORDER BY diff DESC
|
||||
LIMIT 50
|
||||
SETTINGS joined_subquery_requires_alias = 0;
|
||||
```
|
||||
|
||||
It looks like memes and teenagers were busy on Reddit in 2019:
|
||||
|
||||
```response
|
||||
┌─subreddit────────────┬─────diff─┐
|
||||
│ memes │ 15368369 │
|
||||
│ AskReddit │ 14663662 │
|
||||
│ teenagers │ 12266991 │
|
||||
│ AmItheAsshole │ 11561538 │
|
||||
│ dankmemes │ 11305158 │
|
||||
│ unpopularopinion │ 6332772 │
|
||||
│ PewdiepieSubmissions │ 5930818 │
|
||||
│ Market76 │ 5014668 │
|
||||
│ relationship_advice │ 3776383 │
|
||||
│ freefolk │ 3169236 │
|
||||
│ Minecraft │ 3160241 │
|
||||
│ classicwow │ 2907056 │
|
||||
│ Animemes │ 2673398 │
|
||||
│ gameofthrones │ 2402835 │
|
||||
│ PublicFreakout │ 2267605 │
|
||||
│ ShitPostCrusaders │ 2207266 │
|
||||
│ RoastMe │ 2195715 │
|
||||
│ gonewild │ 2148649 │
|
||||
│ AnthemTheGame │ 1803818 │
|
||||
│ entitledparents │ 1706270 │
|
||||
│ MortalKombat │ 1679508 │
|
||||
│ Cringetopia │ 1620555 │
|
||||
│ pokemon │ 1615266 │
|
||||
│ HistoryMemes │ 1608289 │
|
||||
│ Brawlstars │ 1574977 │
|
||||
│ iamatotalpieceofshit │ 1558315 │
|
||||
│ trashy │ 1518549 │
|
||||
│ ChapoTrapHouse │ 1505748 │
|
||||
│ Pikabu │ 1501001 │
|
||||
│ Showerthoughts │ 1475101 │
|
||||
│ cursedcomments │ 1465607 │
|
||||
│ ukpolitics │ 1386043 │
|
||||
│ wallstreetbets │ 1384431 │
|
||||
│ interestingasfuck │ 1378900 │
|
||||
│ wholesomememes │ 1353333 │
|
||||
│ AskOuija │ 1233263 │
|
||||
│ borderlands3 │ 1197192 │
|
||||
│ aww │ 1168257 │
|
||||
│ insanepeoplefacebook │ 1155473 │
|
||||
│ FortniteCompetitive │ 1122778 │
|
||||
│ EpicSeven │ 1117380 │
|
||||
│ FreeKarma4U │ 1116423 │
|
||||
│ YangForPresidentHQ │ 1086700 │
|
||||
│ SquaredCircle │ 1044089 │
|
||||
│ MurderedByWords │ 1042511 │
|
||||
│ AskMen │ 1024434 │
|
||||
│ thedivision │ 1016634 │
|
||||
│ barstoolsports │ 985032 │
|
||||
│ nfl │ 978340 │
|
||||
│ BattlefieldV │ 971408 │
|
||||
└──────────────────────┴──────────┘
|
||||
|
||||
50 rows in set. Elapsed: 65.954 sec. Processed 13.48 billion rows, 79.67 GB (204.37 million rows/s., 1.21 GB/s.)
|
||||
```
|
||||
|
||||
13. One more query: let's compare ClickHouse mentions to other technologies like Snowflake and Postgres. This query is a big one because it has to search all the comments three times for a substring, and unfortunately ClickHouse users are not yet very active on Reddit:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toStartOfQuarter(created_utc) AS quarter,
|
||||
sum(if(positionCaseInsensitive(body, 'clickhouse') > 0, 1, 0)) AS clickhouse,
|
||||
sum(if(positionCaseInsensitive(body, 'snowflake') > 0, 1, 0)) AS snowflake,
|
||||
sum(if(positionCaseInsensitive(body, 'postgres') > 0, 1, 0)) AS postgres
|
||||
FROM reddit
|
||||
GROUP BY quarter
|
||||
ORDER BY quarter ASC;
|
||||
```
|
||||
|
||||
```response
|
||||
┌────Quarter─┬─clickhouse─┬─snowflake─┬─postgres─┐
|
||||
│ 2005-10-01 │ 0 │ 0 │ 0 │
|
||||
│ 2006-01-01 │ 0 │ 2 │ 23 │
|
||||
│ 2006-04-01 │ 0 │ 2 │ 24 │
|
||||
│ 2006-07-01 │ 0 │ 4 │ 13 │
|
||||
│ 2006-10-01 │ 0 │ 23 │ 73 │
|
||||
│ 2007-01-01 │ 0 │ 14 │ 91 │
|
||||
│ 2007-04-01 │ 0 │ 10 │ 59 │
|
||||
│ 2007-07-01 │ 0 │ 39 │ 116 │
|
||||
│ 2007-10-01 │ 0 │ 45 │ 125 │
|
||||
│ 2008-01-01 │ 0 │ 53 │ 234 │
|
||||
│ 2008-04-01 │ 0 │ 79 │ 303 │
|
||||
│ 2008-07-01 │ 0 │ 102 │ 174 │
|
||||
│ 2008-10-01 │ 0 │ 156 │ 323 │
|
||||
│ 2009-01-01 │ 0 │ 206 │ 208 │
|
||||
│ 2009-04-01 │ 0 │ 178 │ 417 │
|
||||
│ 2009-07-01 │ 0 │ 300 │ 295 │
|
||||
│ 2009-10-01 │ 0 │ 633 │ 589 │
|
||||
│ 2010-01-01 │ 0 │ 555 │ 501 │
|
||||
│ 2010-04-01 │ 0 │ 587 │ 469 │
|
||||
│ 2010-07-01 │ 0 │ 770 │ 821 │
|
||||
│ 2010-10-01 │ 0 │ 1480 │ 550 │
|
||||
│ 2011-01-01 │ 0 │ 1482 │ 568 │
|
||||
│ 2011-04-01 │ 0 │ 1558 │ 406 │
|
||||
│ 2011-07-01 │ 0 │ 2163 │ 628 │
|
||||
│ 2011-10-01 │ 0 │ 4064 │ 566 │
|
||||
│ 2012-01-01 │ 0 │ 4621 │ 662 │
|
||||
│ 2012-04-01 │ 0 │ 5737 │ 785 │
|
||||
│ 2012-07-01 │ 0 │ 6097 │ 1127 │
|
||||
│ 2012-10-01 │ 0 │ 7986 │ 600 │
|
||||
│ 2013-01-01 │ 0 │ 9704 │ 839 │
|
||||
│ 2013-04-01 │ 0 │ 8161 │ 853 │
|
||||
│ 2013-07-01 │ 0 │ 9704 │ 1028 │
|
||||
│ 2013-10-01 │ 0 │ 12879 │ 1404 │
|
||||
│ 2014-01-01 │ 0 │ 12317 │ 1548 │
|
||||
│ 2014-04-01 │ 0 │ 13181 │ 1577 │
|
||||
│ 2014-07-01 │ 0 │ 15640 │ 1710 │
|
||||
│ 2014-10-01 │ 0 │ 19479 │ 1959 │
|
||||
│ 2015-01-01 │ 0 │ 20411 │ 2104 │
|
||||
│ 2015-04-01 │ 1 │ 20309 │ 9112 │
|
||||
│ 2015-07-01 │ 0 │ 20325 │ 4771 │
|
||||
│ 2015-10-01 │ 0 │ 25087 │ 3030 │
|
||||
│ 2016-01-01 │ 0 │ 23462 │ 3126 │
|
||||
│ 2016-04-01 │ 3 │ 25496 │ 2757 │
|
||||
│ 2016-07-01 │ 4 │ 28233 │ 2928 │
|
||||
│ 2016-10-01 │ 2 │ 45445 │ 2449 │
|
||||
│ 2017-01-01 │ 9 │ 76019 │ 2808 │
|
||||
│ 2017-04-01 │ 9 │ 67919 │ 2803 │
|
||||
│ 2017-07-01 │ 13 │ 68974 │ 2771 │
|
||||
│ 2017-10-01 │ 12 │ 69730 │ 2906 │
|
||||
│ 2018-01-01 │ 17 │ 67476 │ 3152 │
|
||||
│ 2018-04-01 │ 3 │ 67139 │ 3986 │
|
||||
│ 2018-07-01 │ 14 │ 67979 │ 3609 │
|
||||
│ 2018-10-01 │ 28 │ 74147 │ 3850 │
|
||||
│ 2019-01-01 │ 14 │ 80250 │ 4305 │
|
||||
│ 2019-04-01 │ 30 │ 70307 │ 3872 │
|
||||
│ 2019-07-01 │ 33 │ 77149 │ 4164 │
|
||||
│ 2019-10-01 │ 13 │ 76746 │ 3541 │
|
||||
│ 2020-01-01 │ 16 │ 54475 │ 846 │
|
||||
└────────────┴────────────┴───────────┴──────────┘
|
||||
|
||||
58 rows in set. Elapsed: 2663.751 sec. Processed 6.74 billion rows, 1.21 TB (2.53 million rows/s., 454.37 MB/s.)
|
||||
```
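If you want to skip rows that mention none of the three terms, one possible (untimed) rewrite prefilters with `multiSearchAnyCaseInsensitive`; note that quarters with no matches at all would then drop out of the result:

```sql
SELECT
    toStartOfQuarter(created_utc) AS quarter,
    countIf(positionCaseInsensitive(body, 'clickhouse') > 0) AS clickhouse,
    countIf(positionCaseInsensitive(body, 'snowflake') > 0) AS snowflake,
    countIf(positionCaseInsensitive(body, 'postgres') > 0) AS postgres
FROM reddit
WHERE multiSearchAnyCaseInsensitive(body, ['clickhouse', 'snowflake', 'postgres'])
GROUP BY quarter
ORDER BY quarter ASC;
```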
|
@ -172,7 +172,9 @@ Example of configuration for versions earlier than 22.8:
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
Cache **configuration settings**:
|
||||
File Cache **disk configuration settings**:
|
||||
|
||||
These settings should be defined in the disk configuration section.
|
||||
|
||||
- `path` - path to the directory with cache. Default: None, this setting is obligatory.
|
||||
|
||||
@ -182,7 +184,7 @@ Cache **configuration settings**:
|
||||
|
||||
- `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`.
|
||||
|
||||
- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it.
|
||||
- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it.
|
||||
|
||||
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
|
||||
|
||||
@ -190,21 +192,23 @@ Cache **configuration settings**:
|
||||
|
||||
- `max_elements` - a limit for a number of cache files. Default: `1048576`.
|
||||
|
||||
Cache **query settings**:
|
||||
File Cache **query/profile settings**:
|
||||
|
||||
Some of these settings disable cache features per query/profile that are enabled by default or in the disk configuration settings. For example, you can enable the cache in the disk configuration and disable it per query/profile by setting `enable_filesystem_cache` to `false`. Also, setting `cache_on_write_operations` to `true` in the disk configuration means that the "write-through" cache is enabled. If you need to disable that general setting for specific queries, setting `enable_filesystem_cache_on_write_operations` to `false` disables the write cache for that query/profile.
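A minimal illustration of the per-query override (the table name here is just a placeholder):

```sql
-- Disable the filesystem cache for this query only; `big_table` is a placeholder name.
SELECT count()
FROM big_table
SETTINGS enable_filesystem_cache = 0;
```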
|
||||
|
||||
- `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`.
|
||||
|
||||
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.
|
||||
|
||||
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on.
|
||||
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on. Default: `false`.
|
||||
|
||||
- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`.
|
||||
- `enable_filesystem_cache_log` - turn on logging to the `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. It can be turned on for specific queries or enabled in a profile. Default: `false`.
|
||||
|
||||
- `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`.
|
||||
|
||||
- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recentltly used` behaviour while keeping query cache limit.
|
||||
- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written by cost of evicting previously downloaded (within current query) data, e.g. second behaviour allows to preserve `last recently used` behaviour while keeping query cache limit.
|
||||
|
||||
** Warning **
|
||||
**Warning**
|
||||
Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported.
|
||||
|
||||
Cache **system tables**:
|
||||
|
@ -97,8 +97,8 @@ Columns:
|
||||
- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query.
|
||||
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
|
||||
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
|
||||
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
|
||||
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
|
||||
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
|
||||
- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
|
||||
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
|
||||
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
|
||||
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
|
||||
|
@ -12,7 +12,7 @@ Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. Empty if it is a policy for a database.
|
||||
|
||||
- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID.
|
||||
|
||||
|
docs/en/operations/system-tables/zookeeper_connection.md (new file, 29 lines)
@ -0,0 +1,29 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/zookeeper_connection
|
||||
---
|
||||
# zookeeper_connection
|
||||
|
||||
This table does not exist if ZooKeeper is not configured. The 'system.zookeeper_connection' table shows current connections to ZooKeeper (including auxiliary ZooKeepers). Each row shows information about one connection.
|
||||
|
||||
Columns:
|
||||
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — ZooKeeper cluster's name.
|
||||
- `host` ([String](../../sql-reference/data-types/string.md)) — The hostname/IP of the ZooKeeper node that ClickHouse connected to.
|
||||
- `port` ([String](../../sql-reference/data-types/string.md)) — The port of the ZooKeeper node that ClickHouse connected to.
|
||||
- `index` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config.
|
||||
- `connected_time` ([String](../../sql-reference/data-types/string.md)) — When the connection was established
|
||||
- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the current connection expired.
|
||||
- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version.
|
||||
- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session id of the connection.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.zookeeper_connection;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name──────────────┬─host─────────┬─port─┬─index─┬──────connected_time─┬─is_expired─┬─keeper_api_version─┬──────────client_id─┐
|
||||
│ default_zookeeper │ 127.0.0.1 │ 2181 │ 0 │ 2023-05-19 14:30:16 │ 0 │ 0 │ 216349144108826660 │
|
||||
└───────────────────┴──────────────┴──────┴───────┴─────────────────────┴────────────┴────────────────────┴────────────────────┘
|
||||
```
|
@ -14,8 +14,8 @@ Row policies makes sense only for users with readonly access. If user can modify
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1
|
||||
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2 ...]
|
||||
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.*
|
||||
[, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...]
|
||||
[FOR SELECT] USING condition
|
||||
[AS {PERMISSIVE | RESTRICTIVE}]
|
||||
[TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
|
||||
@ -76,6 +76,20 @@ CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
|
||||
|
||||
enable the user `peter` to see rows only if both `b=1` AND `c=2`.

Database policies are combined with table policies.

For example, the following policies

``` sql
CREATE ROW POLICY pol1 ON mydb.* USING b=1 TO mira, peter
CREATE ROW POLICY pol2 ON mydb.table1 USING c=2 AS RESTRICTIVE TO peter, antonio
```

enable the user `peter` to see `table1` rows only if both `b=1` AND `c=2`, although
any other table in `mydb` would have only the `b=1` policy applied for the user.
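A minimal illustration of the combined effect, run as `peter` (here `mydb.table2` stands for any other table in `mydb`):

``` sql
SELECT * FROM mydb.table1;  -- rows where b = 1 AND c = 2 are visible
SELECT * FROM mydb.table2;  -- only the database-wide b = 1 filter applies
```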
|
||||
|
||||
|
||||
## ON CLUSTER Clause
|
||||
|
||||
Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
|
||||
@ -88,3 +102,5 @@ Allows creating row policies on a cluster, see [Distributed DDL](../../../sql-re
|
||||
`CREATE ROW POLICY filter2 ON mydb.mytable USING a<1000 AND b=5 TO ALL EXCEPT mira`
|
||||
|
||||
`CREATE ROW POLICY filter3 ON mydb.mytable USING 1 TO admin`
|
||||
|
||||
`CREATE ROW POLICY filter4 ON mydb.* USING 1 TO admin`
|
||||
|
@ -55,7 +55,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
|
||||
|
||||
Likewise, `john` holds the `GRANT OPTION` privilege, so he can grant other accounts the same scope of privileges that his own account has.

You can use `*` instead of a table or database name when granting privileges. For example, `GRANT SELECT ON db.* TO john` allows `john` to run `SELECT` queries against every table in the `db` database. You can also omit the database name; in that case the privileges apply to the current database. For example, `GRANT SELECT ON * TO john` grants on every table of the current database, and `GRANT SELECT ON mytable TO john` grants on the `mytable` table of the current database.

Access to the `system` database is always allowed (since this database is used for processing SQL operations).

Several privileges can be granted to several accounts in one statement: `GRANT SELECT, INSERT ON *.* TO john, robin` allows the `john` and `robin` accounts to run `INSERT` and `SELECT` on any table of any database.
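The statements mentioned above, collected as runnable SQL (account names are illustrative):

``` sql
GRANT SELECT ON db.* TO john;                -- every table in the db database
GRANT SELECT ON mytable TO john;             -- a single table in the current database
GRANT SELECT, INSERT ON *.* TO john, robin;  -- all tables in all databases, two grantees
```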
|
@ -4,10 +4,10 @@ if (NOT(
|
||||
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
|
||||
)
|
||||
)
|
||||
set (COMPRESSOR "${CMAKE_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor")
|
||||
set (DECOMPRESSOR "--decompressor=${CMAKE_BINARY_DIR}/utils/self-extracting-executable/decompressor")
|
||||
set (COMPRESSOR "${PROJECT_BINARY_DIR}/native/utils/self-extracting-executable/pre_compressor")
|
||||
set (DECOMPRESSOR "--decompressor=${PROJECT_BINARY_DIR}/utils/self-extracting-executable/decompressor")
|
||||
else ()
|
||||
set (COMPRESSOR "${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor")
|
||||
set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor")
|
||||
endif ()
|
||||
|
||||
add_custom_target (self-extracting ALL
|
||||
|
@ -1872,7 +1872,7 @@ try
|
||||
}
|
||||
|
||||
if (current_connections)
|
||||
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
|
||||
LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
|
||||
else
|
||||
LOG_INFO(log, "Closed all listening sockets.");
|
||||
|
||||
@ -1884,7 +1884,7 @@ try
|
||||
current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5));
|
||||
|
||||
if (current_connections)
|
||||
LOG_INFO(log, "Closed connections. But {} remain."
|
||||
LOG_WARNING(log, "Closed connections. But {} remain."
|
||||
" Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>", current_connections);
|
||||
else
|
||||
LOG_INFO(log, "Closed connections.");
|
||||
@ -1900,7 +1900,7 @@ try
|
||||
|
||||
/// Dump coverage here, because std::atexit callback would not be called.
|
||||
dumpCoverageReportIfPossible();
|
||||
LOG_INFO(log, "Will shutdown forcefully.");
|
||||
LOG_WARNING(log, "Will shutdown forcefully.");
|
||||
safeExit(0);
|
||||
}
|
||||
});
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Common/OpenSSLHelpers.h>
|
||||
#include <Poco/SHA1Engine.h>
|
||||
#include <base/types.h>
|
||||
#include <base/hex.h>
|
||||
#include <boost/algorithm/hex.hpp>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
|
||||
|
@ -22,7 +22,7 @@ String RowPolicyName::toString() const
|
||||
name += backQuoteIfNeed(database);
|
||||
name += '.';
|
||||
}
|
||||
name += backQuoteIfNeed(table_name);
|
||||
name += (table_name == RowPolicyName::ANY_TABLE_MARK ? "*" : backQuoteIfNeed(table_name));
|
||||
return name;
|
||||
}
|
||||
|
||||
|
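With this change, a database-wide policy renders `*` as its table part. One way to observe the generated full names, using the `system.row_policies` columns documented earlier in this diff (the database name is illustrative):

``` sql
SELECT name, database, table
FROM system.row_policies
WHERE database = 'mydb';
-- A database-level policy shows up with a name like 'pol1 ON mydb.*'.
```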
@ -9,6 +9,8 @@ namespace DB
|
||||
/// Represents the full name of a row policy, e.g. "myfilter ON mydb.mytable".
|
||||
struct RowPolicyName
|
||||
{
|
||||
static constexpr char ANY_TABLE_MARK[] = "";
|
||||
|
||||
String short_name;
|
||||
String database;
|
||||
String table_name;
|
||||
|
@ -35,7 +35,13 @@ RowPolicyFilterPtr EnabledRowPolicies::getFilter(const String & database, const
|
||||
auto loaded = mixed_filters.load();
|
||||
auto it = loaded->find({database, table_name, filter_type});
|
||||
if (it == loaded->end())
|
||||
return {};
|
||||
{ /// Look for a policy for database if a table policy not found
|
||||
it = loaded->find({database, RowPolicyName::ANY_TABLE_MARK, filter_type});
|
||||
if (it == loaded->end())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
@ -228,25 +228,25 @@ void RolesOrUsersSet::add(const std::vector<UUID> & ids_)
|
||||
|
||||
bool RolesOrUsersSet::match(const UUID & id) const
|
||||
{
|
||||
return (all || ids.count(id)) && !except_ids.count(id);
|
||||
return (all || ids.contains(id)) && !except_ids.contains(id);
|
||||
}
|
||||
|
||||
|
||||
bool RolesOrUsersSet::match(const UUID & user_id, const boost::container::flat_set<UUID> & enabled_roles) const
|
||||
{
|
||||
if (!all && !ids.count(user_id))
|
||||
if (!all && !ids.contains(user_id))
|
||||
{
|
||||
bool found_enabled_role = std::any_of(
|
||||
enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return ids.count(enabled_role); });
|
||||
enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return ids.contains(enabled_role); });
|
||||
if (!found_enabled_role)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (except_ids.count(user_id))
|
||||
if (except_ids.contains(user_id))
|
||||
return false;
|
||||
|
||||
bool in_except_list = std::any_of(
|
||||
enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return except_ids.count(enabled_role); });
|
||||
enabled_roles.begin(), enabled_roles.end(), [this](const UUID & enabled_role) { return except_ids.contains(enabled_role); });
|
||||
return !in_except_list;
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,9 @@ struct RowPolicy : public IAccessEntity
|
||||
void setPermissive(bool permissive_ = true) { setRestrictive(!permissive_); }
|
||||
bool isPermissive() const { return !isRestrictive(); }
|
||||
|
||||
/// Applied for entire database
|
||||
bool isForDatabase() const { return full_name.table_name == RowPolicyName::ANY_TABLE_MARK; }
|
||||
|
||||
/// Sets that the policy is restrictive.
|
||||
/// A row is only accessible if at least one of the permissive policies passes,
|
||||
/// in addition to all the restrictive policies.
|
||||
|
@ -16,7 +16,8 @@ namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
/// Accumulates filters from multiple row policies and joins them using the AND logical operation.
|
||||
/// Helper to accumulate filters from multiple row policies and join them together
|
||||
/// by AND or OR logical operations.
|
||||
class FiltersMixer
|
||||
{
|
||||
public:
|
||||
@ -148,9 +149,11 @@ void RowPolicyCache::ensureAllRowPoliciesRead()
|
||||
|
||||
for (const UUID & id : access_control.findAll<RowPolicy>())
|
||||
{
|
||||
auto quota = access_control.tryRead<RowPolicy>(id);
|
||||
if (quota)
|
||||
all_policies.emplace(id, PolicyInfo(quota));
|
||||
auto policy = access_control.tryRead<RowPolicy>(id);
|
||||
if (policy)
|
||||
{
|
||||
all_policies.emplace(id, PolicyInfo(policy));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -215,40 +218,105 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
|
||||
std::vector<RowPolicyPtr> policies;
|
||||
};
|
||||
|
||||
std::unordered_map<MixedFiltersKey, MixerWithNames, Hash> mixers;
|
||||
std::unordered_map<MixedFiltersKey, MixerWithNames, Hash> database_mixers;
|
||||
|
||||
/// populate database_mixers using database-level policies
|
||||
/// to aggregate (mix) rules per database
|
||||
for (const auto & [policy_id, info] : all_policies)
|
||||
{
|
||||
const auto & policy = *info.policy;
|
||||
bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles);
|
||||
MixedFiltersKey key;
|
||||
key.database = info.database_and_table_name->first;
|
||||
key.table_name = info.database_and_table_name->second;
|
||||
for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX))
|
||||
if (info.isForDatabase())
|
||||
{
|
||||
auto filter_type_i = static_cast<size_t>(filter_type);
|
||||
if (info.parsed_filters[filter_type_i])
|
||||
const auto & policy = *info.policy;
|
||||
bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles);
|
||||
for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX))
|
||||
{
|
||||
key.filter_type = filter_type;
|
||||
auto & mixer = mixers[key];
|
||||
mixer.database_and_table_name = info.database_and_table_name;
|
||||
if (match)
|
||||
auto filter_type_i = static_cast<size_t>(filter_type);
|
||||
if (info.parsed_filters[filter_type_i])
|
||||
{
|
||||
mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive());
|
||||
mixer.policies.push_back(info.policy);
|
||||
MixedFiltersKey key{info.database_and_table_name->first,
|
||||
info.database_and_table_name->second,
|
||||
filter_type};
|
||||
|
||||
auto & mixer = database_mixers[key];
|
||||
mixer.database_and_table_name = info.database_and_table_name;
|
||||
if (match)
|
||||
{
|
||||
mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive());
|
||||
mixer.policies.push_back(info.policy);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<MixedFiltersKey, MixerWithNames, Hash> table_mixers;
|
||||
|
||||
/// populate table_mixers using database_mixers and table-level policies
|
||||
for (const auto & [policy_id, info] : all_policies)
|
||||
{
|
||||
if (!info.isForDatabase())
|
||||
{
|
||||
const auto & policy = *info.policy;
|
||||
bool match = info.roles->match(enabled.params.user_id, enabled.params.enabled_roles);
|
||||
for (auto filter_type : collections::range(0, RowPolicyFilterType::MAX))
|
||||
{
|
||||
auto filter_type_i = static_cast<size_t>(filter_type);
|
||||
if (info.parsed_filters[filter_type_i])
|
||||
{
|
||||
MixedFiltersKey key{info.database_and_table_name->first,
|
||||
info.database_and_table_name->second,
|
||||
filter_type};
|
||||
auto table_it = table_mixers.find(key);
|
||||
if (table_it == table_mixers.end())
|
||||
{ /// no exact match - create new mixer
|
||||
MixedFiltersKey database_key = key;
|
||||
database_key.table_name = RowPolicyName::ANY_TABLE_MARK;
|
||||
|
||||
auto database_it = database_mixers.find(database_key);
|
||||
|
||||
if (database_it == database_mixers.end())
|
||||
{
|
||||
table_it = table_mixers.try_emplace(key).first;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// table policies are based on database ones
|
||||
table_it = table_mixers.insert({key, database_it->second}).first;
|
||||
}
|
||||
}
|
||||
|
||||
auto & mixer = table_it->second; /// getting table level mixer
|
||||
mixer.database_and_table_name = info.database_and_table_name;
|
||||
if (match)
|
||||
{
|
||||
mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive());
|
||||
mixer.policies.push_back(info.policy);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto mixed_filters = boost::make_shared<MixedFiltersMap>();
|
||||
for (auto & [key, mixer] : mixers)
|
||||
|
||||
/// Retrieve aggregated policies from mixers
|
||||
/// if a table has a policy for this particular table, we have all needed information in table_mixers
|
||||
/// (policies for the database are already applied)
|
||||
/// otherwise we would look for a policy for database using RowPolicy::ANY_TABLE_MARK
|
||||
/// Consider restrictive policies a=1 for db.t, b=2 for db.* and c=3 for db.*
|
||||
/// We are going to have two items in mixed_filters:
|
||||
/// 1. a=1 AND b=2 AND c=3 for db.t (comes from table_mixers, where it had been created with the help of database_mixers)
|
||||
/// 2. b=2 AND c=3 for db.* (comes directly from database_mixers)
|
||||
for (auto * mixer_map_ptr : {&table_mixers, &database_mixers})
|
||||
{
|
||||
auto mixed_filter = std::make_shared<RowPolicyFilter>();
|
||||
mixed_filter->database_and_table_name = std::move(mixer.database_and_table_name);
|
||||
mixed_filter->expression = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
|
||||
mixed_filter->policies = std::move(mixer.policies);
|
||||
mixed_filters->emplace(key, std::move(mixed_filter));
|
||||
for (auto & [key, mixer] : *mixer_map_ptr)
|
||||
{
|
||||
auto mixed_filter = std::make_shared<RowPolicyFilter>();
|
||||
mixed_filter->database_and_table_name = std::move(mixer.database_and_table_name);
|
||||
mixed_filter->expression = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
|
||||
mixed_filter->policies = std::move(mixer.policies);
|
||||
mixed_filters->emplace(key, std::move(mixed_filter));
|
||||
}
|
||||
}
|
||||
|
||||
enabled.mixed_filters.store(mixed_filters);
|
||||
|
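The comment block above walks through a concrete scenario; the same scenario expressed as DDL (policy names are illustrative):

``` sql
CREATE ROW POLICY p_table ON db.t USING a = 1 AS RESTRICTIVE TO ALL;
CREATE ROW POLICY p_db_b  ON db.* USING b = 2 AS RESTRICTIVE TO ALL;
CREATE ROW POLICY p_db_c  ON db.* USING c = 3 AS RESTRICTIVE TO ALL;
-- Expected mixed filters: a=1 AND b=2 AND c=3 for db.t (the table mixer is seeded
-- from the database mixers), and b=2 AND c=3 for any other table in db.
```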
@ -29,6 +29,7 @@ private:
|
||||
explicit PolicyInfo(const RowPolicyPtr & policy_) { setPolicy(policy_); }
|
||||
void setPolicy(const RowPolicyPtr & policy_);
|
||||
|
||||
bool isForDatabase() const { return policy->isForDatabase(); }
|
||||
RowPolicyPtr policy;
|
||||
const RolesOrUsersSet * roles = nullptr;
|
||||
std::shared_ptr<const std::pair<String, String>> database_and_table_name;
|
||||
|
@ -105,21 +105,21 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
|
||||
if (SettingsProfileElements::isAllowBackupSetting(element.setting_name))
|
||||
continue;
|
||||
|
||||
if (!element.value.isNull())
|
||||
if (element.value)
|
||||
{
|
||||
SettingChange value(element.setting_name, element.value);
|
||||
SettingChange value(element.setting_name, *element.value);
|
||||
check(current_settings, value);
|
||||
}
|
||||
|
||||
if (!element.min_value.isNull())
|
||||
if (element.min_value)
|
||||
{
|
||||
SettingChange value(element.setting_name, element.min_value);
|
||||
SettingChange value(element.setting_name, *element.min_value);
|
||||
check(current_settings, value);
|
||||
}
|
||||
|
||||
if (!element.max_value.isNull())
|
||||
if (element.max_value)
|
||||
{
|
||||
SettingChange value(element.setting_name, element.max_value);
|
||||
SettingChange value(element.setting_name, *element.max_value);
|
||||
check(current_settings, value);
|
||||
}
|
||||
|
||||
|
@ -63,18 +63,18 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
|
||||
max_value = ast.max_value;
|
||||
writability = ast.writability;
|
||||
|
||||
if (!value.isNull())
|
||||
value = Settings::castValueUtil(setting_name, value);
|
||||
if (!min_value.isNull())
|
||||
min_value = Settings::castValueUtil(setting_name, min_value);
|
||||
if (!max_value.isNull())
|
||||
max_value = Settings::castValueUtil(setting_name, max_value);
|
||||
if (value)
|
||||
value = Settings::castValueUtil(setting_name, *value);
|
||||
if (min_value)
|
||||
min_value = Settings::castValueUtil(setting_name, *min_value);
|
||||
if (max_value)
|
||||
max_value = Settings::castValueUtil(setting_name, *max_value);
|
||||
}
|
||||
}
|
||||
|
||||
bool SettingsProfileElement::isConstraint() const
|
||||
{
|
||||
return this->writability || !this->min_value.isNull() || !this->max_value.isNull();
|
||||
return this->writability || this->min_value || this->max_value;
|
||||
}
|
||||
|
||||
std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toAST() const
|
||||
@ -187,8 +187,8 @@ Settings SettingsProfileElements::toSettings() const
|
||||
Settings res;
|
||||
for (const auto & elem : *this)
|
||||
{
|
||||
if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name) && !elem.value.isNull())
|
||||
res.set(elem.setting_name, elem.value);
|
||||
if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name) && elem.value)
|
||||
res.set(elem.setting_name, *elem.value);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@ -200,8 +200,8 @@ SettingsChanges SettingsProfileElements::toSettingsChanges() const
|
||||
{
|
||||
if (!elem.setting_name.empty() && !isAllowBackupSetting(elem.setting_name))
|
||||
{
|
||||
if (!elem.value.isNull())
|
||||
res.push_back({elem.setting_name, elem.value});
|
||||
if (elem.value)
|
||||
res.push_back({elem.setting_name, *elem.value});
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@ -214,8 +214,8 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
|
||||
if (!elem.setting_name.empty() && elem.isConstraint() && !isAllowBackupSetting(elem.setting_name))
|
||||
res.set(
|
||||
elem.setting_name,
|
||||
elem.min_value,
|
||||
elem.max_value,
|
||||
elem.min_value ? *elem.min_value : Field{},
|
||||
elem.max_value ? *elem.max_value : Field{},
|
||||
elem.writability ? *elem.writability : SettingConstraintWritability::WRITABLE);
|
||||
return res;
|
||||
}
|
||||
@ -240,8 +240,8 @@ bool SettingsProfileElements::isBackupAllowed() const
|
||||
{
|
||||
for (const auto & setting : *this)
|
||||
{
|
||||
if (isAllowBackupSetting(setting.setting_name))
|
||||
return static_cast<bool>(SettingFieldBool{setting.value});
|
||||
if (isAllowBackupSetting(setting.setting_name) && setting.value)
|
||||
return static_cast<bool>(SettingFieldBool{*setting.value});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -23,9 +23,9 @@ struct SettingsProfileElement
|
||||
std::optional<UUID> parent_profile;
|
||||
|
||||
String setting_name;
|
||||
Field value;
|
||||
Field min_value;
|
||||
Field max_value;
|
||||
std::optional<Field> value;
|
||||
std::optional<Field> min_value;
|
||||
std::optional<Field> max_value;
|
||||
std::optional<SettingConstraintWritability> writability;
|
||||
|
||||
auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, writability); }
|
||||
|
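Making these fields `std::optional` lets a profile element distinguish "not set" from "set to an empty Field". A hedged DDL sketch where only the value and the minimum are specified, so `max_value` stays unset (profile and user names are illustrative):

``` sql
CREATE SETTINGS PROFILE IF NOT EXISTS low_mem_profile
    SETTINGS max_memory_usage = 5000000000 MIN 1000000000
    TO robin;
```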
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <base/arithmeticOverflow.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
@ -121,7 +121,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(
|
||||
|
||||
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true };
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
|
||||
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
|
||||
|
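A hedged illustration of the flipped property: with `returns_default_when_only_null = false`, aggregating an argument whose type is only NULL presumably returns `NULL` rather than a defaulted (empty) array; the exact output depends on the server version:

``` sql
SELECT groupArray(NULL) AS only_null_input;
```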
@ -43,6 +43,7 @@ struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
|
||||
Float64 now_s = 0;
|
||||
UInt64 pos_x = 0;
|
||||
UInt64 pos_y = 0;
|
||||
UInt64 pos_tmp;
|
||||
UInt64 n1 = x.size();
|
||||
UInt64 n2 = y.size();
|
||||
|
||||
@ -65,14 +66,22 @@ struct KolmogorovSmirnov : public StatisticalSample<Float64, Float64>
|
||||
now_s -= n2_d;
|
||||
++pos_y;
|
||||
}
|
||||
max_s = std::max(max_s, now_s);
|
||||
min_s = std::min(min_s, now_s);
|
||||
}
|
||||
else
|
||||
{
|
||||
now_s += n1_d;
|
||||
++pos_x;
|
||||
pos_tmp = pos_x + 1;
|
||||
while (pos_tmp < x.size() && unlikely(fabs(x[pos_tmp] - x[pos_x]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s += n1_d * (pos_tmp - pos_x);
|
||||
pos_x = pos_tmp;
|
||||
pos_tmp = pos_y + 1;
|
||||
while (pos_tmp < y.size() && unlikely(fabs(y[pos_tmp] - y[pos_y]) <= tol))
|
||||
pos_tmp++;
|
||||
now_s -= n2_d * (pos_tmp - pos_y);
|
||||
pos_y = pos_tmp;
|
||||
}
|
||||
max_s = std::max(max_s, now_s);
|
||||
min_s = std::min(min_s, now_s);
|
||||
}
|
||||
now_s += n1_d * (x.size() - pos_x) - n2_d * (y.size() - pos_y);
|
||||
min_s = std::min(min_s, now_s);
|
||||
|
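The loop above skips over runs of tied observations (equal within `tol`) in one step. A hedged SQL illustration with deliberate ties, assuming the aggregate is exposed as `kolmogorovSmirnovTest(sample_data, sample_index)` as in the current documentation:

``` sql
SELECT kolmogorovSmirnovTest(value, sample_index)
FROM values('value Float64, sample_index UInt8',
            (1, 0), (1, 1), (1, 0),   -- tied values shared by both samples
            (2, 1), (3, 0), (3, 1));
```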
@ -10,6 +10,7 @@
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <base/StringRef.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
{
|
||||
/// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64.
|
||||
if (properties.returns_default_when_only_null)
|
||||
return std::make_shared<AggregateFunctionNothing>(arguments, params, nested_function->getResultType());
|
||||
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeUInt64>());
|
||||
else
|
||||
return std::make_shared<AggregateFunctionNothing>(arguments, params, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()));
|
||||
}
|
||||
|
@ -209,15 +209,20 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
function_ast->kind = ASTFunction::Kind::WINDOW_FUNCTION;
|
||||
}
|
||||
|
||||
auto new_options = options;
|
||||
/// To avoid surrounding constants with several internal casts.
|
||||
if (function_name == "_CAST" && (*getArguments().begin())->getNodeType() == QueryTreeNodeType::CONSTANT)
|
||||
new_options.add_cast_for_constants = false;
|
||||
|
||||
const auto & parameters = getParameters();
|
||||
if (!parameters.getNodes().empty())
|
||||
{
|
||||
function_ast->children.push_back(parameters.toAST(options));
|
||||
function_ast->children.push_back(parameters.toAST(new_options));
|
||||
function_ast->parameters = function_ast->children.back();
|
||||
}
|
||||
|
||||
const auto & arguments = getArguments();
|
||||
function_ast->children.push_back(arguments.toAST(options));
|
||||
function_ast->children.push_back(arguments.toAST(new_options));
|
||||
function_ast->arguments = function_ast->children.back();
|
||||
|
||||
auto window_node = getWindowNode();
|
||||
@ -226,7 +231,7 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
if (auto * identifier_node = window_node->as<IdentifierNode>())
|
||||
function_ast->window_name = identifier_node->getIdentifier().getFullName();
|
||||
else
|
||||
function_ast->window_definition = window_node->toAST(options);
|
||||
function_ast->window_definition = window_node->toAST(new_options);
|
||||
}
|
||||
|
||||
return function_ast;
|
||||
|
@ -38,6 +38,9 @@ public:
|
||||
if (!query->hasGroupBy())
|
||||
return;
|
||||
|
||||
if (query->isGroupByWithCube() || query->isGroupByWithRollup())
|
||||
return;
|
||||
|
||||
auto & group_by = query->getGroupBy().getNodes();
|
||||
if (query->isGroupByWithGroupingSets())
|
||||
{
|
||||
|
@ -115,6 +115,7 @@ namespace
|
||||
writeBinary(info.checksum, out);
|
||||
writeBinary(info.base_size, out);
|
||||
writeBinary(info.base_checksum, out);
|
||||
writeBinary(info.encrypted_by_disk, out);
|
||||
/// We don't store `info.data_file_name` and `info.data_file_index` because they're determined automalically
|
||||
/// after reading file infos for all the hosts (see the class BackupCoordinationFileInfos).
|
||||
}
|
||||
@ -136,6 +137,7 @@ namespace
|
||||
readBinary(info.checksum, in);
|
||||
readBinary(info.base_size, in);
|
||||
readBinary(info.base_checksum, in);
|
||||
readBinary(info.encrypted_by_disk, in);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@ -254,7 +256,10 @@ void BackupCoordinationRemote::removeAllNodes()
|
||||
|
||||
void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
|
||||
{
|
||||
stage_sync->set(current_host, new_stage, message);
|
||||
if (is_internal)
|
||||
stage_sync->set(current_host, new_stage, message);
|
||||
else
|
||||
stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
|
||||
}
|
||||
|
||||
void BackupCoordinationRemote::setError(const Exception & exception)
|
||||
@ -777,8 +782,8 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
|
||||
String status;
|
||||
if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
|
||||
{
|
||||
/// If status is not COMPLETED it could be because the backup failed, check if 'error' exists
|
||||
if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_backup_path + "/error"))
|
||||
/// Check if some other backup is in progress
|
||||
if (status == Stage::SCHEDULED_TO_START)
|
||||
{
|
||||
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
|
||||
result = true;
|
||||
|
@ -43,6 +43,10 @@ namespace BackupCoordinationStage
|
||||
|
||||
/// Coordination stage meaning that a host finished its work.
|
||||
constexpr const char * COMPLETED = "completed";
|
||||
|
||||
/// Coordination stage meaning that backup/restore has failed due to an error
|
||||
/// Check '/error' for the error message
|
||||
constexpr const char * ERROR = "error";
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,11 +8,13 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Backups/BackupCoordinationStage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace Stage = BackupCoordinationStage;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
|
||||
@ -42,7 +44,7 @@ void BackupCoordinationStageSync::createRootNodes()
|
||||
});
|
||||
}
|
||||
|
||||
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
|
||||
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message, const bool & all_hosts)
|
||||
{
|
||||
auto holder = with_retries.createRetriesControlHolder("set");
|
||||
holder.retries_ctl.retryLoop(
|
||||
@ -50,14 +52,23 @@ void BackupCoordinationStageSync::set(const String & current_host, const String
|
||||
{
|
||||
with_retries.renewZooKeeper(zookeeper);
|
||||
|
||||
/// Make an ephemeral node so the initiator can track if the current host is still working.
|
||||
String alive_node_path = zookeeper_path + "/alive|" + current_host;
|
||||
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
||||
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
|
||||
throw zkutil::KeeperException(code, alive_node_path);
|
||||
if (all_hosts)
|
||||
{
|
||||
auto code = zookeeper->trySet(zookeeper_path, new_stage);
|
||||
if (code != Coordination::Error::ZOK)
|
||||
throw zkutil::KeeperException(code, zookeeper_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Make an ephemeral node so the initiator can track if the current host is still working.
|
||||
String alive_node_path = zookeeper_path + "/alive|" + current_host;
|
||||
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
|
||||
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
|
||||
throw zkutil::KeeperException(code, alive_node_path);
|
||||
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -73,6 +84,10 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
|
||||
writeStringBinary(current_host, buf);
|
||||
writeException(exception, buf, true);
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
|
||||
|
||||
auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
|
||||
if (code != Coordination::Error::ZOK)
|
||||
throw zkutil::KeeperException(code, zookeeper_path);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ public:
|
||||
Poco::Logger * log_);
|
||||
|
||||
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
|
||||
void set(const String & current_host, const String & new_stage, const String & message);
|
||||
void set(const String & current_host, const String & new_stage, const String & message, const bool & all_hosts = false);
|
||||
void setError(const String & current_host, const Exception & exception);
|
||||
|
||||
/// Sets the stage of the current host and waits until all hosts come to the same stage.
|
||||
|
@ -1,26 +1,45 @@
|
||||
#include <Backups/BackupEntryFromAppendOnlyFile.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/LimitSeekableReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
/// For append-only files we must calculate its size on the construction of a backup entry.
|
||||
UInt64 calculateSize(const DiskPtr & disk, const String & file_path, bool copy_encrypted, std::optional<UInt64> unencrypted_file_size)
|
||||
{
|
||||
if (!unencrypted_file_size)
|
||||
return copy_encrypted ? disk->getEncryptedFileSize(file_path) : disk->getFileSize(file_path);
|
||||
else if (copy_encrypted)
|
||||
return disk->getEncryptedFileSize(*unencrypted_file_size);
|
||||
else
|
||||
return *unencrypted_file_size;
|
||||
}
|
||||
}
|
||||
|
||||
BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_,
|
||||
const std::optional<UInt128> & checksum_,
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
|
||||
: BackupEntryFromImmutableFile(disk_, file_path_, settings_, file_size_, checksum_, temporary_file_)
|
||||
, limit(BackupEntryFromImmutableFile::getSize())
|
||||
const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_, const std::optional<UInt64> & file_size_)
|
||||
: disk(disk_)
|
||||
, file_path(file_path_)
|
||||
, data_source_description(disk->getDataSourceDescription())
|
||||
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
|
||||
, size(calculateSize(disk_, file_path_, copy_encrypted, file_size_))
|
||||
{
|
||||
}
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer() const
|
||||
BackupEntryFromAppendOnlyFile::~BackupEntryFromAppendOnlyFile() = default;
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer(const ReadSettings & read_settings) const
|
||||
{
|
||||
auto buf = BackupEntryFromImmutableFile::getReadBuffer();
|
||||
return std::make_unique<LimitSeekableReadBuffer>(std::move(buf), 0, limit);
|
||||
std::unique_ptr<SeekableReadBuffer> buf;
|
||||
if (copy_encrypted)
|
||||
buf = disk->readEncryptedFile(file_path, read_settings.adjustBufferSize(size));
|
||||
else
|
||||
buf = disk->readFile(file_path, read_settings.adjustBufferSize(size));
|
||||
return std::make_unique<LimitSeekableReadBuffer>(std::move(buf), 0, size);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/BackupEntryFromImmutableFile.h>
|
||||
#include <Backups/BackupEntryWithChecksumCalculation.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -8,24 +8,34 @@ namespace DB
|
||||
|
||||
/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed
|
||||
/// the file can be appended with new data, but the bytes which are already in the file won't be changed.
|
||||
class BackupEntryFromAppendOnlyFile : public BackupEntryFromImmutableFile
|
||||
class BackupEntryFromAppendOnlyFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
|
||||
{
|
||||
public:
|
||||
|
||||
/// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
|
||||
/// The constructor is allowed to not set `file_size_`, in that case it will be calculated from the data.
|
||||
BackupEntryFromAppendOnlyFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_ = {},
|
||||
const std::optional<UInt128> & checksum_ = {},
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
|
||||
bool copy_encrypted_ = false,
|
||||
const std::optional<UInt64> & file_size_ = {});
|
||||
|
||||
UInt64 getSize() const override { return limit; }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
|
||||
~BackupEntryFromAppendOnlyFile() override;
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
|
||||
UInt64 getSize() const override { return size; }
|
||||
|
||||
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
|
||||
bool isEncryptedByDisk() const override { return copy_encrypted; }
|
||||
|
||||
bool isFromFile() const override { return true; }
|
||||
DiskPtr getDisk() const override { return disk; }
|
||||
String getFilePath() const override { return file_path; }
|
||||
|
||||
private:
|
||||
const UInt64 limit;
|
||||
const DiskPtr disk;
|
||||
const String file_path;
|
||||
const DataSourceDescription data_source_description;
|
||||
const bool copy_encrypted;
|
||||
const UInt64 size;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,53 +1,84 @@
|
||||
#include <Backups/BackupEntryFromImmutableFile.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/IO/createReadBufferFromFileBase.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
/// We mix the checksum calculated for non-encrypted data with IV generated to encrypt the file
|
||||
/// to generate kind of a checksum for encrypted data. Of course it differs from the CityHash properly calculated for encrypted data.
|
||||
UInt128 combineChecksums(UInt128 checksum1, UInt128 checksum2)
|
||||
{
|
||||
chassert(std::size(checksum2.items) == 2);
|
||||
return CityHash_v1_0_2::CityHash128WithSeed(reinterpret_cast<const char *>(&checksum1), sizeof(checksum1), {checksum2.items[0], checksum2.items[1]});
|
||||
}
|
||||
}
|
||||
|
||||
BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
bool copy_encrypted_,
|
||||
const std::optional<UInt64> & file_size_,
|
||||
const std::optional<UInt128> & checksum_,
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
|
||||
const std::optional<UInt128> & checksum_)
|
||||
: disk(disk_)
|
||||
, file_path(file_path_)
|
||||
, settings(settings_)
|
||||
, data_source_description(disk->getDataSourceDescription())
|
||||
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
|
||||
, file_size(file_size_)
|
||||
, checksum(checksum_)
|
||||
, temporary_file_on_disk(temporary_file_)
|
||||
{
|
||||
}
|
||||
|
||||
BackupEntryFromImmutableFile::~BackupEntryFromImmutableFile() = default;
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer(const ReadSettings & read_settings) const
|
||||
{
|
||||
if (copy_encrypted)
|
||||
return disk->readEncryptedFile(file_path, read_settings);
|
||||
else
|
||||
return disk->readFile(file_path, read_settings);
|
||||
}
|
||||
|
||||
UInt64 BackupEntryFromImmutableFile::getSize() const
|
||||
{
|
||||
std::lock_guard lock{get_file_size_mutex};
|
||||
if (!file_size)
|
||||
file_size = disk->getFileSize(file_path);
|
||||
std::lock_guard lock{size_and_checksum_mutex};
|
||||
if (!file_size_adjusted)
|
||||
{
|
||||
if (!file_size)
|
||||
file_size = copy_encrypted ? disk->getEncryptedFileSize(file_path) : disk->getFileSize(file_path);
|
||||
else if (copy_encrypted)
|
||||
file_size = disk->getEncryptedFileSize(*file_size);
|
||||
file_size_adjusted = true;
|
||||
}
|
||||
return *file_size;
|
||||
}
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
|
||||
UInt128 BackupEntryFromImmutableFile::getChecksum() const
|
||||
{
|
||||
return disk->readFile(file_path, settings);
|
||||
std::lock_guard lock{size_and_checksum_mutex};
|
||||
if (!checksum_adjusted)
|
||||
{
|
||||
if (!checksum)
|
||||
checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
|
||||
else if (copy_encrypted)
|
||||
checksum = combineChecksums(*checksum, disk->getEncryptedFileIV(file_path));
|
||||
checksum_adjusted = true;
|
||||
}
|
||||
return *checksum;
|
||||
}
|
||||
|
||||
|
||||
DataSourceDescription BackupEntryFromImmutableFile::getDataSourceDescription() const
|
||||
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const
|
||||
{
|
||||
return disk->getDataSourceDescription();
|
||||
}
|
||||
if (prefix_length == 0)
|
||||
return 0;
|
||||
|
||||
String BackupEntryFromImmutableFile::getFilePath() const
|
||||
{
|
||||
return file_path;
|
||||
if (prefix_length >= getSize())
|
||||
return getChecksum();
|
||||
|
||||
/// For immutable files we don't use partial checksums.
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,49 +1,53 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <Backups/BackupEntryWithChecksumCalculation.h>
|
||||
#include <base/defines.h>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class TemporaryFileOnDisk;
|
||||
class IDisk;
|
||||
using DiskPtr = std::shared_ptr<IDisk>;
|
||||
|
||||
/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed the file won't be changed.
|
||||
class BackupEntryFromImmutableFile : public IBackupEntry
|
||||
class BackupEntryFromImmutableFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
|
||||
{
|
||||
public:
|
||||
|
||||
/// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
|
||||
BackupEntryFromImmutableFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
bool copy_encrypted_ = false,
|
||||
const std::optional<UInt64> & file_size_ = {},
|
||||
const std::optional<UInt128> & checksum_ = {},
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
|
||||
const std::optional<UInt128> & checksum_ = {});
|
||||
|
||||
~BackupEntryFromImmutableFile() override;
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
|
||||
|
||||
UInt64 getSize() const override;
|
||||
std::optional<UInt128> getChecksum() const override { return checksum; }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
|
||||
UInt128 getChecksum() const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
|
||||
|
||||
String getFilePath() const override;
|
||||
DataSourceDescription getDataSourceDescription() const override;
|
||||
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
|
||||
bool isEncryptedByDisk() const override { return copy_encrypted; }
|
||||
|
||||
DiskPtr tryGetDiskIfExists() const override { return disk; }
|
||||
bool isFromFile() const override { return true; }
|
||||
bool isFromImmutableFile() const override { return true; }
|
||||
DiskPtr getDisk() const override { return disk; }
|
||||
String getFilePath() const override { return file_path; }
|
||||
|
||||
private:
|
||||
const DiskPtr disk;
|
||||
const String file_path;
|
||||
ReadSettings settings;
|
||||
mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
|
||||
mutable std::mutex get_file_size_mutex;
|
||||
const std::optional<UInt128> checksum;
|
||||
const std::shared_ptr<TemporaryFileOnDisk> temporary_file_on_disk;
|
||||
const DataSourceDescription data_source_description;
|
||||
const bool copy_encrypted;
|
||||
mutable std::optional<UInt64> file_size;
|
||||
mutable std::optional<UInt128> checksum;
|
||||
mutable bool file_size_adjusted = false;
|
||||
mutable bool checksum_adjusted = false;
|
||||
mutable std::mutex size_and_checksum_mutex;
|
||||
};
|
||||
|
||||
}
|
||||
|
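These entry classes feed the BACKUP machinery; with `copy_encrypted` they can hand files from an encrypted disk to the backup in their encrypted form. A hedged end-to-end example (the `backups` disk and the table name are illustrative and must exist in the server configuration):

``` sql
BACKUP TABLE mydb.secure_events TO Disk('backups', 'secure_events_2023_05/');
```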
@ -5,17 +5,16 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_)
|
||||
: BackupEntryFromMemory(String{reinterpret_cast<const char *>(data_), size_}, checksum_)
|
||||
BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_)
|
||||
: BackupEntryFromMemory(String{reinterpret_cast<const char *>(data_), size_})
|
||||
{
|
||||
}
|
||||
|
||||
BackupEntryFromMemory::BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_)
|
||||
: data(std::move(data_)), checksum(checksum_)
|
||||
BackupEntryFromMemory::BackupEntryFromMemory(String data_) : data(std::move(data_))
|
||||
{
|
||||
}
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromMemory::getReadBuffer() const
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromMemory::getReadBuffer(const ReadSettings &) const
|
||||
{
|
||||
return std::make_unique<ReadBufferFromString>(data);
|
||||
}
|
||||
|
@ -1,39 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Backups/BackupEntryWithChecksumCalculation.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Represents small preloaded data to be included in a backup.
|
||||
class BackupEntryFromMemory : public IBackupEntry
|
||||
class BackupEntryFromMemory : public BackupEntryWithChecksumCalculation<IBackupEntry>
|
||||
{
|
||||
public:
|
||||
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
|
||||
BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_ = {});
|
||||
explicit BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_ = {});
|
||||
BackupEntryFromMemory(const void * data_, size_t size_);
|
||||
explicit BackupEntryFromMemory(String data_);
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
|
||||
UInt64 getSize() const override { return data.size(); }
|
||||
std::optional<UInt128> getChecksum() const override { return checksum; }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override;
|
||||
|
||||
String getFilePath() const override
|
||||
{
|
||||
return "";
|
||||
}
|
||||
|
||||
DataSourceDescription getDataSourceDescription() const override
|
||||
{
|
||||
return DataSourceDescription{DataSourceType::RAM, "", false, false};
|
||||
}
|
||||
|
||||
DiskPtr tryGetDiskIfExists() const override { return nullptr; }
|
||||
DataSourceDescription getDataSourceDescription() const override { return DataSourceDescription{DataSourceType::RAM, "", false, false}; }
|
||||
|
||||
private:
|
||||
const String data;
|
||||
const std::optional<UInt128> checksum;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,9 @@
|
||||
#include <Backups/BackupEntryFromSmallFile.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Disks/DiskLocal.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/IO/createReadBufferFromFileBase.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
@ -16,9 +19,9 @@ namespace
|
||||
return s;
|
||||
}
|
||||
|
||||
String readFile(const DiskPtr & disk, const String & file_path)
|
||||
String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted)
|
||||
{
|
||||
auto buf = disk->readFile(file_path);
|
||||
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path);
|
||||
String s;
|
||||
readStringUntilEOF(s, *buf);
|
||||
return s;
|
||||
@ -26,15 +29,25 @@ namespace
|
||||
}
|
||||
|
||||
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const std::optional<UInt128> & checksum_)
|
||||
: BackupEntryFromMemory(readFile(file_path_), checksum_), file_path(file_path_)
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_)
|
||||
: file_path(file_path_)
|
||||
, data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_))
|
||||
, data(readFile(file_path_))
|
||||
{
|
||||
}
|
||||
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(
|
||||
const DiskPtr & disk_, const String & file_path_, const std::optional<UInt128> & checksum_)
|
||||
: BackupEntryFromMemory(readFile(disk_, file_path_), checksum_), disk(disk_), file_path(file_path_)
|
||||
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_)
|
||||
: disk(disk_)
|
||||
, file_path(file_path_)
|
||||
, data_source_description(disk_->getDataSourceDescription())
|
||||
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
|
||||
, data(readFile(disk_, file_path, copy_encrypted))
|
||||
{
|
||||
}
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromSmallFile::getReadBuffer(const ReadSettings &) const
|
||||
{
|
||||
return std::make_unique<ReadBufferFromString>(data);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/BackupEntryFromMemory.h>
|
||||
#include <Backups/BackupEntryWithChecksumCalculation.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -10,25 +10,28 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
|
||||
/// Represents a file prepared to be included in a backup,
|
||||
/// assuming that the file is small and can be easily loaded into memory.
|
||||
class BackupEntryFromSmallFile : public BackupEntryFromMemory
|
||||
class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
|
||||
{
|
||||
public:
|
||||
/// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
|
||||
explicit BackupEntryFromSmallFile(
|
||||
const String & file_path_,
|
||||
const std::optional<UInt128> & checksum_ = {});
|
||||
explicit BackupEntryFromSmallFile(const String & file_path_);
|
||||
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false);
|
||||
|
||||
BackupEntryFromSmallFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const std::optional<UInt128> & checksum_ = {});
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
|
||||
UInt64 getSize() const override { return data.size(); }
|
||||
|
||||
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
|
||||
bool isEncryptedByDisk() const override { return copy_encrypted; }
|
||||
|
||||
bool isFromFile() const override { return true; }
|
||||
DiskPtr getDisk() const override { return disk; }
|
||||
String getFilePath() const override { return file_path; }
|
||||
|
||||
DiskPtr tryGetDiskIfExists() const override { return disk; }
|
||||
private:
|
||||
const DiskPtr disk;
|
||||
const String file_path;
|
||||
const DataSourceDescription data_source_description;
|
||||
const bool copy_encrypted = false;
|
||||
const String data;
|
||||
};
|
||||
|
||||
}
|
||||
|
54
src/Backups/BackupEntryWithChecksumCalculation.cpp
Normal file
@ -0,0 +1,54 @@
|
||||
#include <Backups/BackupEntryWithChecksumCalculation.h>
|
||||
#include <IO/HashingReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename Base>
|
||||
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
|
||||
{
|
||||
std::lock_guard lock{checksum_calculation_mutex};
|
||||
if (!calculated_checksum)
|
||||
{
|
||||
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(this->getSize()));
|
||||
HashingReadBuffer hashing_read_buffer(*read_buffer);
|
||||
hashing_read_buffer.ignoreAll();
|
||||
calculated_checksum = hashing_read_buffer.getHash();
|
||||
}
|
||||
return *calculated_checksum;
|
||||
}
|
||||
|
||||
template <typename Base>
|
||||
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length) const
|
||||
{
|
||||
if (prefix_length == 0)
|
||||
return 0;
|
||||
|
||||
size_t size = this->getSize();
|
||||
if (prefix_length >= size)
|
||||
return this->getChecksum();
|
||||
|
||||
std::lock_guard lock{checksum_calculation_mutex};
|
||||
|
||||
ReadSettings read_settings;
|
||||
if (calculated_checksum)
|
||||
read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size);
|
||||
|
||||
auto read_buffer = this->getReadBuffer(read_settings);
|
||||
HashingReadBuffer hashing_read_buffer(*read_buffer);
|
||||
hashing_read_buffer.ignore(prefix_length);
|
||||
auto partial_checksum = hashing_read_buffer.getHash();
|
||||
|
||||
if (!calculated_checksum)
|
||||
{
|
||||
hashing_read_buffer.ignoreAll();
|
||||
calculated_checksum = hashing_read_buffer.getHash();
|
||||
}
|
||||
|
||||
return partial_checksum;
|
||||
}
|
||||
|
||||
template class BackupEntryWithChecksumCalculation<IBackupEntry>;
|
||||
|
||||
}
|
22
src/Backups/BackupEntryWithChecksumCalculation.h
Normal file
@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Calculates the checksum and the partial checksum for a backup entry based on ReadBuffer returned by getReadBuffer().
|
||||
template <typename Base>
|
||||
class BackupEntryWithChecksumCalculation : public Base
|
||||
{
|
||||
public:
|
||||
UInt128 getChecksum() const override;
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
|
||||
|
||||
private:
|
||||
mutable std::optional<UInt128> calculated_checksum;
|
||||
mutable std::mutex checksum_calculation_mutex;
|
||||
};
|
||||
|
||||
}
|
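Prefix (partial) checksums matter mostly for incremental backups, where a file that has only been appended to since the base backup can be matched by the checksum of its old-length prefix. A hedged example, assuming a `backups` disk is configured:

``` sql
BACKUP TABLE mydb.t TO Disk('backups', 'incremental_1/')
    SETTINGS base_backup = Disk('backups', 'full_0/');
```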
@ -15,23 +15,33 @@ public:
|
||||
BackupEntryWrappedWith(BackupEntryPtr entry_, T && custom_value_) : entry(entry_), custom_value(std::move(custom_value_)) { }
|
||||
~BackupEntryWrappedWith() override = default;
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); }
|
||||
UInt64 getSize() const override { return entry->getSize(); }
|
||||
std::optional<UInt128> getChecksum() const override { return entry->getChecksum(); }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return entry->getReadBuffer(); }
|
||||
String getFilePath() const override { return entry->getFilePath(); }
|
||||
DiskPtr tryGetDiskIfExists() const override { return entry->tryGetDiskIfExists(); }
|
||||
UInt128 getChecksum() const override { return entry->getChecksum(); }
|
||||
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); }
|
||||
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
|
||||
bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); }
|
||||
bool isFromFile() const override { return entry->isFromFile(); }
|
||||
bool isFromImmutableFile() const override { return entry->isFromImmutableFile(); }
|
||||
String getFilePath() const override { return entry->getFilePath(); }
|
||||
DiskPtr getDisk() const override { return entry->getDisk(); }
|
||||
|
||||
private:
|
||||
BackupEntryPtr entry;
|
||||
T custom_value;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
BackupEntryPtr wrapBackupEntryWith(BackupEntryPtr && backup_entry, const T & custom_value)
|
||||
{
|
||||
return std::make_shared<BackupEntryWrappedWith<T>>(std::move(backup_entry), custom_value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void wrapBackupEntriesWith(std::vector<std::pair<String, BackupEntryPtr>> & backup_entries, const T & custom_value)
|
||||
{
|
||||
for (auto & [_, backup_entry] : backup_entries)
|
||||
backup_entry = std::make_shared<BackupEntryWrappedWith<T>>(std::move(backup_entry), custom_value);
|
||||
backup_entry = wrapBackupEntryWith(std::move(backup_entry), custom_value);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
#include <Common/ThreadPool.h>
#include <IO/HashingReadBuffer.h>
#include <base/hex.h>

namespace DB
@ -36,7 +36,7 @@ namespace
{
/// We cannot reuse base backup because our file is smaller
/// than file stored in previous backup
if (new_entry_info.size < base_backup_info.first)
if ((new_entry_info.size < base_backup_info.first) || !base_backup_info.first)
return CheckBackupResult::HasNothing;

if (base_backup_info.first == new_entry_info.size)
@ -48,45 +48,22 @@

struct ChecksumsForNewEntry
{
UInt128 full_checksum;
UInt128 prefix_checksum;
/// 0 is the valid checksum of empty data.
UInt128 full_checksum = 0;

/// std::nullopt here means that it's too difficult to calculate a partial checksum so it shouldn't be used.
std::optional<UInt128> prefix_checksum;
};

/// Calculate checksum for backup entry if it's empty.
/// Also able to calculate additional checksum of some prefix.
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size)
{
if (prefix_size > 0)
{
auto read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignore(prefix_size);
auto prefix_checksum = hashing_read_buffer.getHash();
if (entry->getChecksum() == std::nullopt)
{
hashing_read_buffer.ignoreAll();
auto full_checksum = hashing_read_buffer.getHash();
return ChecksumsForNewEntry{full_checksum, prefix_checksum};
}
else
{
return ChecksumsForNewEntry{*(entry->getChecksum()), prefix_checksum};
}
}
else
{
if (entry->getChecksum() == std::nullopt)
{
auto read_buffer = entry->getReadBuffer();
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
return ChecksumsForNewEntry{hashing_read_buffer.getHash(), 0};
}
else
{
return ChecksumsForNewEntry{*(entry->getChecksum()), 0};
}
}
ChecksumsForNewEntry res;
/// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation.
res.prefix_checksum = entry->getPartialChecksum(prefix_size);
res.full_checksum = entry->getChecksum();
return res;
}

/// We store entries' file names in the backup without leading slashes.
@ -111,6 +88,7 @@ String BackupFileInfo::describe() const
result += fmt::format("base_checksum: {};\n", getHexUIntLowercase(checksum));
result += fmt::format("data_file_name: {};\n", data_file_name);
result += fmt::format("data_file_index: {};\n", data_file_index);
result += fmt::format("encrypted_by_disk: {};\n", encrypted_by_disk);
return result;
}

@ -122,6 +100,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
BackupFileInfo info;
info.file_name = adjusted_path;
info.size = backup_entry->getSize();
info.encrypted_by_disk = backup_entry->isEncryptedByDisk();

/// We don't set `info.data_file_name` and `info.data_file_index` in this function because they're set during backup coordination
/// (see the class BackupCoordinationFileInfos).
@ -139,7 +118,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu

/// We have info about this file in base backup
/// If file has no checksum -- calculate and fill it.
if (base_backup_file_info.has_value())
if (base_backup_file_info)
{
LOG_TRACE(log, "File {} found in base backup, checking for equality", adjusted_path);
CheckBackupResult check_base = checkBaseBackupForFile(*base_backup_file_info, info);
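In the rewritten calculateNewEntryChecksumsIfNeeded() above, the partial checksum is requested before the full checksum so that an entry can hash the first prefix_size bytes once and then extend the same running hash to the end of the data instead of reading it twice. A standalone toy illustration of that ordering (the hash itself is a deliberately trivial stand-in, not the checksum used by ClickHouse):

#include <cstdint>
#include <iostream>
#include <string>

/// Toy incremental checksum used only to illustrate the idea: the prefix hash is
/// computed first and then extended to the full hash without re-reading the prefix.
struct ToyHash
{
    uint64_t state = 0;
    void update(const char * data, size_t size)
    {
        for (size_t i = 0; i != size; ++i)
            state = (state * 1099511628211ULL) ^ static_cast<unsigned char>(data[i]);
    }
};

int main()
{
    std::string file_contents = "example backup entry contents";
    size_t prefix_size = 7;

    ToyHash hash;
    hash.update(file_contents.data(), prefix_size);
    uint64_t prefix_checksum = hash.state;   /// checksum of the first `prefix_size` bytes

    hash.update(file_contents.data() + prefix_size, file_contents.size() - prefix_size);
    uint64_t full_checksum = hash.state;     /// checksum of the whole data, prefix not re-read

    std::cout << prefix_checksum << ' ' << full_checksum << '\n';
}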
@ -35,6 +35,9 @@ struct BackupFileInfo
/// This field is set during backup coordination (see the class BackupCoordinationFileInfos).
size_t data_file_index = static_cast<size_t>(-1);

/// Whether this file is encrypted by an encrypted disk.
bool encrypted_by_disk = false;

struct LessByFileName
{
bool operator()(const BackupFileInfo & lhs, const BackupFileInfo & rhs) const { return (lhs.file_name < rhs.file_name); }
@ -1,46 +0,0 @@
#include <Backups/BackupIO.h>

#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/SeekableReadBuffer.h>
#include <Interpreters/Context.h>

namespace DB
{

namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}

void IBackupReader::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings)
{
auto read_buffer = readFile(file_name);
auto write_buffer = destination_disk->writeFile(destination_path, std::min<size_t>(size, DBMS_DEFAULT_BUFFER_SIZE), write_mode, write_settings);
copyData(*read_buffer, *write_buffer, size);
write_buffer->finalize();
}

IBackupWriter::IBackupWriter(const ContextPtr & context_)
: read_settings(context_->getBackupReadSettings())
, has_throttling(static_cast<bool>(context_->getBackupsThrottler()))
{}

void IBackupWriter::copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
{
auto read_buffer = create_read_buffer();
if (offset)
read_buffer->seek(offset, SEEK_SET);
auto write_buffer = writeFile(dest_file_name);
copyData(*read_buffer, *write_buffer, size);
write_buffer->finalize();
}

void IBackupWriter::copyFileNative(
DiskPtr /* src_disk */, const String & /* src_file_name */, UInt64 /* src_offset */, UInt64 /* src_size */, const String & /* dest_file_name */)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Native copy not implemented for backup writer");
}
}
@ -1,58 +1,72 @@
#pragma once

#include <Core/Types.h>
#include <Disks/DiskType.h>
#include <Disks/IDisk.h>
#include <IO/ReadSettings.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class SeekableReadBuffer;
class WriteBuffer;
enum class WriteMode;
struct WriteSettings;
struct ReadSettings;

/// Represents operations of loading from disk or downloading for reading a backup.
class IBackupReader /// BackupReaderFile, BackupReaderDisk
/// See also implementations: BackupReaderFile, BackupReaderDisk.
class IBackupReader
{
public:
virtual ~IBackupReader() = default;

virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;

virtual std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) = 0;
virtual void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings);
virtual DataSourceDescription getDataSourceDescription() const = 0;

/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
/// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `encrypted_in_backup` specify if this file is encrypted in the backup, so it shouldn't be encrypted again while restoring to an encrypted disk.
virtual void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) = 0;

virtual const ReadSettings & getReadSettings() const = 0;
virtual const WriteSettings & getWriteSettings() const = 0;
virtual size_t getWriteBufferSize() const = 0;
};

/// Represents operations of storing to disk or uploading for writing a backup.
class IBackupWriter /// BackupWriterFile, BackupWriterDisk
/// See also implementations: BackupWriterFile, BackupWriterDisk
class IBackupWriter
{
public:
using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;

explicit IBackupWriter(const ContextPtr & context_);

virtual ~IBackupWriter() = default;

virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual bool fileContentsEqual(const String & file_name, const String & expected_file_contents) = 0;

virtual std::unique_ptr<WriteBuffer> writeFile(const String & file_name) = 0;

using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;
virtual void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) = 0;

/// The function copyFileFromDisk() can be much faster than copyDataToFile()
/// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `start_pos` and `length` specify a part of the file on `src_disk` to copy to the backup.
/// `copy_encrypted` specify whether this function should copy encrypted data of the file `src_path` to the backup.
virtual void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) = 0;

virtual void removeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0;
virtual DataSourceDescription getDataSourceDescription() const = 0;
virtual void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name);
virtual bool supportNativeCopy(DataSourceDescription /* data_source_description */) const { return false; }

/// Copy file using native copy (optimized for S3 to use CopyObject)
///
/// NOTE: It still may fall back to copyDataToFile() if native copy is not possible:
/// - different buckets
/// - throttling had been requested
virtual void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name);

protected:
const ReadSettings read_settings;
const bool has_throttling;
virtual const ReadSettings & getReadSettings() const = 0;
virtual const WriteSettings & getWriteSettings() const = 0;
virtual size_t getWriteBufferSize() const = 0;
};

}
src/Backups/BackupIO_Default.cpp (new file, 95 lines)
@ -0,0 +1,95 @@
#include <Backups/BackupIO_Default.h>

#include <Disks/IDisk.h>
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/SeekableReadBuffer.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>

namespace DB
{

BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}

void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
LOG_TRACE(log, "Copying file {} to disk {} through buffers", path_in_backup, destination_disk->getName());

auto read_buffer = readFile(path_in_backup);

std::unique_ptr<WriteBuffer> write_buffer;
auto buf_size = std::min(file_size, write_buffer_size);
if (encrypted_in_backup)
write_buffer = destination_disk->writeEncryptedFile(destination_path, buf_size, write_mode, write_settings);
else
write_buffer = destination_disk->writeFile(destination_path, buf_size, write_mode, write_settings);

copyData(*read_buffer, *write_buffer, file_size);
write_buffer->finalize();
}

BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}

bool BackupWriterDefault::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!fileExists(file_name))
return false;

try
{
auto in = readFile(file_name, expected_file_contents.size());
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
}

void BackupWriterDefault::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
auto read_buffer = create_read_buffer();

if (start_pos)
read_buffer->seek(start_pos, SEEK_SET);

auto write_buffer = writeFile(path_in_backup);

copyData(*read_buffer, *write_buffer, length);
write_buffer->finalize();
}

void BackupWriterDefault::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
LOG_TRACE(log, "Copying file {} from disk {} through buffers", src_path, src_disk->getName());

auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)]
{
if (copy_encrypted)
return src_disk->readEncryptedFile(src_path, settings);
else
return src_disk->readFile(src_path, settings);
};

copyDataToFile(path_in_backup, create_read_buffer, start_pos, length);
}
}
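BackupReaderDefault and BackupWriterDefault above implement the generic, buffer-based paths (copyFileToDisk(), copyDataToFile(), fileContentsEqual()) once, in terms of a few primitives that each backend supplies. The following standalone sketch reduces that shape to a single primitive and uses standard-library types rather than the ClickHouse ones; it is an analogy, not code from this diff:

#include <fstream>
#include <iostream>
#include <memory>
#include <string>

/// The base class implements the generic comparison in terms of one primitive (readFile),
/// and a concrete backend only supplies that primitive.
class WriterBase
{
public:
    virtual ~WriterBase() = default;

    bool fileContentsEqual(const std::string & name, const std::string & expected)
    {
        auto in = readFile(name, expected.size());
        if (!in)
            return false;
        std::string actual(expected.size(), ' ');
        in->read(actual.data(), static_cast<std::streamsize>(actual.size()));
        return static_cast<size_t>(in->gcount()) == expected.size() && actual == expected && in->peek() == EOF;
    }

protected:
    virtual std::unique_ptr<std::istream> readFile(const std::string & name, size_t expected_size) = 0;
};

/// Local-filesystem backend: only the primitive is backend-specific.
class LocalWriter : public WriterBase
{
protected:
    std::unique_ptr<std::istream> readFile(const std::string & name, size_t /*expected_size*/) override
    {
        auto in = std::make_unique<std::ifstream>(name, std::ios::binary);
        if (!in->is_open())
            return nullptr;
        return in;
    }
};

int main()
{
    LocalWriter writer;
    std::cout << writer.fileContentsEqual("example.txt", "expected contents") << '\n';
}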
src/Backups/BackupIO_Default.h (new file, 73 lines)
@ -0,0 +1,73 @@
#pragma once

#include <Backups/BackupIO.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;
class ReadBuffer;
class SeekableReadBuffer;
class WriteBuffer;
enum class WriteMode;

/// Represents operations of loading from disk or downloading for reading a backup.
class BackupReaderDefault : public IBackupReader
{
public:
BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_);
~BackupReaderDefault() override = default;

/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
/// (especially for S3 where it can use CopyObject to copy objects inside S3 instead of downloading and uploading them).
/// Parameters:
/// `encrypted_in_backup` specify if this file is encrypted in the backup, so it shouldn't be encrypted again while restoring to an encrypted disk.
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;

const ReadSettings & getReadSettings() const override { return read_settings; }
const WriteSettings & getWriteSettings() const override { return write_settings; }
size_t getWriteBufferSize() const override { return write_buffer_size; }

protected:
Poco::Logger * const log;
const ReadSettings read_settings;

/// The write settings are used to write to the source disk in copyFileToDisk().
const WriteSettings write_settings;
const size_t write_buffer_size;
};

/// Represents operations of storing to disk or uploading for writing a backup.
class BackupWriterDefault : public IBackupWriter
{
public:
BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_);
~BackupWriterDefault() override = default;

bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, bool copy_encrypted, UInt64 start_pos, UInt64 length) override;

const ReadSettings & getReadSettings() const override { return read_settings; }
const WriteSettings & getWriteSettings() const override { return write_settings; }
size_t getWriteBufferSize() const override { return write_buffer_size; }

protected:
/// Here readFile() is used only to implement fileContentsEqual().
virtual std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) = 0;

Poco::Logger * const log;

/// The read settings are used to read from the source disk in copyFileFromDisk().
const ReadSettings read_settings;

const WriteSettings write_settings;
const size_t write_buffer_size;
};

}
@ -8,13 +8,11 @@
namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & path_)
: disk(disk_), path(path_), log(&Poco::Logger::get("BackupReaderDisk"))
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderDisk"), context_)
, disk(disk_)
, root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())
{
}

@ -22,38 +20,47 @@ BackupReaderDisk::~BackupReaderDisk() = default;

bool BackupReaderDisk::fileExists(const String & file_name)
{
return disk->exists(path / file_name);
return disk->exists(root_path / file_name);
}

UInt64 BackupReaderDisk::getFileSize(const String & file_name)
{
return disk->getFileSize(path / file_name);
return disk->getFileSize(root_path / file_name);
}

std::unique_ptr<SeekableReadBuffer> BackupReaderDisk::readFile(const String & file_name)
{
return disk->readFile(path / file_name);
return disk->readFile(root_path / file_name, read_settings);
}

void BackupReaderDisk::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings)
void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
if (write_mode == WriteMode::Rewrite)
/// Use IDisk::copyFile() as a more optimal way to copy a file if it's possible.
/// However IDisk::copyFile() can't use throttling for reading, and can't copy an encrypted file or do appending.
bool has_throttling = disk->isRemote() ? static_cast<bool>(read_settings.remote_throttler) : static_cast<bool>(read_settings.local_throttler);
if (!has_throttling && (write_mode == WriteMode::Rewrite) && !encrypted_in_backup)
{
LOG_TRACE(log, "Copying {}/{} from disk {} to {} by the disk", path, file_name, disk->getName(), destination_disk->getName());
disk->copyFile(path / file_name, *destination_disk, destination_path, write_settings);
return;
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description.sameKind(data_source_description) && !data_source_description.is_encrypted)
{
/// Use more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} to disk {}", path_in_backup, disk->getName(), destination_disk->getName());
disk->copyFile(root_path / path_in_backup, *destination_disk, destination_path, write_settings);
return; /// copied!
}
}

LOG_TRACE(log, "Copying {}/{} from disk {} to {} through buffers", path, file_name, disk->getName(), destination_disk->getName());
IBackupReader::copyFileToDisk(file_name, size, destination_disk, destination_path, write_mode, write_settings);
/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}

BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_)
: IBackupWriter(context_)
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterDisk"), context_)
, disk(disk_)
, path(path_)
, root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())
{
}

@ -61,85 +68,64 @@ BackupWriterDisk::~BackupWriterDisk() = default;

bool BackupWriterDisk::fileExists(const String & file_name)
{
return disk->exists(path / file_name);
return disk->exists(root_path / file_name);
}

UInt64 BackupWriterDisk::getFileSize(const String & file_name)
{
return disk->getFileSize(path / file_name);
return disk->getFileSize(root_path / file_name);
}

bool BackupWriterDisk::fileContentsEqual(const String & file_name, const String & expected_file_contents)
std::unique_ptr<ReadBuffer> BackupWriterDisk::readFile(const String & file_name, size_t expected_file_size)
{
if (!disk->exists(path / file_name))
return false;

try
{
auto in = disk->readFile(path / file_name);
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
return disk->readFile(root_path / file_name, read_settings.adjustBufferSize(expected_file_size));
}

std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_name)
{
auto file_path = path / file_name;
auto file_path = root_path / file_name;
disk->createDirectories(file_path.parent_path());
return disk->writeFile(file_path);
return disk->writeFile(file_path, write_buffer_size, WriteMode::Rewrite, write_settings);
}

void BackupWriterDisk::removeFile(const String & file_name)
{
disk->removeFileIfExists(path / file_name);
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path))
disk->removeDirectory(path);
disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}

void BackupWriterDisk::removeFiles(const Strings & file_names)
{
for (const auto & file_name : file_names)
disk->removeFileIfExists(path / file_name);
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path))
disk->removeDirectory(path);
disk->removeFileIfExists(root_path / file_name);
if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
disk->removeDirectory(root_path);
}

DataSourceDescription BackupWriterDisk::getDataSourceDescription() const
void BackupWriterDisk::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
return disk->getDataSourceDescription();
}

DataSourceDescription BackupReaderDisk::getDataSourceDescription() const
{
return disk->getDataSourceDescription();
}

bool BackupWriterDisk::supportNativeCopy(DataSourceDescription data_source_description) const
{
return data_source_description == disk->getDataSourceDescription();
}

void BackupWriterDisk::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name)
{
if (!src_disk)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");

if (has_throttling || (src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
/// Use IDisk::copyFile() as a more optimal way to copy a file if it's possible.
/// However IDisk::copyFile() can't use throttling for reading, and can't copy an encrypted file or copy a part of the file.
bool has_throttling = src_disk->isRemote() ? static_cast<bool>(read_settings.remote_throttler) : static_cast<bool>(read_settings.local_throttler);
if (!has_throttling && !start_pos && !copy_encrypted)
{
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
return;
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description) && !source_data_source_description.is_encrypted
&& (length == src_disk->getFileSize(src_path)))
{
/// Use more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} to disk {}", src_path, src_disk->getName(), disk->getName());
auto dest_file_path = root_path / path_in_backup;
disk->createDirectories(dest_file_path.parent_path());
src_disk->copyFile(src_path, *disk, dest_file_path, write_settings);
return; /// copied!
}
}

auto file_path = path / dest_file_name;
disk->createDirectories(file_path.parent_path());
src_disk->copyFile(src_file_name, *disk, file_path);
/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}

}
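Both BackupReaderDisk::copyFileToDisk() and BackupWriterDisk::copyFileFromDisk() above gate the IDisk::copyFile() fast path on the same throttling check, because that path would bypass the configured read throttler. The condition can be restated in isolation like this; Throttler and ReadSettings below are minimal stand-ins for the real ClickHouse types:

#include <iostream>
#include <memory>

/// Stand-ins for the real ClickHouse types, for illustration only.
struct Throttler {};
struct ReadSettings
{
    std::shared_ptr<Throttler> remote_throttler;
    std::shared_ptr<Throttler> local_throttler;
};

/// Native disk-to-disk copy skips throttling, so it is only allowed when
/// no throttler applies to the source disk (remote or local).
bool canUseNativeCopy(const ReadSettings & settings, bool disk_is_remote)
{
    bool has_throttling = disk_is_remote ? static_cast<bool>(settings.remote_throttler)
                                         : static_cast<bool>(settings.local_throttler);
    return !has_throttling;
}

int main()
{
    ReadSettings settings;
    settings.local_throttler = std::make_shared<Throttler>();
    std::cout << canUseNativeCopy(settings, /*disk_is_remote=*/false) << '\n';  /// 0: throttled, use buffered copy
    std::cout << canUseNativeCopy(settings, /*disk_is_remote=*/true) << '\n';   /// 1: no remote throttler, native copy allowed
}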
@ -1,53 +1,58 @@
#pragma once

#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <filesystem>
#include <Backups/BackupIO.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{
class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;

class BackupReaderDisk : public IBackupReader
class BackupReaderDisk : public BackupReaderDefault
{
public:
BackupReaderDisk(const DiskPtr & disk_, const String & path_);
BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
~BackupReaderDisk() override;

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;

std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;

void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;

private:
DiskPtr disk;
std::filesystem::path path;
Poco::Logger * log;
const DiskPtr disk;
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
};

class BackupWriterDisk : public IBackupWriter
class BackupWriterDisk : public BackupWriterDefault
{
public:
BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_);
BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
~BackupWriterDisk() override;

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;

std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;

void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;

void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
DataSourceDescription getDataSourceDescription() const override;

bool supportNativeCopy(DataSourceDescription data_source_description) const override;
void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;

private:
DiskPtr disk;
std::filesystem::path path;
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;

const DiskPtr disk;
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
};

}
@ -1,9 +1,7 @@
#include <Backups/BackupIO_File.h>
#include <Disks/IDisk.h>
#include <Disks/DiskLocal.h>
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/copyData.h>
#include <Common/filesystemHelpers.h>
#include <Common/logger_useful.h>

@ -12,158 +10,146 @@ namespace fs = std::filesystem;

namespace DB
{
BackupReaderFile::BackupReaderFile(const String & path_) : path(path_), log(&Poco::Logger::get("BackupReaderFile"))

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

BackupReaderFile::BackupReaderFile(const String & root_path_, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderFile"), context_)
, root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{
}

BackupReaderFile::~BackupReaderFile() = default;

bool BackupReaderFile::fileExists(const String & file_name)
{
return fs::exists(path / file_name);
return fs::exists(root_path / file_name);
}

UInt64 BackupReaderFile::getFileSize(const String & file_name)
{
return fs::file_size(path / file_name);
return fs::file_size(root_path / file_name);
}

std::unique_ptr<SeekableReadBuffer> BackupReaderFile::readFile(const String & file_name)
{
return createReadBufferFromFileBase(path / file_name, {});
return createReadBufferFromFileBase(root_path / file_name, read_settings);
}

void BackupReaderFile::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings)
void BackupReaderFile::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
if (destination_disk->getDataSourceDescription() == getDataSourceDescription())
/// std::filesystem::copy() can copy from the filesystem only, and can't do throttling or appending.
bool has_throttling = static_cast<bool>(read_settings.local_throttler);
if (!has_throttling && (write_mode == WriteMode::Rewrite))
{
/// Use more optimal way.
LOG_TRACE(log, "Copying {}/{} to disk {} locally", path, file_name, destination_disk->getName());
fs::copy(path / file_name, fullPath(destination_disk, destination_path), fs::copy_options::overwrite_existing);
return;
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description.sameKind(data_source_description)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
/// Use more optimal way.
LOG_TRACE(log, "Copying file {} to disk {} locally", path_in_backup, destination_disk->getName());

auto write_blob_function = [abs_source_path = root_path / path_in_backup, file_size](
const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
{
/// For local disks the size of a blob path is expected to be 1.
if (blob_path.size() != 1 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);
fs::copy(abs_source_path, blob_path.at(0), fs::copy_options::overwrite_existing);
return file_size;
};

destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}
}

LOG_TRACE(log, "Copying {}/{} to disk {} through buffers", path, file_name, destination_disk->getName());
IBackupReader::copyFileToDisk(path / file_name, size, destination_disk, destination_path, write_mode, write_settings);
/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}

BackupWriterFile::BackupWriterFile(const String & path_, const ContextPtr & context_)
: IBackupWriter(context_)
, path(path_)
BackupWriterFile::BackupWriterFile(const String & root_path_, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterFile"), context_)
, root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{
}

BackupWriterFile::~BackupWriterFile() = default;

bool BackupWriterFile::fileExists(const String & file_name)
{
return fs::exists(path / file_name);
return fs::exists(root_path / file_name);
}

UInt64 BackupWriterFile::getFileSize(const String & file_name)
{
return fs::file_size(path / file_name);
return fs::file_size(root_path / file_name);
}

bool BackupWriterFile::fileContentsEqual(const String & file_name, const String & expected_file_contents)
std::unique_ptr<ReadBuffer> BackupWriterFile::readFile(const String & file_name, size_t expected_file_size)
{
if (!fs::exists(path / file_name))
return false;

try
{
auto in = createReadBufferFromFileBase(path / file_name, {});
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
return createReadBufferFromFileBase(root_path / file_name, read_settings.adjustBufferSize(expected_file_size));
}

std::unique_ptr<WriteBuffer> BackupWriterFile::writeFile(const String & file_name)
{
auto file_path = path / file_name;
auto file_path = root_path / file_name;
fs::create_directories(file_path.parent_path());
return std::make_unique<WriteBufferFromFile>(file_path);
return std::make_unique<WriteBufferFromFile>(file_path, write_buffer_size, -1, write_settings.local_throttler);
}

void BackupWriterFile::removeFile(const String & file_name)
{
fs::remove(path / file_name);
if (fs::is_directory(path) && fs::is_empty(path))
fs::remove(path);
fs::remove(root_path / file_name);
if (fs::is_directory(root_path) && fs::is_empty(root_path))
fs::remove(root_path);
}

void BackupWriterFile::removeFiles(const Strings & file_names)
{
for (const auto & file_name : file_names)
fs::remove(path / file_name);
if (fs::is_directory(path) && fs::is_empty(path))
fs::remove(path);
fs::remove(root_path / file_name);
if (fs::is_directory(root_path) && fs::is_empty(root_path))
fs::remove(root_path);
}

DataSourceDescription BackupWriterFile::getDataSourceDescription() const
void BackupWriterFile::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
DataSourceDescription data_source_description;

data_source_description.type = DataSourceType::Local;

if (auto block_device_id = tryGetBlockDeviceId(path); block_device_id.has_value())
data_source_description.description = *block_device_id;
else
data_source_description.description = path;
data_source_description.is_encrypted = false;
data_source_description.is_cached = false;

return data_source_description;
}

DataSourceDescription BackupReaderFile::getDataSourceDescription() const
{
DataSourceDescription data_source_description;

data_source_description.type = DataSourceType::Local;

if (auto block_device_id = tryGetBlockDeviceId(path); block_device_id.has_value())
data_source_description.description = *block_device_id;
else
data_source_description.description = path;
data_source_description.is_encrypted = false;
data_source_description.is_cached = false;

return data_source_description;
}

bool BackupWriterFile::supportNativeCopy(DataSourceDescription data_source_description) const
{
return data_source_description == getDataSourceDescription();
}

void BackupWriterFile::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name)
{
std::string abs_source_path;
if (src_disk)
abs_source_path = fullPath(src_disk, src_file_name);
else
abs_source_path = fs::absolute(src_file_name);

if (has_throttling || (src_offset != 0) || (src_size != fs::file_size(abs_source_path)))
/// std::filesystem::copy() can copy from the filesystem only, and can't do throttling or copy a part of the file.
bool has_throttling = static_cast<bool>(read_settings.local_throttler);
if (!has_throttling)
{
auto create_read_buffer = [this, abs_source_path] { return createReadBufferFromFileBase(abs_source_path, read_settings); };
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
return;
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description)
&& (source_data_source_description.is_encrypted == copy_encrypted))
{
/// std::filesystem::copy() can copy from a single file only.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 1)
{
auto abs_source_path = blob_path[0];

/// std::filesystem::copy() can copy a file as a whole only.
if ((start_pos == 0) && (length == fs::file_size(abs_source_path)))
{
/// Use more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} locally", src_path, src_disk->getName());
auto abs_dest_path = root_path / path_in_backup;
fs::create_directories(abs_dest_path.parent_path());
fs::copy(abs_source_path, abs_dest_path, fs::copy_options::overwrite_existing);
return; /// copied!
}
}
}
}

auto file_path = path / dest_file_name;
fs::create_directories(file_path.parent_path());
fs::copy(abs_source_path, file_path, fs::copy_options::overwrite_existing);
/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}

}
@ -1,48 +1,51 @@
#pragma once

#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <filesystem>
#include <Backups/BackupIO.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{

class BackupReaderFile : public IBackupReader
class BackupReaderFile : public BackupReaderDefault
{
public:
explicit BackupReaderFile(const String & path_);
~BackupReaderFile() override;
explicit BackupReaderFile(const String & root_path_, const ContextPtr & context_);

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;

std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;

void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;

private:
std::filesystem::path path;
Poco::Logger * log;
const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
};

class BackupWriterFile : public IBackupWriter
class BackupWriterFile : public BackupWriterDefault
{
public:
explicit BackupWriterFile(const String & path_, const ContextPtr & context_);
~BackupWriterFile() override;
BackupWriterFile(const String & root_path_, const ContextPtr & context_);

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;

void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;

void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;
DataSourceDescription getDataSourceDescription() const override;
bool supportNativeCopy(DataSourceDescription data_source_description) const override;
void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;

private:
std::filesystem::path path;
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;

const std::filesystem::path root_path;
const DataSourceDescription data_source_description;
};

}
@ -2,7 +2,6 @@

#if USE_AWS_S3
#include <Common/quoteString.h>
#include <Disks/ObjectStorages/S3/copyS3FileToDisk.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h>
@ -12,6 +11,7 @@
#include <IO/S3/copyS3File.h>
#include <IO/S3/Client.h>
#include <IO/S3/Credentials.h>
#include <Disks/IDisk.h>

#include <Poco/Util/AbstractConfiguration.h>

@ -102,21 +102,15 @@ namespace

BackupReaderS3::BackupReaderS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
: s3_uri(s3_uri_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_)
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, read_settings(context_->getReadSettings())
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupReaderS3"))
, data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false}
{
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
}

DataSourceDescription BackupReaderS3::getDataSourceDescription() const
{
return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false};
}

BackupReaderS3::~BackupReaderS3() = default;

bool BackupReaderS3::fileExists(const String & file_name)
@ -138,75 +132,98 @@ std::unique_ptr<SeekableReadBuffer> BackupReaderS3::readFile(const String & file
client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings);
}

void BackupReaderS3::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings)
void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode)
{
LOG_TRACE(log, "Copying {} to disk {}", file_name, destination_disk->getName());
/// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible.
/// We don't check for `has_throttling` here because the native copy almost doesn't use network.
auto destination_data_source_description = destination_disk->getDataSourceDescription();
if (destination_data_source_description.sameKind(data_source_description)
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
{
/// Use native copy, the more optimal way.
LOG_TRACE(log, "Copying {} from S3 to disk {} using native copy", path_in_backup, destination_disk->getName());
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes) -> size_t
{
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Blob writing function called with unexpected blob_path.size={} or mode={}",
blob_path.size(), mode);

copyS3FileToDisk(
client,
s3_uri.bucket,
fs::path(s3_uri.key) / file_name,
s3_uri.version_id,
0,
size,
destination_disk,
destination_path,
write_mode,
read_settings,
write_settings,
request_settings,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupReaderS3"));
copyS3File(
client,
s3_uri.bucket,
fs::path(s3_uri.key) / path_in_backup,
0,
file_size,
/* dest_bucket= */ blob_path[1],
/* dest_key= */ blob_path[0],
request_settings,
object_attributes,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupReaderS3"),
/* for_disk_s3= */ true);

return file_size;
};

destination_disk->writeFileUsingBlobWritingFunction(destination_path, write_mode, write_blob_function);
return; /// copied!
}

/// Fallback to copy through buffers.
BackupReaderDefault::copyFileToDisk(path_in_backup, file_size, encrypted_in_backup, destination_disk, destination_path, write_mode);
}

BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
: IBackupWriter(context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
, log(&Poco::Logger::get("BackupWriterS3"))
, data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false}
{
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
}

DataSourceDescription BackupWriterS3::getDataSourceDescription() const
void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
{
return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false};
}

bool BackupWriterS3::supportNativeCopy(DataSourceDescription data_source_description) const
{
return getDataSourceDescription() == data_source_description;
}

void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name)
{
if (!src_disk)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");

auto objects = src_disk->getStorageObjects(src_file_name);
if (objects.size() > 1)
/// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible.
/// We don't check for `has_throttling` here because the native copy almost doesn't use network.
auto source_data_source_description = src_disk->getDataSourceDescription();
if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted))
{
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
}
else
{
auto object_storage = src_disk->getObjectStorage();
std::string src_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / dest_file_name;
copyS3File(client, src_bucket, objects[0].remote_path, src_offset, src_size, s3_uri.bucket, file_path, request_settings, {},
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
/// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in S3 bucket.
/// In this case we can't use the native copy.
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
{
/// Use native copy, the more optimal way.
LOG_TRACE(log, "Copying file {} from disk {} to S3 using native copy", src_path, src_disk->getName());
copyS3File(
client,
/* src_bucket */ blob_path[1],
/* src_key= */ blob_path[0],
start_pos,
length,
s3_uri.bucket,
fs::path(s3_uri.key) / path_in_backup,
request_settings,
{},
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
return; /// copied!
}
}

/// Fallback to copy through buffers.
BackupWriterDefault::copyFileFromDisk(path_in_backup, src_disk, src_path, copy_encrypted, start_pos, length);
}

void BackupWriterS3::copyDataToFile(
const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
void BackupWriterS3::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToS3File(create_read_buffer, offset, size, client, s3_uri.bucket, fs::path(s3_uri.key) / dest_file_name, request_settings, {},
copyDataToS3File(create_read_buffer, start_pos, length, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, request_settings, {},
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
}

@ -225,24 +242,11 @@ UInt64 BackupWriterS3::getFileSize(const String & file_name)
return objects[0].GetSize();
}

bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & expected_file_contents)
std::unique_ptr<ReadBuffer> BackupWriterS3::readFile(const String & file_name, size_t expected_file_size)
{
if (listObjects(*client, s3_uri, file_name).empty())
return false;

try
{
auto in = std::make_unique<ReadBufferFromS3>(
client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings);
String actual_file_contents(expected_file_contents.size(), ' ');
return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
&& (actual_file_contents == expected_file_contents) && in->eof();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
return false;
}
return std::make_unique<ReadBufferFromS3>(
client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings,
false, 0, 0, false, expected_file_size);
}

std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
@ -253,8 +257,8 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
fs::path(s3_uri.key) / file_name,
request_settings,
std::nullopt,
DBMS_DEFAULT_BUFFER_SIZE,
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"));
threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"),
write_settings);
}

void BackupWriterS3::removeFile(const String & file_name)
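The native-copy branches above rely on the order of the elements returned by IDisk::getBlobPath(): when a file on an S3-backed disk is stored as a single object, the diff expects two elements and indexes them as blob_path[0] = key and blob_path[1] = bucket, while a plain local disk yields a single absolute path. A small self-contained sketch of consuming that convention (the helper and names below are illustrative only; only the indexing mirrors the code above):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

using Strings = std::vector<std::string>;

/// Mirrors the indexing used in the diff: blob_path[0] is the object key,
/// blob_path[1] is the bucket. Anything else means the file is stored as
/// several objects and native copy is not applicable.
struct S3Location
{
    std::string bucket;
    std::string key;
};

S3Location parseBlobPath(const Strings & blob_path)
{
    if (blob_path.size() != 2)
        throw std::runtime_error("Native copy requires a single-object blob path");
    return S3Location{/*bucket=*/blob_path[1], /*key=*/blob_path[0]};
}

int main()
{
    Strings blob_path{"data/backups/part_1.bin", "my-bucket"};
    auto location = parseBlobPath(blob_path);
    std::cout << location.bucket << '/' << location.key << '\n';
}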
@ -3,8 +3,8 @@
#include "config.h"

#if USE_AWS_S3
#include <Backups/BackupIO.h>
#include <IO/ReadSettings.h>
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <IO/S3Common.h>
#include <Storages/StorageS3Settings.h>
#include <Interpreters/Context_fwd.h>

@ -14,7 +14,7 @@ namespace DB
{

/// Represents a backup stored to AWS S3.
class BackupReaderS3 : public IBackupReader
class BackupReaderS3 : public BackupReaderDefault
{
public:
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);

@ -23,20 +23,19 @@ public:
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;

void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;

private:
S3::URI s3_uri;
std::shared_ptr<S3::Client> client;
ReadSettings read_settings;
const S3::URI s3_uri;
const std::shared_ptr<S3::Client> client;
S3Settings::RequestSettings request_settings;
Poco::Logger * log;
const DataSourceDescription data_source_description;
};


class BackupWriterS3 : public IBackupWriter
class BackupWriterS3 : public BackupWriterDefault
{
public:
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);

@ -44,42 +43,24 @@ public:

bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;

void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;

void removeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

DataSourceDescription getDataSourceDescription() const override;
bool supportNativeCopy(DataSourceDescription data_source_description) const override;
void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name) override;

private:
void copyObjectImpl(
const String & src_bucket,
const String & src_key,
const String & dst_bucket,
const String & dst_key,
size_t size,
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;

void copyObjectMultipartImpl(
const String & src_bucket,
const String & src_key,
const String & dst_bucket,
const String & dst_key,
size_t size,
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;

std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeFilesBatch(const Strings & file_names);

S3::URI s3_uri;
std::shared_ptr<S3::Client> client;
const S3::URI s3_uri;
const std::shared_ptr<S3::Client> client;
S3Settings::RequestSettings request_settings;
Poco::Logger * log;
std::optional<bool> supports_batch_delete;
const DataSourceDescription data_source_description;
};

}
@ -36,6 +36,7 @@ namespace ErrorCodes
extern const int WRONG_BASE_BACKUP;
extern const int BACKUP_ENTRY_NOT_FOUND;
extern const int BACKUP_IS_EMPTY;
extern const int CANNOT_RESTORE_TO_NONENCRYPTED_DISK;
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
extern const int LOGICAL_ERROR;
}

@ -339,6 +340,8 @@ void BackupImpl::writeBackupMetadata()
}
if (!info.data_file_name.empty() && (info.data_file_name != info.file_name))
*out << "<data_file>" << xml << info.data_file_name << "</data_file>";
if (info.encrypted_by_disk)
*out << "<encrypted_by_disk>true</encrypted_by_disk>";
}

total_size += info.size;

@ -444,6 +447,7 @@ void BackupImpl::readBackupMetadata()
{
info.data_file_name = getString(file_config, "data_file", info.file_name);
}
info.encrypted_by_disk = getBool(file_config, "encrypted_by_disk", false);
}

file_names.emplace(info.file_name, std::pair{info.size, info.checksum});

@ -633,6 +637,11 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const String & file_nam
}

std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) const
{
return readFileImpl(size_and_checksum, /* read_encrypted= */ false);
}

std::unique_ptr<SeekableReadBuffer> BackupImpl::readFileImpl(const SizeAndChecksum & size_and_checksum, bool read_encrypted) const
{
if (open_mode != OpenMode::READ)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");

@ -660,6 +669,14 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum &
info = it->second;
}

if (info.encrypted_by_disk != read_encrypted)
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TO_NONENCRYPTED_DISK,
"File {} is encrypted in the backup, it can be restored only to an encrypted disk",
info.data_file_name);
}

std::unique_ptr<SeekableReadBuffer> read_buffer;
std::unique_ptr<SeekableReadBuffer> base_read_buffer;

@ -720,14 +737,14 @@ std::unique_ptr<SeekableReadBuffer> BackupImpl::readFile(const SizeAndChecksum &
}
}

size_t BackupImpl::copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) const
size_t BackupImpl::copyFileToDisk(const String & file_name,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const
{
return copyFileToDisk(getFileSizeAndChecksum(file_name), destination_disk, destination_path, write_mode, write_settings);
return copyFileToDisk(getFileSizeAndChecksum(file_name), destination_disk, destination_path, write_mode);
}

size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) const
size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const
{
if (open_mode != OpenMode::READ)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");

@ -760,19 +777,26 @@ size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, Dis
info = it->second;
}

if (info.encrypted_by_disk && !destination_disk->getDataSourceDescription().is_encrypted)
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TO_NONENCRYPTED_DISK,
"File {} is encrypted in the backup, it can be restored only to an encrypted disk",
info.data_file_name);
}

bool file_copied = false;

if (info.size && !info.base_size && !use_archive)
{
/// Data comes completely from this backup.
reader->copyFileToDisk(info.data_file_name, info.size, destination_disk, destination_path, write_mode, write_settings);
reader->copyFileToDisk(info.data_file_name, info.size, info.encrypted_by_disk, destination_disk, destination_path, write_mode);
file_copied = true;

}
else if (info.size && (info.size == info.base_size))
{
/// Data comes completely from the base backup (nothing comes from this backup).
base_backup->copyFileToDisk(std::pair{info.base_size, info.base_checksum}, destination_disk, destination_path, write_mode, write_settings);
base_backup->copyFileToDisk(std::pair{info.base_size, info.base_checksum}, destination_disk, destination_path, write_mode);
file_copied = true;
}

@ -786,9 +810,13 @@ size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, Dis
else
{
/// Use the generic way to copy data. `readFile()` will update `num_read_files`.
auto read_buffer = readFile(size_and_checksum);
auto write_buffer = destination_disk->writeFile(destination_path, std::min<size_t>(info.size, DBMS_DEFAULT_BUFFER_SIZE),
write_mode, write_settings);
auto read_buffer = readFileImpl(size_and_checksum, /* read_encrypted= */ info.encrypted_by_disk);
std::unique_ptr<WriteBuffer> write_buffer;
size_t buf_size = std::min<size_t>(info.size, reader->getWriteBufferSize());
if (info.encrypted_by_disk)
write_buffer = destination_disk->writeEncryptedFile(destination_path, buf_size, write_mode, reader->getWriteSettings());
else
write_buffer = destination_disk->writeFile(destination_path, buf_size, write_mode, reader->getWriteSettings());
copyData(*read_buffer, *write_buffer, info.size);
write_buffer->finalize();
}
@ -805,72 +833,57 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
if (writing_finalized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is already finalized");

std::string from_file_name = "memory buffer";
if (auto fname = entry->getFilePath(); !fname.empty())
from_file_name = "file " + fname;

bool should_check_lock_file = false;
{
std::lock_guard lock{mutex};
++num_files;
total_size += info.size;
if (!num_entries)
should_check_lock_file = true;
}

auto src_disk = entry->getDisk();
auto src_file_path = entry->getFilePath();
bool from_immutable_file = entry->isFromImmutableFile();
String src_file_desc = src_file_path.empty() ? "memory buffer" : ("file " + src_file_path);

if (info.data_file_name.empty())
{
LOG_TRACE(log, "Writing backup for file {} from {}: skipped, {}", info.data_file_name, from_file_name, !info.size ? "empty" : "base backup has it");
LOG_TRACE(log, "Writing backup for file {} from {}: skipped, {}", info.data_file_name, src_file_desc, !info.size ? "empty" : "base backup has it");
return;
}

if (!coordination->startWritingFile(info.data_file_index))
{
LOG_TRACE(log, "Writing backup for file {} from {}: skipped, data file #{} is already being written", info.data_file_name, from_file_name, info.data_file_index);
LOG_TRACE(log, "Writing backup for file {} from {}: skipped, data file #{} is already being written", info.data_file_name, src_file_desc, info.data_file_index);
return;
}

LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, from_file_name, info.data_file_index);
if (!should_check_lock_file)
checkLockFile(true);

auto writer_description = writer->getDataSourceDescription();
auto reader_description = entry->getDataSourceDescription();
/// NOTE: `mutex` must be unlocked during copying otherwise writing will be in one thread maximum and hence slow.

/// We need to copy whole file without archive, we can do it faster
/// if source and destination are compatible
if (!use_archive && writer->supportNativeCopy(reader_description))
if (use_archive)
{
/// Should be much faster than writing data through server.
LOG_TRACE(log, "Will copy file {} using native copy", info.data_file_name);

/// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.

writer->copyFileNative(entry->tryGetDiskIfExists(), entry->getFilePath(), info.base_size, info.size - info.base_size, info.data_file_name);
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}, adding to archive", info.data_file_name, src_file_desc, info.data_file_index);
auto out = archive_writer->writeFile(info.data_file_name);
auto read_buffer = entry->getReadBuffer(writer->getReadSettings());
if (info.base_size != 0)
read_buffer->seek(info.base_size, SEEK_SET);
copyData(*read_buffer, *out);
out->finalize();
}
else if (src_disk && from_immutable_file)
{
LOG_TRACE(log, "Writing backup for file {} from {} (disk {}): data file #{}", info.data_file_name, src_file_desc, src_disk->getName(), info.data_file_index);
writer->copyFileFromDisk(info.data_file_name, src_disk, src_file_path, info.encrypted_by_disk, info.base_size, info.size - info.base_size);
}
else
{
bool has_entries = false;
{
std::lock_guard lock{mutex};
has_entries = num_entries > 0;
}
if (!has_entries)
checkLockFile(true);

if (use_archive)
{
LOG_TRACE(log, "Adding file {} to archive", info.data_file_name);
auto out = archive_writer->writeFile(info.data_file_name);
auto read_buffer = entry->getReadBuffer();
if (info.base_size != 0)
read_buffer->seek(info.base_size, SEEK_SET);
copyData(*read_buffer, *out);
out->finalize();
}
else
{
LOG_TRACE(log, "Will copy file {}", info.data_file_name);
auto create_read_buffer = [entry] { return entry->getReadBuffer(); };

/// NOTE: `mutex` must be unlocked here otherwise writing will be in one thread maximum and hence slow.
writer->copyDataToFile(create_read_buffer, info.base_size, info.size - info.base_size, info.data_file_name);
}
LOG_TRACE(log, "Writing backup for file {} from {}: data file #{}", info.data_file_name, src_file_desc, info.data_file_index);
auto create_read_buffer = [entry, read_settings = writer->getReadSettings()] { return entry->getReadBuffer(read_settings); };
writer->copyDataToFile(info.data_file_name, create_read_buffer, info.base_size, info.size - info.base_size);
}

{
@ -76,10 +76,8 @@ public:
SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) const override;
std::unique_ptr<SeekableReadBuffer> readFile(const SizeAndChecksum & size_and_checksum) const override;
size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) const override;
size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode, const WriteSettings & write_settings) const override;
size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const override;
size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) const override;
void writeFile(const BackupFileInfo & info, BackupEntryPtr entry) override;
void finalizeWriting() override;
bool supportsWritingInMultipleThreads() const override { return !use_archive; }

@ -109,6 +107,8 @@ private:
/// Calculates and sets `compressed_size`.
void setCompressedSize();

std::unique_ptr<SeekableReadBuffer> readFileImpl(const SizeAndChecksum & size_and_checksum, bool read_encrypted) const;

const String backup_name_for_logging;
const bool use_archive;
const ArchiveParams archive_params;

@ -23,6 +23,7 @@ namespace ErrorCodes
M(String, password) \
M(Bool, structure_only) \
M(Bool, async) \
M(Bool, decrypt_files_from_encrypted_disks) \
M(Bool, deduplicate_files) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \

@ -32,6 +32,9 @@ struct BackupSettings
/// Whether the BACKUP command must return immediately without waiting until the backup has completed.
bool async = false;

/// Whether the BACKUP command should decrypt files stored on encrypted disks.
bool decrypt_files_from_encrypted_disks = false;

/// Whether the BACKUP will omit similar files (within one backup only).
bool deduplicate_files = true;
@ -368,6 +368,7 @@ void BackupsWorker::doBackup(

/// Wait until all the hosts have written their backup entries.
backup_coordination->waitForStage(Stage::COMPLETED);
backup_coordination->setStage(Stage::COMPLETED,"");
}
else
{

@ -385,7 +386,7 @@ void BackupsWorker::doBackup(
writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal);

/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(Stage::COMPLETED, "");
backup_coordination->setStage(Stage::COMPLETED,"");
}

size_t num_files = 0;

@ -654,12 +655,26 @@ void BackupsWorker::doRestore(
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();

if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
}

/// Make a restore coordination.
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);

if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
throw Exception(
ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
"Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");


if (on_cluster)
{
/// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different set of tables and so the required access rights can differ too.
/// So the right way is pass through the entire cluster and check access for each host.

@ -676,15 +691,6 @@ void BackupsWorker::doRestore(
}
}

/// Make a restore coordination.
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);

if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
throw Exception(
ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED,
"Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");

/// Do RESTORE.
if (on_cluster)
{

@ -703,6 +709,7 @@ void BackupsWorker::doRestore(

/// Wait until all the hosts have written their backup entries.
restore_coordination->waitForStage(Stage::COMPLETED);
restore_coordination->setStage(Stage::COMPLETED,"");
}
else
{
@ -109,10 +109,10 @@ public:

/// Copies a file from the backup to a specified destination disk. Returns the number of bytes written.
virtual size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
WriteMode write_mode = WriteMode::Rewrite) const = 0;

virtual size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
WriteMode write_mode = WriteMode::Rewrite) const = 0;

/// Puts a new entry to the backup.
virtual void writeFile(const BackupFileInfo & file_info, BackupEntryPtr entry) = 0;
@ -17,23 +17,16 @@ class IBackupEntriesLazyBatch::BackupEntryFromBatch : public IBackupEntry
public:
BackupEntryFromBatch(const std::shared_ptr<IBackupEntriesLazyBatch> & batch_, size_t index_) : batch(batch_), index(index_) { }

std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getReadBuffer(read_settings); }
UInt64 getSize() const override { return getInternalBackupEntry()->getSize(); }
std::optional<UInt128> getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return getInternalBackupEntry()->getReadBuffer(); }
String getFilePath() const override
{
return getInternalBackupEntry()->getFilePath();
}

DiskPtr tryGetDiskIfExists() const override
{
return getInternalBackupEntry()->tryGetDiskIfExists();
}

DataSourceDescription getDataSourceDescription() const override
{
return getInternalBackupEntry()->getDataSourceDescription();
}
UInt128 getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length); }
DataSourceDescription getDataSourceDescription() const override { return getInternalBackupEntry()->getDataSourceDescription(); }
bool isEncryptedByDisk() const override { return getInternalBackupEntry()->isEncryptedByDisk(); }
bool isFromFile() const override { return getInternalBackupEntry()->isFromFile(); }
bool isFromImmutableFile() const override { return getInternalBackupEntry()->isFromImmutableFile(); }
String getFilePath() const override { return getInternalBackupEntry()->getFilePath(); }
DiskPtr getDisk() const override { return getInternalBackupEntry()->getDisk(); }

private:
BackupEntryPtr getInternalBackupEntry() const
@ -20,16 +20,24 @@ public:
/// Returns the size of the data.
virtual UInt64 getSize() const = 0;

/// Returns the checksum of the data if it's precalculated.
/// Can return nullopt which means the checksum should be calculated from the read buffer.
virtual std::optional<UInt128> getChecksum() const { return {}; }
/// Returns the checksum of the data.
virtual UInt128 getChecksum() const = 0;

/// Returns a partial checksum, i.e. the checksum calculated for a prefix part of the data.
/// Can return nullopt if the partial checksum is too difficult to calculate.
virtual std::optional<UInt128> getPartialChecksum(size_t /* prefix_length */) const { return {}; }

/// Returns a read buffer for reading the data.
virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer() const = 0;
virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const = 0;

virtual String getFilePath() const = 0;
/// Returns true if the data returned by getReadBuffer() is encrypted by an encrypted disk.
virtual bool isEncryptedByDisk() const { return false; }

virtual DiskPtr tryGetDiskIfExists() const = 0;
/// Returns information about disk and file if this backup entry is generated from a file.
virtual bool isFromFile() const { return false; }
virtual bool isFromImmutableFile() const { return false; }
virtual String getFilePath() const { return ""; }
virtual DiskPtr getDisk() const { return nullptr; }

virtual DataSourceDescription getDataSourceDescription() const = 0;
};
@ -93,7 +93,10 @@ void RestoreCoordinationRemote::createRootNodes()

void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
{
stage_sync->set(current_host, new_stage, message);
if (is_internal)
stage_sync->set(current_host, new_stage, message);
else
stage_sync->set(current_host, new_stage, /* message */ "", /* all_hosts */ true);
}

void RestoreCoordinationRemote::setError(const Exception & exception)

@ -283,8 +286,8 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
String status;
if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status))
{
/// If status is not COMPLETED it could be because the restore failed, check if 'error' exists
if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_restore_path + "/error"))
/// Check if some other restore is in progress
if (status == Stage::SCHEDULED_TO_START)
{
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
result = true;
@ -169,9 +169,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
std::shared_ptr<IBackupReader> reader;
if (engine_name == "File")
reader = std::make_shared<BackupReaderFile>(path);
reader = std::make_shared<BackupReaderFile>(path, params.context);
else
reader = std::make_shared<BackupReaderDisk>(disk, path);
reader = std::make_shared<BackupReaderDisk>(disk, path, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
@ -211,6 +211,7 @@ endif()
if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)

add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper)

@ -463,7 +464,7 @@ endif ()
if (TARGET ch_contrib::ldap)
dbms_target_link_libraries (PRIVATE ch_contrib::ldap ch_contrib::lber)
endif ()
dbms_target_link_libraries (PRIVATE ch_contrib::sparsehash)
dbms_target_link_libraries (PUBLIC ch_contrib::sparsehash)

if (TARGET ch_contrib::protobuf)
dbms_target_link_libraries (PRIVATE ch_contrib::protobuf)

@ -527,7 +528,7 @@ target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)

if (USE_ORC)
dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${CMAKE_BINARY_DIR}/contrib/orc/c++/include")
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${PROJECT_BINARY_DIR}/contrib/orc/c++/include")
endif ()

if (TARGET ch_contrib::rocksdb)

@ -612,6 +613,7 @@ if (ENABLE_TESTS)

target_link_libraries(unit_tests_dbms PRIVATE
ch_contrib::gtest_all
ch_contrib::gmock_all
clickhouse_functions
clickhouse_aggregate_functions
clickhouse_parsers
@ -3,11 +3,18 @@
namespace DB
{

thread_local FiberInfo current_fiber_info;

AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{
createFiber();
}

FiberInfo AsyncTaskExecutor::getCurrentFiberInfo()
{
return current_fiber_info;
}

void AsyncTaskExecutor::resume()
{
if (routine_is_finished)

@ -31,7 +38,10 @@ void AsyncTaskExecutor::resume()

void AsyncTaskExecutor::resumeUnlocked()
{
auto parent_fiber_info = current_fiber_info;
current_fiber_info = FiberInfo{&fiber, &parent_fiber_info};
fiber = std::move(fiber).resume();
current_fiber_info = parent_fiber_info;
}

void AsyncTaskExecutor::cancel()

@ -24,6 +24,11 @@ enum class AsyncEventTimeoutType
using AsyncCallback = std::function<void(int, Poco::Timespan, AsyncEventTimeoutType, const std::string &, uint32_t)>;
using ResumeCallback = std::function<void()>;

struct FiberInfo
{
const Fiber * fiber = nullptr;
const FiberInfo * parent_fiber_info = nullptr;
};

/// Base class for a task that will be executed in a fiber.
/// It has only one method - run, that takes 2 callbacks:

@ -75,6 +80,7 @@ public:
};
#endif

static FiberInfo getCurrentFiberInfo();
protected:
/// Method that is called in resume() before actual fiber resuming.
/// If it returns false, resume() will return immediately without actual fiber resuming.

@ -118,6 +124,48 @@ private:
std::unique_ptr<AsyncTask> task;
};

/// Simple implementation for fiber local variable.
template <typename T>
struct FiberLocal
{
public:
FiberLocal()
{
/// Initialize main instance for this thread. Instances for fibers will inherit it,
/// (it's needed because main instance could be changed before creating fibers
/// and changes should be visible in fibers).
data[nullptr] = T();
}

T & operator*()
{
return get();
}

T * operator->()
{
return &get();
}

private:
T & get()
{
return getInstanceForFiber(AsyncTaskExecutor::getCurrentFiberInfo());
}

T & getInstanceForFiber(FiberInfo info)
{
auto it = data.find(info.fiber);
/// If it's the first request, we need to initialize instance for the fiber
/// using instance from parent fiber or main thread that created fiber.
if (it == data.end())
it = data.insert({info.fiber, getInstanceForFiber(*info.parent_fiber_info)}).first;
return it->second;
}

std::unordered_map<const Fiber *, T> data;
};

String getSocketTimeoutExceededMessageByTimeoutType(AsyncEventTimeoutType type, Poco::Timespan timeout, const String & socket_description);

}
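A minimal usage sketch of the FiberLocal template above, assuming the Fiber/FiberInfo machinery declared in this header. The RequestState struct, the variable name, and the helper function are hypothetical, introduced only for illustration:

/// Hypothetical example: a fiber-local value that each fiber inherits from its
/// parent fiber (or from the thread's main instance) on first access.
struct RequestState { UInt64 nested_depth = 0; };

thread_local FiberLocal<RequestState> current_request_state;

void onEnterNestedCall()
{
    /// operator->() resolves the instance for the currently running fiber
    /// via AsyncTaskExecutor::getCurrentFiberInfo().
    ++current_request_state->nested_depth;
}

This is the same access pattern that current_fiber_trace_context follows in OpenTelemetryTraceContext.cpp further down in this diff.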
@ -579,6 +579,7 @@
M(694, ASYNC_LOAD_CYCLE) \
M(695, ASYNC_LOAD_FAILED) \
M(696, ASYNC_LOAD_CANCELED) \
M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

@ -7,12 +7,16 @@
#include <Core/Settings.h>
#include <IO/Operators.h>

#include <Common/AsyncTaskExecutor.h>

namespace DB
{
namespace OpenTelemetry
{

thread_local TracingContextOnThread current_thread_trace_context;
/// This code can be executed inside several fibers in one thread,
/// we should use fiber local tracing context.
thread_local FiberLocal<TracingContextOnThread> current_fiber_trace_context;

bool Span::addAttribute(std::string_view name, UInt64 value) noexcept
{

@ -104,7 +108,7 @@ bool Span::addAttributeImpl(std::string_view name, std::string_view value) noexc

SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
{
if (!current_thread_trace_context.isTraceEnabled())
if (!current_fiber_trace_context->isTraceEnabled())
{
return;
}

@ -112,8 +116,8 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
/// Use try-catch to make sure the ctor is exception safe.
try
{
this->trace_id = current_thread_trace_context.trace_id;
this->parent_span_id = current_thread_trace_context.span_id;
this->trace_id = current_fiber_trace_context->trace_id;
this->parent_span_id = current_fiber_trace_context->span_id;
this->span_id = thread_local_rng(); // create a new id for this span
this->operation_name = _operation_name;
this->kind = _kind;

@ -132,7 +136,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
}

/// Set current span as parent of other spans created later on this thread.
current_thread_trace_context.span_id = this->span_id;
current_fiber_trace_context->span_id = this->span_id;
}

void SpanHolder::finish() noexcept

@ -141,12 +145,12 @@ void SpanHolder::finish() noexcept
return;

// First of all, restore old value of current span.
assert(current_thread_trace_context.span_id == span_id);
current_thread_trace_context.span_id = parent_span_id;
assert(current_fiber_trace_context->span_id == span_id);
current_fiber_trace_context->span_id = parent_span_id;

try
{
auto log = current_thread_trace_context.span_log.lock();
auto log = current_fiber_trace_context->span_log.lock();

/// The log might be disabled, check it before use
if (log)

@ -269,7 +273,7 @@ void TracingContext::serialize(WriteBuffer & buf) const

const TracingContextOnThread & CurrentContext()
{
return current_thread_trace_context;
return *current_fiber_trace_context;
}

void TracingContextOnThread::reset() noexcept

@ -291,7 +295,7 @@ TracingContextHolder::TracingContextHolder(
/// If any exception is raised during the construction, the tracing is not enabled on current thread.
try
{
if (current_thread_trace_context.isTraceEnabled())
if (current_fiber_trace_context->isTraceEnabled())
{
///
/// This is not the normal case,

@ -304,15 +308,15 @@ TracingContextHolder::TracingContextHolder(
/// So this branch ensures this class can be instantiated multiple times on one same thread safely.
///
this->is_context_owner = false;
this->root_span.trace_id = current_thread_trace_context.trace_id;
this->root_span.parent_span_id = current_thread_trace_context.span_id;
this->root_span.trace_id = current_fiber_trace_context->trace_id;
this->root_span.parent_span_id = current_fiber_trace_context->span_id;
this->root_span.span_id = thread_local_rng();
this->root_span.operation_name = _operation_name;
this->root_span.start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();

/// Set the root span as parent of other spans created on current thread
current_thread_trace_context.span_id = this->root_span.span_id;
current_fiber_trace_context->span_id = this->root_span.span_id;
return;
}

@ -356,10 +360,10 @@ TracingContextHolder::TracingContextHolder(
}

/// Set up trace context on current thread only when the root span is successfully initialized.
current_thread_trace_context = _parent_trace_context;
current_thread_trace_context.span_id = this->root_span.span_id;
current_thread_trace_context.trace_flags = TRACE_FLAG_SAMPLED;
current_thread_trace_context.span_log = _span_log;
*current_fiber_trace_context = _parent_trace_context;
current_fiber_trace_context->span_id = this->root_span.span_id;
current_fiber_trace_context->trace_flags = TRACE_FLAG_SAMPLED;
current_fiber_trace_context->span_log = _span_log;
}

TracingContextHolder::~TracingContextHolder()

@ -371,7 +375,7 @@ TracingContextHolder::~TracingContextHolder()

try
{
auto shared_span_log = current_thread_trace_context.span_log.lock();
auto shared_span_log = current_fiber_trace_context->span_log.lock();
if (shared_span_log)
{
try

@ -402,11 +406,11 @@ TracingContextHolder::~TracingContextHolder()
if (this->is_context_owner)
{
/// Clear the context on current thread
current_thread_trace_context.reset();
current_fiber_trace_context->reset();
}
else
{
current_thread_trace_context.span_id = this->root_span.parent_span_id;
current_fiber_trace_context->span_id = this->root_span.parent_span_id;
}
}
@ -61,20 +61,20 @@ UInt64 Throttler::add(size_t amount)
throw Exception::createDeprecated(limit_exceeded_exception_message + std::string(" Maximum: ") + toString(limit), ErrorCodes::LIMIT_EXCEEDED);

/// Wait unless there is positive amount of tokens - throttling
Int64 sleep_time = 0;
Int64 sleep_time_ns = 0;
if (max_speed && tokens_value < 0)
{
sleep_time = static_cast<Int64>(-tokens_value / max_speed * NS);
accumulated_sleep += sleep_time;
sleepForNanoseconds(sleep_time);
accumulated_sleep -= sleep_time;
ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time / 1000UL);
sleep_time_ns = static_cast<Int64>(-tokens_value / max_speed * NS);
accumulated_sleep += sleep_time_ns;
sleepForNanoseconds(sleep_time_ns);
accumulated_sleep -= sleep_time_ns;
ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time_ns / 1000UL);
}

if (parent)
sleep_time += parent->add(amount);
sleep_time_ns += parent->add(amount);

return static_cast<UInt64>(sleep_time);
return static_cast<UInt64>(sleep_time_ns);
}

void Throttler::reset()

@ -34,15 +34,15 @@ public:
const std::shared_ptr<Throttler> & parent_ = nullptr);

/// Use `amount` tokens, sleeps if required or throws exception on limit overflow.
/// Returns duration of sleep in microseconds (to distinguish sleeping on different kinds of throttlers for metrics)
/// Returns duration of sleep in nanoseconds (to distinguish sleeping on different kinds of throttlers for metrics)
UInt64 add(size_t amount);

UInt64 add(size_t amount, ProfileEvents::Event event_amount, ProfileEvents::Event event_sleep_us)
{
UInt64 sleep_us = add(amount);
UInt64 sleep_ns = add(amount);
ProfileEvents::increment(event_amount, amount);
ProfileEvents::increment(event_sleep_us, sleep_us);
return sleep_us;
ProfileEvents::increment(event_sleep_us, sleep_ns / 1000UL);
return sleep_ns;
}

/// Not thread safe
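A small worked example of the unit handling above, with an illustrative value: if add() ends up sleeping for 2,500,000 ns, the event_sleep_us / ThrottlerSleepMicroseconds counters are incremented by 2,500,000 / 1000 = 2,500, while the value returned to the caller stays in nanoseconds.

/// Illustrative only: `throttler` and `bytes` are hypothetical caller-side names.
UInt64 sleep_ns = throttler.add(bytes);  /// e.g. 2'500'000 ns of throttling sleep
UInt64 sleep_us = sleep_ns / 1000UL;     /// 2'500 us, the granularity reported to ProfileEvents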
@ -466,6 +466,8 @@ public:

/// Useful to check owner of ephemeral node.
virtual int64_t getSessionID() const = 0;

virtual String getConnectedAddress() const = 0;

/// If the method will throw an exception, callbacks won't be called.
///
/// After the method is executed successfully, you must wait for callbacks

@ -39,6 +39,7 @@ public:

bool isExpired() const override { return expired; }
int64_t getSessionID() const override { return 0; }
String getConnectedAddress() const override { return connected_zk_address; }

void create(

@ -126,6 +127,8 @@ private:

zkutil::ZooKeeperArgs args;

String connected_zk_address;

std::mutex push_request_mutex;
std::atomic<bool> expired{false};

@ -111,6 +111,26 @@ void ZooKeeper::init(ZooKeeperArgs args_)
LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ","));
else
LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot);

String address = impl->getConnectedAddress();

size_t colon_pos = address.find(':');
connected_zk_host = address.substr(0, colon_pos);
connected_zk_port = address.substr(colon_pos + 1);

connected_zk_index = 0;

if (args.hosts.size() > 1)
{
for (size_t i = 0; i < args.hosts.size(); i++)
{
if (args.hosts[i] == address)
{
connected_zk_index = i;
break;
}
}
}
}
else if (args.implementation == "testkeeper")
{

@ -523,6 +523,10 @@ public:

void setServerCompletelyStarted();

String getConnectedZooKeeperHost() const { return connected_zk_host; }
String getConnectedZooKeeperPort() const { return connected_zk_port; }
size_t getConnectedZooKeeperIndex() const { return connected_zk_index; }

private:
void init(ZooKeeperArgs args_);

@ -586,6 +590,10 @@ private:

ZooKeeperArgs args;

String connected_zk_host;
String connected_zk_port;
size_t connected_zk_index;

std::mutex mutex;

Poco::Logger * log = nullptr;

@ -433,6 +433,8 @@ void ZooKeeper::connect(
}

connected = true;
connected_zk_address = node.address.toString();

break;
}
catch (...)

@ -448,6 +450,8 @@ void ZooKeeper::connect(
if (!connected)
{
WriteBufferFromOwnString message;
connected_zk_address = "";

message << "All connection tries failed while connecting to ZooKeeper. nodes: ";
bool first = true;
for (const auto & node : nodes)

@ -125,6 +125,8 @@ public:
/// Useful to check owner of ephemeral node.
int64_t getSessionID() const override { return session_id; }

String getConnectedAddress() const override { return connected_zk_address; }

void executeGenericRequest(
const ZooKeeperRequestPtr & request,
ResponseCallback callback);

@ -201,6 +203,7 @@ public:

private:
ACLs default_acls;
String connected_zk_address;

zkutil::ZooKeeperArgs args;
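A worked example of the host/port split done in ZooKeeper::init() above, with an illustrative address: for impl->getConnectedAddress() == "127.0.0.1:2181", address.find(':') returns 9, so connected_zk_host becomes "127.0.0.1" (substr(0, 9)) and connected_zk_port becomes "2181" (substr(10)).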
@ -339,37 +339,37 @@ void KeeperStorage::UncommittedState::applyDelta(const Delta & delta)
nodes.emplace(delta.path, UncommittedNode{.node = nullptr});
}

auto & [node, acls, last_applied_zxid] = nodes.at(delta.path);

std::visit(
[&, &my_node = node, &my_acls = acls, &my_last_applied_zxid = last_applied_zxid]<typename DeltaType>(const DeltaType & operation)
[&]<typename DeltaType>(const DeltaType & operation)
{
auto & [node, acls, last_applied_zxid] = nodes.at(delta.path);

if constexpr (std::same_as<DeltaType, CreateNodeDelta>)
{
assert(!my_node);
my_node = std::make_shared<Node>();
my_node->stat = operation.stat;
my_node->setData(operation.data);
my_acls = operation.acls;
my_last_applied_zxid = delta.zxid;
assert(!node);
node = std::make_shared<Node>();
node->stat = operation.stat;
node->setData(operation.data);
acls = operation.acls;
last_applied_zxid = delta.zxid;
}
else if constexpr (std::same_as<DeltaType, RemoveNodeDelta>)
{
assert(my_node);
my_node = nullptr;
my_last_applied_zxid = delta.zxid;
assert(node);
node = nullptr;
last_applied_zxid = delta.zxid;
}
else if constexpr (std::same_as<DeltaType, UpdateNodeDelta>)
{
assert(my_node);
my_node->invalidateDigestCache();
assert(node);
node->invalidateDigestCache();
operation.update_fn(*node);
my_last_applied_zxid = delta.zxid;
last_applied_zxid = delta.zxid;
}
else if constexpr (std::same_as<DeltaType, SetACLDelta>)
{
my_acls = operation.acls;
my_last_applied_zxid = delta.zxid;
acls = operation.acls;
last_applied_zxid = delta.zxid;
}
},
delta.operation);
@ -663,12 +663,10 @@ Names Block::getDataTypeNames() const

Block::NameMap Block::getNamesToIndexesMap() const
{
NameMap res;
res.reserve(index_by_name.size());

NameMap res(index_by_name.size());
res.set_empty_key(StringRef{});
for (const auto & [name, index] : index_by_name)
res[name] = index;

return res;
}

@ -5,13 +5,11 @@
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>

#include <Common/HashTable/HashMap.h>

#include <initializer_list>
#include <list>
#include <map>
#include <set>
#include <vector>
#include <sparsehash/dense_hash_map>


namespace DB

@ -97,7 +95,7 @@ public:
Names getDataTypeNames() const;

/// Hash table match `column name -> position in the block`.
using NameMap = HashMap<StringRef, size_t, StringRefHash>;
using NameMap = ::google::dense_hash_map<StringRef, size_t, StringRefHash>;
NameMap getNamesToIndexesMap() const;

Serializations getSerializations() const;
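A short sketch of why set_empty_key() appears in getNamesToIndexesMap() above: google::dense_hash_map reserves one key value as the marker for empty buckets, so it must be set before the first insert. The column names below are hypothetical, for illustration only:

/// Sketch, not part of the diff: building a NameMap by hand.
Block::NameMap positions(2);               /// constructor hint: expected number of columns
positions.set_empty_key(StringRef{});      /// required by dense_hash_map before any insert
positions[StringRef("id", 2)] = 0;
positions[StringRef("value", 5)] = 1;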
@ -23,6 +23,8 @@ namespace DB
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \
M(UInt64, outdated_part_loading_thread_pool_queue_size, 10000, "Queue size for parts loading thread pool.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \

@ -101,8 +101,6 @@ class IColumn;
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \
M(UInt64, max_remote_read_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for read.", 0) \
M(UInt64, max_remote_write_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for write.", 0) \
M(UInt64, max_local_read_bandwidth, 0, "The maximum speed of local reads in bytes per second.", 0) \

@ -748,6 +746,7 @@ class IColumn;
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

@ -778,6 +777,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
MAKE_OBSOLETE(M, Bool, allow_experimental_projection_optimization, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \
/* moved to config.xml: see also src/Core/ServerSettings.h */ \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \

@ -790,6 +790,8 @@ class IColumn;
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \
/* ---- */ \
MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \
MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \
Some files were not shown because too many files have changed in this diff.