Merge master and resolve conflict

This commit is contained in:
Wangyang Guo 2022-08-02 10:40:40 +08:00
commit 6a7213291b
925 changed files with 7933 additions and 5085 deletions

View File

@ -151,8 +151,8 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
SplitBuildSmokeTest:
needs: [BuilderDebSplitted]
SharedBuildSmokeTest:
needs: [BuilderDebShared]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
@ -171,7 +171,7 @@ jobs:
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Split build check
- name: Shared build check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
@ -598,7 +598,7 @@ jobs:
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderDebSplitted:
BuilderDebShared:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
@ -609,7 +609,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_splitted
BUILD_NAME=binary_shared
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -1012,7 +1012,7 @@ jobs:
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinClangTidy
- BuilderDebSplitted
- BuilderDebShared
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
@ -1126,6 +1126,84 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestReleaseDatabaseReplicated0:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestReleaseDatabaseReplicated1:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_database_replicated
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, DatabaseReplicated)
REPO_COPY=${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT=10800
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=2
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestReleaseS3:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
@ -1706,43 +1784,6 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestReleaseDatabaseOrdinary:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, DatabaseOrdinary)
REPO_COPY=${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]
@ -3063,6 +3104,8 @@ jobs:
- FunctionalStatelessTestDebug2
- FunctionalStatelessTestRelease
- FunctionalStatelessTestReleaseDatabaseOrdinary
- FunctionalStatelessTestReleaseDatabaseReplicated0
- FunctionalStatelessTestReleaseDatabaseReplicated1
- FunctionalStatelessTestAarch64
- FunctionalStatelessTestAsan0
- FunctionalStatelessTestAsan1
@ -3075,7 +3118,6 @@ jobs:
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestReleaseDatabaseOrdinary
- FunctionalStatelessTestReleaseS3
- FunctionalStatefulTestAarch64
- FunctionalStatefulTestAsan
@ -3111,7 +3153,7 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- SplitBuildSmokeTest
- SharedBuildSmokeTest
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository

View File

@ -216,8 +216,8 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
SplitBuildSmokeTest:
needs: [BuilderDebSplitted]
SharedBuildSmokeTest:
needs: [BuilderDebShared]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
@ -236,7 +236,7 @@ jobs:
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Split build check
- name: Shared build check
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
@ -254,7 +254,7 @@ jobs:
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -301,7 +301,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderBinRelease:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -345,53 +345,8 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# BuilderBinGCC:
# needs: [DockerHubPush, FastTest]
# runs-on: [self-hosted, builder]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/build_check
# IMAGES_PATH=${{runner.temp}}/images_path
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
# CACHES_PATH=${{runner.temp}}/../ccaches
# BUILD_NAME=binary_gcc
# EOF
# - name: Download changed images
# uses: actions/download-artifact@v2
# with:
# name: changed_images
# path: ${{ runner.temp }}/images_path
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
# - name: Upload build URLs to artifacts
# if: ${{ success() || failure() }}
# uses: actions/upload-artifact@v2
# with:
# name: ${{ env.BUILD_URLS }}
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
# - name: Cleanup
# if: always()
# run: |
# # shellcheck disable=SC2046
# docker kill $(docker ps -q) ||:
# # shellcheck disable=SC2046
# docker rm -f $(docker ps -a -q) ||:
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -438,7 +393,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -483,7 +438,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebUBsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -528,7 +483,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebTsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -573,7 +528,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebMsan:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -618,7 +573,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebDebug:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -665,8 +620,8 @@ jobs:
##########################################################################################
##################################### SPECIAL BUILDS #####################################
##########################################################################################
BuilderDebSplitted:
needs: [DockerHubPush, FastTest]
BuilderDebShared:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -676,7 +631,7 @@ jobs:
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_splitted
BUILD_NAME=binary_shared
EOF
- name: Download changed images
uses: actions/download-artifact@v2
@ -711,7 +666,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinClangTidy:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -756,7 +711,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -801,7 +756,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -846,7 +801,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -891,7 +846,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -936,7 +891,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinPPC64:
needs: [DockerHubPush, FastTest]
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -1069,7 +1024,7 @@ jobs:
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinClangTidy
- BuilderDebSplitted
- BuilderDebShared
runs-on: [self-hosted, style-checker]
if: ${{ success() || failure() }}
steps:
@ -2974,42 +2929,6 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
# UnitTestsReleaseGCC:
# needs: [BuilderBinGCC]
# runs-on: [self-hosted, fuzzer-unit-tester]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
# uses: actions/download-artifact@v2
# with:
# path: ${{ env.REPORTS_PATH }}
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Unit test
# run: |
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci"
# python3 unit_tests_check.py "$CHECK_NAME"
# - name: Cleanup
# if: always()
# run: |
# # shellcheck disable=SC2046
# docker kill $(docker ps -q) ||:
# # shellcheck disable=SC2046
# docker rm -f $(docker ps -a -q) ||:
# sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]
@ -3497,7 +3416,7 @@ jobs:
- UnitTestsMsan
- UnitTestsUBsan
- UnitTestsReleaseClang
- SplitBuildSmokeTest
- SharedBuildSmokeTest
- CompatibilityCheck
- IntegrationTestsFlakyCheck
- Jepsen

View File

@ -77,10 +77,9 @@ option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
# DEVELOPER ONLY.
# Faster linking if turned on.
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files" OFF)
option(CLICKHOUSE_SPLIT_BINARY "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled" OFF)
if (USE_STATIC_LIBRARIES AND (SPLIT_SHARED_LIBRARIES OR CLICKHOUSE_SPLIT_BINARY))
message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 or CLICKHOUSE_SPLIT_BINARY=1 must not be used together with USE_STATIC_LIBRARIES=1")
if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 must not be used together with USE_STATIC_LIBRARIES=1")
endif()
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
@ -502,7 +501,7 @@ endif ()
message (STATUS
"Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}")
SPLIT_SHARED_LIBRARIES=${SPLIT_SHARED_LIBRARIES}")
include (GNUInstallDirs)

View File

@ -12,11 +12,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming events
* [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London
* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich
* [**v22.8 Release Webinar**](https://clickhouse.com/company/events/v22-8-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.

View File

@ -16,7 +16,7 @@ option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1)
option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1)
option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1)
option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1)
option (ENABLE_AVX "Use AVX instructions on x86_64" 0)
option (ENABLE_AVX "Use AVX instructions on x86_64" 1)
option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1334b9ae72576821a698d657d08838861cf33007
Subproject commit e1dc47c1cfd529801a8c94a396a3921a71ae3ccf

2
contrib/libgsasl vendored

@ -1 +1 @@
Subproject commit 383ee28e82f69fa16ed43b48bd9c8ee5b313ab84
Subproject commit 0324680f13f22bb43df5353a08e26453d7d640ac

View File

@ -55,7 +55,7 @@ ccache --zero-stats ||:
if [ "$BUILD_MUSL_KEEPER" == "1" ]
then
# build keeper with musl separately
cmake --debug-trycompile --verbose=1 -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS clickhouse-keeper
@ -70,10 +70,10 @@ then
rm -f CMakeCache.txt
# Build the rest of binaries
cmake --debug-trycompile --verbose=1 -DBUILD_STANDALONE_KEEPER=0 -DCREATE_KEEPER_SYMLINK=0 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=0 -DCREATE_KEEPER_SYMLINK=0 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
else
# Build everything
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
@ -88,7 +88,7 @@ fi
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
$SCAN_WRAPPER ninja $NINJA_FLAGS clickhouse-bundle
$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET
ls -la ./programs

View File

@ -100,12 +100,12 @@ def run_docker_image_with_env(
subprocess.check_call(cmd, shell=True)
def is_release_build(build_type, package_type, sanitizer, split_binary):
def is_release_build(build_type, package_type, sanitizer, shared_libraries):
return (
build_type == ""
and package_type == "deb"
and sanitizer == ""
and not split_binary
and not shared_libraries
)
@ -116,7 +116,7 @@ def parse_env_variables(
package_type,
cache,
distcc_hosts,
split_binary,
shared_libraries,
clang_tidy,
version,
author,
@ -134,6 +134,7 @@ def parse_env_variables(
result = []
result.append("OUTPUT_DIR=/output")
cmake_flags = ["$CMAKE_FLAGS"]
build_target = "clickhouse-bundle"
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
@ -201,7 +202,7 @@ def parse_env_variables(
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
if is_release_build(build_type, package_type, sanitizer, split_binary):
if is_release_build(build_type, package_type, sanitizer, shared_libraries):
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
@ -218,7 +219,7 @@ def parse_env_variables(
if package_type == "coverity":
result.append("COMBINED_OUTPUT=coverity")
result.append('COVERITY_TOKEN="$COVERITY_TOKEN"')
elif split_binary:
elif shared_libraries:
result.append("COMBINED_OUTPUT=shared_build")
if sanitizer:
@ -236,7 +237,11 @@ def parse_env_variables(
result.append("CCACHE_BASEDIR=/build")
result.append("CCACHE_NOHASHDIR=true")
result.append("CCACHE_COMPILERCHECK=content")
result.append("CCACHE_MAXSIZE=15G")
cache_maxsize = "15G"
if clang_tidy:
# 15G is not enough for tidy build
cache_maxsize = "25G"
result.append(f"CCACHE_MAXSIZE={cache_maxsize}")
# result.append("CCACHE_UMASK=777")
if distcc_hosts:
@ -259,24 +264,28 @@ def parse_env_variables(
result.append("BINARY_OUTPUT=tests")
cmake_flags.append("-DENABLE_TESTS=1")
if split_binary:
if shared_libraries:
cmake_flags.append(
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 "
"-DCLICKHOUSE_SPLIT_BINARY=1"
"-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1"
)
# We can't always build utils because it requires too much space, but
# we have to build them at least in some way in CI. The split build is
# probably the least heavy disk-wise.
cmake_flags.append("-DENABLE_UTILS=1")
# utils are not included into clickhouse-bundle, so build everything
build_target = "all"
if clang_tidy:
cmake_flags.append("-DENABLE_CLANG_TIDY=1")
cmake_flags.append("-DENABLE_UTILS=1")
cmake_flags.append("-DENABLE_TESTS=1")
cmake_flags.append("-DENABLE_EXAMPLES=1")
# Don't stop on first error to find more clang-tidy errors in one run.
result.append("NINJA_FLAGS=-k0")
cmake_flags.append("-DENABLE_UTILS=1")
# utils are not included into clickhouse-bundle, so build everything
build_target = "all"
if with_coverage:
cmake_flags.append("-DWITH_COVERAGE=1")
@ -290,6 +299,7 @@ def parse_env_variables(
cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
result.append(f"BUILD_TARGET={build_target}")
return result
@ -341,7 +351,7 @@ if __name__ == "__main__":
default="",
)
parser.add_argument("--split-binary", action="store_true")
parser.add_argument("--shared-libraries", action="store_true")
parser.add_argument("--clang-tidy", action="store_true")
parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="")
parser.add_argument(
@ -394,7 +404,7 @@ if __name__ == "__main__":
args.package_type,
args.cache,
args.distcc_hosts,
args.split_binary,
args.shared_libraries,
args.clang_tidy,
args.version,
args.author,

View File

@ -38,6 +38,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_
# There could be many disks declared in config
readarray -t FILESYSTEM_CACHE_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.data_cache_path' || true)
readarray -t DISKS_PATHS < <(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key='storage_configuration.disks.*.path' || true)
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -50,7 +51,8 @@ for dir in "$DATA_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH" \
"${FILESYSTEM_CACHE_PATHS[@]}"
"${FILESYSTEM_CACHE_PATHS[@]}" \
"${DISKS_PATHS[@]}"
do
# check if variable not empty
[ -z "$dir" ] && continue

View File

@ -34,13 +34,14 @@ RUN apt-get update \
ARG TARGETARCH
# Install MySQL ODBC driver from RHEL rpm
# For reference https://downloads.mysql.com/archives/c-odbc/ RHEL
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& cd /tmp \
&& curl -o mysql-odbc.rpm "https://cdn.mysql.com/Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \
&& curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \
&& rpm2archive mysql-odbc.rpm \
&& tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \
&& LINK_DIR=$(dpkg -L libodbc1 | grep '^/usr/lib/.*-linux-gnu/odbc$') \

View File

@ -171,11 +171,11 @@ concurrency-related errors. If it fails:
## Split Build Smoke Test
Checks that the server build in [split build](../development/build.md#split-build)
Checks that the server build in [split build](../development/developer-instruction.md#split-build)
configuration can start and run simple queries. If it fails:
* Fix other test errors first;
* Build the server in [split build](../development/build.md#split-build) configuration
* Build the server in [split build](../development/developer-instruction.md#split-build) configuration
locally and check whether it can start and run `select 1`.

View File

@ -267,19 +267,19 @@ The system will prepare ClickHouse binary builds for your pull request individua
Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways.
## Browse ClickHouse Source Code {#browse-clickhouse-source-code}
You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily.
Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
## Faster builds for development: Split build configuration {#split-build}
ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. As an alternative, you can instead build dynamically linked shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. As an alternative, you can instead build dynamically linked shared libraries, allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
```
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1
```
Note that the split build has several drawbacks:
* There is no single `clickhouse` binary, and you have to run `clickhouse-server`, `clickhouse-client`, etc.
* Risk of segfault if you run any of the programs while rebuilding the project.
* You cannot run the integration tests since they only work a single complete binary.
* You can't easily copy the binaries elsewhere. Instead of moving a single binary you'll need to copy all binaries and libraries.
If you are not interested in functionality provided by third-party libraries, you can further speed up the build using `cmake` options
```
-DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0

View File

@ -389,12 +389,6 @@ SETTINGS mutations_sync = 1;
Let's run the same 3 queries.
[Enable](../../operations/settings/settings.md#allow-experimental-projection-optimization) projections for selects:
```sql
SET allow_experimental_projection_optimization = 1;
```
### Query 1. Average Price Per Year {#average-price-projections}
Query:

View File

@ -149,7 +149,7 @@ Features:
### ClickCat {#clickcat}
[ClickCat](https://github.com/open-botech/ClickCat) is a firendly user interface that lets you search, explore and visualize your ClickHouse Data.
[ClickCat](https://github.com/clickcat-project/ClickCat) is a friendly user interface that lets you search, explore and visualize your ClickHouse Data.
Features:

View File

@ -5,12 +5,12 @@ sidebar_label: Testing Hardware
# How to Test Your Hardware with ClickHouse
You can run basic ClickHouse performance test on any server without installation of ClickHouse packages.
You can run a basic ClickHouse performance test on any server without installation of ClickHouse packages.
## Automated Run
You can run benchmark with a single script.
You can run the benchmark with a single script.
1. Download the script.
```
@ -26,58 +26,3 @@ chmod a+x ./hardware.sh
3. Copy the output and send it to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/
## Manual Run
Alternatively you can perform benchmark in the following steps.
1. ssh to the server and download the binary with wget:
```bash
# For amd64:
wget https://builds.clickhouse.com/master/amd64/clickhouse
# For aarch64:
wget https://builds.clickhouse.com/master/aarch64/clickhouse
# For powerpc64le:
wget https://builds.clickhouse.com/master/powerpc64le/clickhouse
# For freebsd:
wget https://builds.clickhouse.com/master/freebsd/clickhouse
# For freebsd-aarch64:
wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse
# For freebsd-powerpc64le:
wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse
# For macos:
wget https://builds.clickhouse.com/master/macos/clickhouse
# For macos-aarch64:
wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse
# Then do:
chmod a+x clickhouse
```
2. Download benchmark files:
```bash
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware/benchmark-new.sh
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware/queries.sql
```
3. Download the [web analytics dataset](../getting-started/example-datasets/metrica.md) (“hits” table containing 100 million rows).
```bash
wget https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
```
4. Run the server:
```bash
./clickhouse server
```
5. Check the data: ssh to the server in another terminal
```bash
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
```
6. Run the benchmark:
```bash
./benchmark-new.sh hits_100m_obfuscated
```
7. Send the numbers and the info about your hardware configuration to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/

View File

@ -197,7 +197,7 @@ Default value: `480` (8 minute).
Parameter of a task that cleans up garbage from `store/` directory.
If some subdirectory is not used by clickhouse-server and this directory was not modified for last
`database_catalog_unused_dir_hide_timeout_sec` seconds, the task will "hide" this directory by
`database_catalog_unused_dir_hide_timeout_sec` seconds, the task will "hide" this directory by
removing all access rights. It also works for directories that clickhouse-server does not
expect to see inside `store/`. Zero means "immediately".
@ -206,10 +206,10 @@ Default value: `3600` (1 hour).
## database_catalog_unused_dir_rm_timeout_sec {#database_catalog_unused_dir_rm_timeout_sec}
Parameter of a task that cleans up garbage from `store/` directory.
If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
(see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
(see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
and this directory was not modified for last
`database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
`database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
It also works for directories that clickhouse-server does not
expect to see inside `store/`. Zero means "never".
@ -438,6 +438,18 @@ For more information, see the section “[Configuration files](../../operations/
<include_from>/etc/metrica.xml</include_from>
```
## interserver_listen_host {#interserver-listen-host}
Restriction on hosts that can exchange data between ClickHouse servers.
The default value equals to `listen_host` setting.
Examples:
``` xml
<interserver_listen_host>::ffff:a00:1</interserver_listen_host>
<interserver_listen_host>10.0.0.1</interserver_listen_host>
```
## interserver_http_port {#interserver-http-port}
Port for exchanging data between ClickHouse servers.
@ -731,6 +743,16 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
- [max_server_memory_usage](#max_server_memory_usage)
## concurrent_threads_soft_limit {#concurrent_threads_soft_limit}
The maximum number of query processing threads, excluding threads for retrieving data from remote servers, allowed to run all queries. This is not a hard limit. In case if the limit is reached the query will still get one thread to run.
Possible values:
- Positive integer.
- 0 — No limit.
- -1 — The parameter is initialized by number of logical cores multiplies by 3. Which is a good heuristic for CPU-bound tasks.
Default value: `0`.
## max_concurrent_queries {#max-concurrent-queries}
The maximum number of simultaneously processed queries.
@ -960,7 +982,7 @@ Default value: 2.
**Example**
```xml
<background_merges_mutations_concurrency_ratio>3</background_pbackground_merges_mutations_concurrency_ratio>
<background_merges_mutations_concurrency_ratio>3</background_merges_mutations_concurrency_ratio>
```
## background_move_pool_size {#background_move_pool_size}

View File

@ -4,7 +4,7 @@ sidebar_position: 6
# any
Selects the first encountered value.
Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the min or max function instead of any.

View File

@ -956,9 +956,28 @@ SELECT
## timeSlots(StartTime, Duration,\[, Size\])
For a time interval starting at StartTime and continuing for Duration seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the Size in seconds. Size is an optional parameter: a constant UInt32, set to 1800 by default.
For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
This is necessary for searching for pageviews in the corresponding session.
For a time interval starting at StartTime and continuing for Duration seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the Size in seconds. Size is an optional parameter set to 1800 (30 minutes) by default.
This is necessary, for example, when searching for pageviews in the corresponding session.
Accepts DateTime and DateTime64 as StartTime argument. For DateTime, Duration and Size arguments must be `UInt32`. For DateTime64 they must be `Decimal64`.
Returns an array of DateTime/DateTime64 (return type matches the type of StartTime). For DateTime64, the return value's scale can differ from the scale of StartTime --- the highest scale among all given arguments is taken.
Example:
```sql
SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600));
SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299);
SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0));
```
``` text
┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐
│ ['2012-01-01 12:00:00','2012-01-01 12:30:00'] │
└─────────────────────────────────────────────────────────────┘
┌─timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299)─┐
│ ['1980-12-12 20:56:13','1980-12-12 21:01:12','1980-12-12 21:06:11'] │
└─────────────────────────────────────────────────────────────────────────┘
┌─timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0))─┐
│ ['1980-12-12 20:56:13.0000','1980-12-12 21:01:12.0000','1980-12-12 21:06:11.0000'] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## formatDateTime

View File

@ -12,12 +12,13 @@ Reads file as a String. The file content is not parsed, so any information is re
**Syntax**
``` sql
file(path)
file(path[, default])
```
**Arguments**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `default` — The value that will be returned in the case when a file does not exist or cannot be accessed. Data types supported: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example**

View File

@ -11,7 +11,7 @@ Compressed files are supported. Compression type is detected by the extension of
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
SELECT <expr_list> INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL level]]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -23,6 +23,7 @@ SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
- The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
**Example**

View File

@ -170,7 +170,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
В случае использования на разработческой машине старого HDD или SSD, а также при желании использовать меньше места для артефактов сборки можно использовать следующую команду:
```bash
cmake -DUSE_DEBUG_HELPERS=1 -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
cmake -DUSE_DEBUG_HELPERS=1 -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 ..
```
При этом надо учесть, что получаемые в результате сборки исполнимые файлы будут динамически слинкованы с библиотеками, и поэтому фактически станут непереносимыми на другие компьютеры (либо для этого нужно будет предпринять значительно больше усилий по сравнению со статической сборкой). Плюсом же в данном случае является значительно меньшее время сборки (это проявляется не на первой сборке, а на последующих, после внесения изменений в исходный код - тратится меньшее время на линковку по сравнению со статической сборкой) и значительно меньшее использование места на жёстком диске (экономия более, чем в 3 раза по сравнению со статической сборкой). Для целей разработки, когда планируются только отладочные запуски на том же компьютере, где осуществлялась сборка, это может быть наиболее удобным вариантом.
@ -285,3 +285,9 @@ Pull request можно создать, даже если работа над з
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами.
## Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse}
Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно.
Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse).

View File

@ -389,12 +389,6 @@ SETTINGS mutations_sync = 1;
Давайте выполним те же 3 запроса.
[Включите](../../operations/settings/settings.md#allow-experimental-projection-optimization) поддержку проекций:
```sql
SET allow_experimental_projection_optimization = 1;
```
### Запрос 1. Средняя цена за год {#average-price-projections}
Запрос:
@ -647,4 +641,3 @@ no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows
### Online Playground {#playground}
Этот набор данных доступен в [Online Playground](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).

View File

@ -407,6 +407,18 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<include_from>/etc/metrica.xml</include_from>
```
## interserver_listen_host {#interserver-listen-host}
Ограничение по хостам, для обмена между серверами ClickHouse.
Значение по умолчанию совпадает со значением параметра listen_host
Примеры:
``` xml
<interserver_listen_host>::ffff:a00:1</interserver_listen_host>
<interserver_listen_host>10.0.0.1</interserver_listen_host>
```
## interserver_http_port {#interserver-http-port}
Порт для обмена между серверами ClickHouse.

View File

@ -944,14 +944,31 @@ SELECT now('Europe/Moscow');
## timeSlot {#timeslot}
Округляет время до получаса.
Эта функция является специфичной для Яндекс.Метрики, так как пол часа - минимальное время, для которого, если соседние по времени хиты одного посетителя на одном счётчике отстоят друг от друга строго более, чем на это время, визит может быть разбит на два визита. То есть, кортежи (номер счётчика, идентификатор посетителя, тайм-слот) могут использоваться для поиска хитов, входящий в соответствующий визит.
Эта функция является специфичной для Яндекс.Метрики, так как полчаса - минимальное время, для которого, если соседние по времени хиты одного посетителя на одном счётчике отстоят друг от друга строго более, чем на это время, визит может быть разбит на два визита. То есть, кортежи (номер счётчика, идентификатор посетителя, тайм-слот) могут использоваться для поиска хитов, входящий в соответствующий визит.
## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
Для интервала, начинающегося в `StartTime` и длящегося `Duration` секунд, возвращает массив моментов времени, кратных `Size`. Параметр `Size` указывать необязательно, по умолчанию он равен 1800 секундам (30 минутам) - необязательный параметр.
Данная функция может использоваться, например, для анализа количества просмотров страницы за соответствующую сессию.
Аргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. В случае, если используется `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Для DateTime64 они должны быть типа `Decimal64`.
Возвращает массив DateTime/DateTime64 (тип будет совпадать с типом параметра StartTime). Для DateTime64 масштаб(scale) возвращаемой величины может отличаться от масштаба фргумента StartTime --- результат будет иметь наибольший масштаб среди всех данных аргументов.
Для интервала времени, начинающегося в StartTime и продолжающегося Duration секунд, возвращает массив моментов времени, состоящий из округлений вниз до Size точек в секундах из этого интервала. Size - необязательный параметр, константный UInt32, по умолчанию равен 1800.
Например, `timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600)) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
Это нужно для поиска хитов, входящих в соответствующий визит.
Пример использования:
```sql
SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600));
SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299);
SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0));
```
``` text
┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐
│ ['2012-01-01 12:00:00','2012-01-01 12:30:00'] │
└─────────────────────────────────────────────────────────────┘
┌─timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299)─┐
│ ['1980-12-12 20:56:13','1980-12-12 21:01:12','1980-12-12 21:06:11'] │
└─────────────────────────────────────────────────────────────────────────┘
┌─timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0))─┐
│ ['1980-12-12 20:56:13.0000','1980-12-12 21:01:12.0000','1980-12-12 21:06:11.0000'] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## toYYYYMM

View File

@ -12,12 +12,13 @@ sidebar_label: "Функции для работы с файлами"
**Синтаксис**
``` sql
file(path)
file(path[, default])
```
**Аргументы**
- `path` — относительный путь до файла от [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Путь к файлу может включать следующие символы подстановки и шаблоны: `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `default` — Значение возвращаемое в случае, если указанный файл не существует. Поддерживаемые типы данных: [String](../../sql-reference/data-types/string.md) и [NULL](../../sql-reference/syntax.md#null-literal).
**Примеры**

View File

@ -264,3 +264,9 @@ ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签
系统将分别为您的拉取请求准备ClickHouse二进制版本。若要检索这些构建信息请在检查列表中单击« ClickHouse构建检查»旁边的«详细信息»链接。在这里您会找到指向ClickHouse的.deb软件包的直接链接此外甚至可以将其部署在生产服务器上如果您不担心
某些构建项很可能会在首次构建时失败。这是因为我们同时检查了基于gcc和clang的构建几乎所有现有的被clang启用的警告总是带有`-Werror`标志。在同一页面上您可以找到所有构建的日志因此不必以所有可能的方式构建ClickHouse。
## 浏览ClickHouse源代码 {#browse-clickhouse-source-code}
您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。
此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse)

View File

@ -62,6 +62,8 @@ ORDER BY expr
- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。
大多数情况下,不需要分使用区键。即使需要使用,也不需要使用比月更细粒度的分区键。分区不会加快查询(这与 ORDER BY 表达式不同)。永远也别使用过细粒度的分区键。不要使用客户端指定分区标识符或分区字段名称来对数据进行分区(而是将分区字段标识或名称作为 ORDER BY 表达式的第一列来指定分区)。
要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"`
- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。

View File

@ -18,11 +18,7 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY)
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
endif ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
# https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/
option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL})
@ -80,12 +76,7 @@ if (NOT ENABLE_NURAFT)
set(ENABLE_CLICKHOUSE_KEEPER_CONVERTER OFF)
endif()
if (CLICKHOUSE_SPLIT_BINARY)
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)" OFF)
else ()
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)"
${ENABLE_CLICKHOUSE_ALL})
endif ()
option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)" ${ENABLE_CLICKHOUSE_ALL})
message(STATUS "ClickHouse modes:")
@ -195,6 +186,12 @@ else()
message(STATUS "ClickHouse disks mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_SU)
message(STATUS "ClickHouse su: ON")
else()
message(STATUS "ClickHouse su: OFF")
endif()
configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
macro(clickhouse_target_link_split_lib target name)
@ -205,10 +202,6 @@ macro(clickhouse_target_link_split_lib target name)
endif()
endmacro()
macro(clickhouse_program_link_split_binary name)
clickhouse_target_link_split_lib(clickhouse-${name} ${name})
endmacro()
macro(clickhouse_program_add_library name)
string(TOUPPER ${name} name_uc)
string(REPLACE "-" "_" name_uc ${name_uc})
@ -233,17 +226,8 @@ macro(clickhouse_program_add_library name)
endif()
endmacro()
macro(clickhouse_program_add_executable name)
if(CLICKHOUSE_SPLIT_BINARY)
clickhouse_add_executable(clickhouse-${name} clickhouse-${name}.cpp)
clickhouse_program_link_split_binary(${name})
install(TARGETS clickhouse-${name} ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endmacro()
macro(clickhouse_program_add name)
clickhouse_program_add_library(${name})
clickhouse_program_add_executable(${name})
endmacro()
add_subdirectory (server)
@ -336,206 +320,173 @@ if (CLICKHOUSE_ONE_SHARED)
install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
endif()
if (CLICKHOUSE_SPLIT_BINARY)
set (CLICKHOUSE_ALL_TARGETS
clickhouse-server
clickhouse-client
clickhouse-local
clickhouse-benchmark
clickhouse-extract-from-config
clickhouse-compressor
clickhouse-format
clickhouse-obfuscator
clickhouse-git-import
clickhouse-copier
clickhouse-static-files-disk-uploader
clickhouse-disks)
clickhouse_add_executable (clickhouse main.cpp)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge)
endif ()
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
# Shared split (dev) build: In CI, the server is run with custom LD_LIBRARY_PATH. This makes the harmful env check re-execute the
# process in a clean environment but as in CI the containing directory is not included in DT_RUNPATH/DT_RPATH, the server won't come up.
target_compile_definitions(clickhouse PRIVATE DISABLE_HARMFUL_ENV_VAR_CHECK)
endif ()
if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-library-bridge)
endif ()
# A library that prevent usage of several functions from libc.
if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID)
set (HARMFUL_LIB harmful)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper)
endif ()
target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB})
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
clickhouse_target_link_split_lib(clickhouse server)
endif ()
if (ENABLE_CLICKHOUSE_CLIENT)
clickhouse_target_link_split_lib(clickhouse client)
endif ()
if (ENABLE_CLICKHOUSE_LOCAL)
clickhouse_target_link_split_lib(clickhouse local)
endif ()
if (ENABLE_CLICKHOUSE_BENCHMARK)
clickhouse_target_link_split_lib(clickhouse benchmark)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
clickhouse_target_link_split_lib(clickhouse copier)
endif ()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
clickhouse_target_link_split_lib(clickhouse extract-from-config)
endif ()
if (ENABLE_CLICKHOUSE_COMPRESSOR)
clickhouse_target_link_split_lib(clickhouse compressor)
endif ()
if (ENABLE_CLICKHOUSE_FORMAT)
clickhouse_target_link_split_lib(clickhouse format)
endif ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
clickhouse_target_link_split_lib(clickhouse obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
clickhouse_target_link_split_lib(clickhouse git-import)
endif ()
if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER)
clickhouse_target_link_split_lib(clickhouse static-files-disk-uploader)
endif ()
if (ENABLE_CLICKHOUSE_SU)
clickhouse_target_link_split_lib(clickhouse su)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER)
clickhouse_target_link_split_lib(clickhouse keeper)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
clickhouse_target_link_split_lib(clickhouse keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
clickhouse_target_link_split_lib(clickhouse disks)
endif ()
set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
set (CLICKHOUSE_BUNDLE)
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-server)
endif ()
if (ENABLE_CLICKHOUSE_CLIENT)
add_custom_target (clickhouse-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-client)
endif ()
if (ENABLE_CLICKHOUSE_LOCAL)
add_custom_target (clickhouse-local ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-local DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-local" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-local)
endif ()
if (ENABLE_CLICKHOUSE_BENCHMARK)
add_custom_target (clickhouse-benchmark ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-benchmark DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-benchmark" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-benchmark)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
add_custom_target (clickhouse-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-copier DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-copier" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-copier)
endif ()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
add_custom_target (clickhouse-extract-from-config ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-extract-from-config DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-extract-from-config" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-extract-from-config)
endif ()
if (ENABLE_CLICKHOUSE_COMPRESSOR)
add_custom_target (clickhouse-compressor ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-compressor DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-compressor" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-compressor)
endif ()
if (ENABLE_CLICKHOUSE_FORMAT)
add_custom_target (clickhouse-format ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-format DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-format)
endif ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
add_custom_target (clickhouse-git-import ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-git-import DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import)
endif ()
if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER)
add_custom_target (clickhouse-static-files-disk-uploader ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-static-files-disk-uploader DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-static-files-disk-uploader" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-static-files-disk-uploader)
endif ()
if (ENABLE_CLICKHOUSE_SU)
add_custom_target (clickhouse-su ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-su DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-su" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-su)
endif ()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS})
add_custom_target (clickhouse ALL DEPENDS clickhouse-bundle)
install(PROGRAMS clickhouse-split-helper DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME clickhouse COMPONENT clickhouse)
else ()
clickhouse_add_executable (clickhouse main.cpp)
# A library that prevent usage of several functions from libc.
if (ARCH_AMD64 AND OS_LINUX AND NOT OS_ANDROID)
set (HARMFUL_LIB harmful)
endif ()
target_link_libraries (clickhouse PRIVATE clickhouse_common_io string_utils ${HARMFUL_LIB})
target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_CLICKHOUSE_SERVER)
clickhouse_target_link_split_lib(clickhouse server)
endif ()
if (ENABLE_CLICKHOUSE_CLIENT)
clickhouse_target_link_split_lib(clickhouse client)
endif ()
if (ENABLE_CLICKHOUSE_LOCAL)
clickhouse_target_link_split_lib(clickhouse local)
endif ()
if (ENABLE_CLICKHOUSE_BENCHMARK)
clickhouse_target_link_split_lib(clickhouse benchmark)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
clickhouse_target_link_split_lib(clickhouse copier)
endif ()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
clickhouse_target_link_split_lib(clickhouse extract-from-config)
endif ()
if (ENABLE_CLICKHOUSE_COMPRESSOR)
clickhouse_target_link_split_lib(clickhouse compressor)
endif ()
if (ENABLE_CLICKHOUSE_FORMAT)
clickhouse_target_link_split_lib(clickhouse format)
endif ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
clickhouse_target_link_split_lib(clickhouse obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
clickhouse_target_link_split_lib(clickhouse git-import)
endif ()
if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER)
clickhouse_target_link_split_lib(clickhouse static-files-disk-uploader)
endif ()
if (ENABLE_CLICKHOUSE_SU)
clickhouse_target_link_split_lib(clickhouse su)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER)
clickhouse_target_link_split_lib(clickhouse keeper)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
clickhouse_target_link_split_lib(clickhouse keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
clickhouse_target_link_split_lib(clickhouse disks)
endif ()
set (CLICKHOUSE_BUNDLE)
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
if (ENABLE_CLICKHOUSE_SERVER)
add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-server)
endif ()
if (ENABLE_CLICKHOUSE_CLIENT)
add_custom_target (clickhouse-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-client)
endif ()
if (ENABLE_CLICKHOUSE_LOCAL)
add_custom_target (clickhouse-local ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-local DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-local" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-local)
endif ()
if (ENABLE_CLICKHOUSE_BENCHMARK)
add_custom_target (clickhouse-benchmark ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-benchmark DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-benchmark" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-benchmark)
endif ()
if (ENABLE_CLICKHOUSE_COPIER)
add_custom_target (clickhouse-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-copier DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-copier" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-copier)
endif ()
if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG)
add_custom_target (clickhouse-extract-from-config ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-extract-from-config DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-extract-from-config" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-extract-from-config)
endif ()
if (ENABLE_CLICKHOUSE_COMPRESSOR)
add_custom_target (clickhouse-compressor ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-compressor DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-compressor" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-compressor)
endif ()
if (ENABLE_CLICKHOUSE_FORMAT)
add_custom_target (clickhouse-format ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-format DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-format)
endif ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
add_custom_target (clickhouse-git-import ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-git-import DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import)
endif ()
if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER)
add_custom_target (clickhouse-static-files-disk-uploader ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-static-files-disk-uploader DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-static-files-disk-uploader" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-static-files-disk-uploader)
endif ()
if (ENABLE_CLICKHOUSE_SU)
add_custom_target (clickhouse-su ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-su DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-su" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-su)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER)
if (NOT BUILD_STANDALONE_KEEPER AND CREATE_KEEPER_SYMLINK)
add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
# otherwise we don't build keeper
if (BUILD_STANDALONE_KEEPER OR CREATE_KEEPER_SYMLINK)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper)
endif()
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_custom_target (clickhouse-keeper-converter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-converter DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-disks)
endif ()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
if (ENABLE_CLICKHOUSE_KEEPER)
if (NOT BUILD_STANDALONE_KEEPER AND CREATE_KEEPER_SYMLINK)
add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
# otherwise we don't build keeper
if (BUILD_STANDALONE_KEEPER OR CREATE_KEEPER_SYMLINK)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper)
endif()
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_custom_target (clickhouse-keeper-converter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-converter DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-disks)
endif ()
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
endif()
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
endif()
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
if (ENABLE_TESTS)

View File

@ -39,14 +39,19 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
try
{
DB::KeeperStorage storage(500, "", true);
auto keeper_context = std::make_shared<KeeperContext>();
keeper_context->digest_enabled = true;
DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);
DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as<std::string>(), logger);
storage.initializeSystemNodes();
DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as<std::string>(), logger);
DB::SnapshotMetadataPtr snapshot_meta = std::make_shared<DB::SnapshotMetadata>(storage.getZXID(), 1, std::make_shared<nuraft::cluster_config>());
DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta);
DB::KeeperSnapshotManager manager(options["output-dir"].as<std::string>(), 1);
DB::KeeperSnapshotManager manager(options["output-dir"].as<std::string>(), 1, keeper_context);
auto snp = manager.serializeSnapshotToBuffer(snapshot);
auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID());
std::cout << "Snapshot serialized to path:" << path << std::endl;

View File

@ -352,6 +352,9 @@ try
{
UseSSL use_ssl;
ThreadStatus thread_status;
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
setupSignalHandler();
std::cout << std::fixed << std::setprecision(3);

View File

@ -345,6 +345,7 @@ struct Checker
;
#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
void checkHarmfulEnvironmentVariables(char ** argv)
{
@ -396,6 +397,7 @@ void checkHarmfulEnvironmentVariables(char ** argv)
_exit(error);
}
}
#endif
}
@ -422,7 +424,9 @@ int main(int argc_, char ** argv_)
/// will work only after additional call of this function.
updatePHDRCache();
#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK
checkHarmfulEnvironmentVariables(argv_);
#endif
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.

View File

@ -46,6 +46,7 @@
#include <boost/algorithm/string.hpp>
#include <boost/container/flat_map.hpp>
#include <Common/TerminalSize.h>
#include <bit>
static const char * documentation = R"(
@ -186,7 +187,7 @@ static UInt64 transform(UInt64 x, UInt64 seed)
if (x == 2 || x == 3)
return x ^ (seed & 1);
size_t num_leading_zeros = __builtin_clzll(x);
size_t num_leading_zeros = std::countl_zero(x);
return feistelNetwork(x, 64 - num_leading_zeros - 1, seed);
}

View File

@ -29,6 +29,7 @@
#include <Common/ClickHouseRevision.h>
#include <Common/DNSResolver.h>
#include <Common/CurrentMetrics.h>
#include <Common/ConcurrencyControl.h>
#include <Common/Macros.h>
#include <Common/ShellCommand.h>
#include <Common/StringUtils/StringUtils.h>
@ -366,7 +367,7 @@ Poco::Net::SocketAddress Server::socketBindListen(
return address;
}
std::vector<std::string> getListenHosts(const Poco::Util::AbstractConfiguration & config)
Strings getListenHosts(const Poco::Util::AbstractConfiguration & config)
{
auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host");
if (listen_hosts.empty())
@ -377,6 +378,16 @@ std::vector<std::string> getListenHosts(const Poco::Util::AbstractConfiguration
return listen_hosts;
}
Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config)
{
auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host");
if (!interserver_listen_hosts.empty())
return interserver_listen_hosts;
/// Use more general restriction in case of emptiness
return getListenHosts(config);
}
bool getListenTry(const Poco::Util::AbstractConfiguration & config)
{
bool listen_try = config.getBool("listen_try", false);
@ -626,6 +637,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
MainThreadStatus::getInstance();
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
@ -1124,6 +1137,23 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (config->has("max_partition_size_to_drop"))
global_context->setMaxPartitionSizeToDrop(config->getUInt64("max_partition_size_to_drop"));
if (config->has("concurrent_threads_soft_limit"))
{
auto concurrent_threads_soft_limit = config->getInt("concurrent_threads_soft_limit", 0);
if (concurrent_threads_soft_limit == -1)
{
// Based on tests concurrent_threads_soft_limit has an optimal value when it's about 3 times of logical CPU cores
constexpr size_t thread_factor = 3;
concurrent_threads_soft_limit = std::thread::hardware_concurrency() * thread_factor;
}
if (concurrent_threads_soft_limit)
ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
else
ConcurrencyControl::instance().setMaxConcurrency(ConcurrencyControl::Unlimited);
}
else
ConcurrencyControl::instance().setMaxConcurrency(ConcurrencyControl::Unlimited);
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
@ -1214,6 +1244,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
/* already_loaded = */ false); /// Reload it right now (initial loading)
const auto listen_hosts = getListenHosts(config());
const auto interserver_listen_hosts = getInterserverListenHosts(config());
const auto listen_try = getListenTry(config());
if (config().has("keeper_server"))
@ -1609,7 +1640,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
std::lock_guard lock(servers_lock);
createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers);
createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers);
if (servers.empty())
throw Exception(
"No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)",
@ -1791,7 +1822,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
void Server::createServers(
Poco::Util::AbstractConfiguration & config,
const std::vector<std::string> & listen_hosts,
const Strings & listen_hosts,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,
@ -1909,51 +1941,6 @@ void Server::createServers(
#endif
});
/// Interserver IO HTTP
port_name = "interserver_http_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"replica communication (interserver): http://" + address.toString(),
std::make_unique<HTTPServer>(
context(),
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
});
port_name = "interserver_https_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
listen_host,
port_name,
"secure replica communication (interserver): https://" + address.toString(),
std::make_unique<HTTPServer>(
context(),
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
});
port_name = "mysql_port";
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
@ -2012,6 +1999,55 @@ void Server::createServers(
});
}
/// Now iterate over interserver_listen_hosts
for (const auto & interserver_listen_host : interserver_listen_hosts)
{
/// Interserver IO HTTP
const char * port_name = "interserver_http_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
Poco::Net::ServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"replica communication (interserver): http://" + address.toString(),
std::make_unique<HTTPServer>(
context(),
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
});
port_name = "interserver_https_port";
createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
return ProtocolServerAdapter(
interserver_listen_host,
port_name,
"secure replica communication (interserver): https://" + address.toString(),
std::make_unique<HTTPServer>(
context(),
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
});
}
}
void Server::updateServers(
@ -2023,6 +2059,7 @@ void Server::updateServers(
Poco::Logger * log = &logger();
const auto listen_hosts = getListenHosts(config);
const auto interserver_listen_hosts = getInterserverListenHosts(config);
const auto listen_try = getListenTry(config);
/// Remove servers once all their connections are closed
@ -2055,7 +2092,7 @@ void Server::updateServers(
}
}
createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true);
std::erase_if(servers, std::bind_front(check_server, ""));
}

View File

@ -86,7 +86,8 @@ private:
void createServers(
Poco::Util::AbstractConfiguration & config,
const std::vector<std::string> & listen_hosts,
const Strings & listen_hosts,
const Strings & interserver_listen_hosts,
bool listen_try,
Poco::ThreadPool & server_pool,
AsynchronousMetrics & async_metrics,

View File

@ -188,6 +188,10 @@
<listen_host>127.0.0.1</listen_host>
-->
<!-- <interserver_listen_host>::</interserver_listen_host> -->
<!-- Listen host for communication between replicas. Used for data exchange -->
<!-- Default values - equal to listen_host -->
<!-- Don't exit if IPv6 or IPv4 networks are unavailable while trying to listen. -->
<!-- <listen_try>0</listen_try> -->
@ -269,6 +273,13 @@
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
-->
<!-- Maximum number of query processing threads to run all queries.
Note that This is not a hard limit. In case if the limit is reached the query will still get one thread to run.
For value equals to -1 this parameter is initialized by number of logical cores multiplies by 3.
Which is a good heuristic for CPU-bound tasks.
-->
<concurrent_threads_soft_limit>0</concurrent_threads_soft_limit>
<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>100</max_concurrent_queries>
@ -604,7 +615,7 @@
if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
By default this setting is false for compatibility with earlier access configurations. -->
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
<!-- By default, for backward compatibility ON CLUSTER queries ignore CLUSTER grant,
however you can change this behaviour by setting this to true -->
<on_cluster_queries_require_cluster_grant>false</on_cluster_queries_require_cluster_grant>
@ -1379,4 +1390,13 @@
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache-->
<!-- This allows to disable exposing addresses in stack traces for security reasons.
Please be aware that it does not improve security much, but makes debugging much harder.
The addresses that are small offsets from zero will be displayed nevertheless to show nullptr dereferences.
Regardless of this configuration, the addresses are visible in the system.stack_trace and system.trace_log tables
if the user has access to these tables.
I don't recommend to change this setting.
-->
<show_addresses_in_stack_traces>false</show_addresses_in_stack_traces>
</clickhouse>

View File

@ -1,3 +1,3 @@
set (CLICKHOUSE_SU_SOURCES clickhouse-su.cpp)
set (CLICKHOUSE_SU_SOURCES clickhouse-su.cpp su.cpp)
set (CLICKHOUSE_SU_LINK PRIVATE dbms)
clickhouse_program_add(su)

View File

@ -1,145 +1,2 @@
#include <Common/Exception.h>
#include <IO/ReadHelpers.h>
#include <fmt/format.h>
#include <vector>
#include <sys/types.h>
#include <unistd.h>
#include <pwd.h>
#include <grp.h>
/// "su" means "set user"
/// In fact, this program can set Unix user and group.
///
/// Usage:
/// clickhouse su user[:group] args...
///
/// - will set user and, optionally, group and exec the remaining args.
/// user and group can be numeric identifiers or strings.
///
/// The motivation for this tool is very obscure and idiosyncratic. It is needed for Docker.
/// People want to run programs inside Docker with dropped privileges (less than root).
/// But the standard Linux "su" program is not suitable for usage inside Docker,
/// because it is creating pseudoterminals to avoid hijacking input from the terminal, for security,
/// but Docker is also doing something with the terminal and it is incompatible.
/// For this reason, people use alternative and less "secure" versions of "su" tools like "gosu" or "su-exec".
/// But it would be very strange to use 3rd-party software only to do two-three syscalls.
/// That's why we provide this tool.
///
/// Note: ClickHouse does not need Docker at all and works better without Docker.
/// ClickHouse has no dependencies, it is packaged and distributed in single binary.
/// There is no reason to use Docker unless you are already running all your software in Docker.
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int SYSTEM_ERROR;
}
void setUserAndGroup(std::string arg_uid, std::string arg_gid)
{
static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw.
std::unique_ptr<char[]> buf(new char[buf_size]);
/// Set the group first, because if we set user, the privileges will be already dropped and we will not be able to set the group later.
if (!arg_gid.empty())
{
gid_t gid = 0;
if (!tryParse(gid, arg_gid) || gid == 0)
{
group entry{};
group * result{};
if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result))
throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR);
if (!result)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid);
gid = entry.gr_gid;
}
if (gid == 0 && getgid() != 0)
throw Exception("Group has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS);
if (0 != setgid(gid))
throwFromErrno(fmt::format("Cannot do 'setgid' to user ({})", arg_gid), ErrorCodes::SYSTEM_ERROR);
}
if (!arg_uid.empty())
{
/// Is it numeric id or name?
uid_t uid = 0;
if (!tryParse(uid, arg_uid) || uid == 0)
{
passwd entry{};
passwd * result{};
if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result))
throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR);
if (!result)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid);
uid = entry.pw_uid;
}
if (uid == 0 && getuid() != 0)
throw Exception("User has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS);
if (0 != setuid(uid))
throwFromErrno(fmt::format("Cannot do 'setuid' to user ({})", arg_uid), ErrorCodes::SYSTEM_ERROR);
}
}
}
int mainEntryClickHouseSU(int argc, char ** argv)
try
{
using namespace DB;
if (argc < 3)
{
std::cout << "Usage: ./clickhouse su user:group ..." << std::endl;
exit(0);
}
std::string_view user_and_group = argv[1];
std::string user;
std::string group;
auto pos = user_and_group.find(':');
if (pos == std::string_view::npos)
{
user = user_and_group;
}
else
{
user = user_and_group.substr(0, pos);
group = user_and_group.substr(pos + 1);
}
setUserAndGroup(std::move(user), std::move(group));
std::vector<char *> new_argv;
new_argv.reserve(argc - 1);
new_argv.insert(new_argv.begin(), argv + 2, argv + argc);
new_argv.push_back(nullptr);
execvp(new_argv.front(), new_argv.data());
throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR);
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(false) << '\n';
return 1;
}
int mainEntryClickHouseSU(int argc, char ** argv);
int main(int argc_, char ** argv_) { return mainEntryClickHouseSU(argc_, argv_); }

145
programs/su/su.cpp Normal file
View File

@ -0,0 +1,145 @@
#include <Common/Exception.h>
#include <IO/ReadHelpers.h>
#include <fmt/format.h>
#include <vector>
#include <sys/types.h>
#include <unistd.h>
#include <pwd.h>
#include <grp.h>
/// "su" means "set user"
/// In fact, this program can set Unix user and group.
///
/// Usage:
/// clickhouse su user[:group] args...
///
/// - will set user and, optionally, group and exec the remaining args.
/// user and group can be numeric identifiers or strings.
///
/// The motivation for this tool is very obscure and idiosyncratic. It is needed for Docker.
/// People want to run programs inside Docker with dropped privileges (less than root).
/// But the standard Linux "su" program is not suitable for usage inside Docker,
/// because it is creating pseudoterminals to avoid hijacking input from the terminal, for security,
/// but Docker is also doing something with the terminal and it is incompatible.
/// For this reason, people use alternative and less "secure" versions of "su" tools like "gosu" or "su-exec".
/// But it would be very strange to use 3rd-party software only to do two-three syscalls.
/// That's why we provide this tool.
///
/// Note: ClickHouse does not need Docker at all and works better without Docker.
/// ClickHouse has no dependencies, it is packaged and distributed in single binary.
/// There is no reason to use Docker unless you are already running all your software in Docker.
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int SYSTEM_ERROR;
}
void setUserAndGroup(std::string arg_uid, std::string arg_gid)
{
static constexpr size_t buf_size = 16384; /// Linux man page says it is enough. Nevertheless, we will check if it's not enough and throw.
std::unique_ptr<char[]> buf(new char[buf_size]);
/// Set the group first, because if we set user, the privileges will be already dropped and we will not be able to set the group later.
if (!arg_gid.empty())
{
gid_t gid = 0;
if (!tryParse(gid, arg_gid) || gid == 0)
{
group entry{};
group * result{};
if (0 != getgrnam_r(arg_gid.data(), &entry, buf.get(), buf_size, &result))
throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR);
if (!result)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid);
gid = entry.gr_gid;
}
if (gid == 0 && getgid() != 0)
throw Exception("Group has id 0, but dropping privileges to gid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS);
if (0 != setgid(gid))
throwFromErrno(fmt::format("Cannot do 'setgid' to user ({})", arg_gid), ErrorCodes::SYSTEM_ERROR);
}
if (!arg_uid.empty())
{
/// Is it numeric id or name?
uid_t uid = 0;
if (!tryParse(uid, arg_uid) || uid == 0)
{
passwd entry{};
passwd * result{};
if (0 != getpwnam_r(arg_uid.data(), &entry, buf.get(), buf_size, &result))
throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR);
if (!result)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid);
uid = entry.pw_uid;
}
if (uid == 0 && getuid() != 0)
throw Exception("User has id 0, but dropping privileges to uid 0 does not make sense", ErrorCodes::BAD_ARGUMENTS);
if (0 != setuid(uid))
throwFromErrno(fmt::format("Cannot do 'setuid' to user ({})", arg_uid), ErrorCodes::SYSTEM_ERROR);
}
}
}
int mainEntryClickHouseSU(int argc, char ** argv)
try
{
using namespace DB;
if (argc < 3)
{
std::cout << "Usage: ./clickhouse su user:group ..." << std::endl;
exit(0);
}
std::string_view user_and_group = argv[1];
std::string user;
std::string group;
auto pos = user_and_group.find(':');
if (pos == std::string_view::npos)
{
user = user_and_group;
}
else
{
user = user_and_group.substr(0, pos);
group = user_and_group.substr(pos + 1);
}
setUserAndGroup(std::move(user), std::move(group));
std::vector<char *> new_argv;
new_argv.reserve(argc - 1);
new_argv.insert(new_argv.begin(), argv + 2, argv + argc);
new_argv.push_back(nullptr);
execvp(new_argv.front(), new_argv.data());
throwFromErrno("Cannot execvp", ErrorCodes::SYSTEM_ERROR);
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(false) << '\n';
return 1;
}

View File

@ -25,7 +25,7 @@ int main(int argc, char ** argv)
return 0;
}
String cache_name = "";
const char * cache_name = "";
if (argc == 4)
cache_name = argv[3];

View File

@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStatus(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
void BackupCoordinationLocal::setErrorStatus(const String &, const Exception &)
void BackupCoordinationLocal::setError(const String &, const Exception &)
{
}
Strings BackupCoordinationLocal::waitStatus(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitStatusFor(const Strings &, const String &, UInt64)
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -20,10 +20,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;

View File

@ -165,55 +165,94 @@ namespace
constexpr size_t NUM_ATTEMPTS = 10;
}
BackupCoordinationRemote::BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
BackupCoordinationRemote::BackupCoordinationRemote(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
}
BackupCoordinationRemote::~BackupCoordinationRemote() = default;
BackupCoordinationRemote::~BackupCoordinationRemote()
{
try
{
if (remove_zk_nodes_in_destructor)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
return getZooKeeperNoLock();
}
zkutil::ZooKeeperPtr BackupCoordinationRemote::getZooKeeperNoLock() const
{
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to different [Zoo]Keeper instance
/// so we may read a bit stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void BackupCoordinationRemote::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
auto zk = getZooKeeper();
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_part_names", "");
zk->createIfNotExists(zookeeper_path + "/repl_mutations", "");
zk->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
zk->createIfNotExists(zookeeper_path + "/repl_access", "");
zk->createIfNotExists(zookeeper_path + "/file_names", "");
zk->createIfNotExists(zookeeper_path + "/file_infos", "");
zk->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
}
void BackupCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
/// Usually this function is called by the initiator when a backup is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still making the backup. Removing all the nodes will remove the parent node of the backup coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some useless part
/// of their backup work before that. Anyway in this case backup won't be finalized (because only an initiator can do that).
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
void BackupCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void BackupCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings BackupCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings BackupCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
@ -229,11 +268,11 @@ void BackupCoordinationRemote::addReplicatedPartNames(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
}
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
@ -255,11 +294,11 @@ void BackupCoordinationRemote::addReplicatedMutations(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(replica_name);
zookeeper->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
zk->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
}
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
@ -279,11 +318,11 @@ void BackupCoordinationRemote::addReplicatedDataPath(
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(data_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
}
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
@ -300,18 +339,18 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
return;
replicated_tables.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
{
String path = zookeeper_path + "/repl_part_names";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addPartNames(table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
}
}
@ -319,14 +358,14 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_mutations";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
for (const String & escaped_replica_name : zk->getChildren(path2))
{
String replica_name = unescapeForFileName(escaped_replica_name);
auto mutations = ReplicatedMutations::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name));
replicated_tables->addMutations(table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations);
}
}
@ -334,11 +373,11 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
{
String path = zookeeper_path + "/repl_data_paths";
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
for (const String & escaped_table_shared_id : zk->getChildren(path))
{
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
String path2 = path + "/" + escaped_table_shared_id;
for (const String & escaped_data_path : zookeeper->getChildren(path2))
for (const String & escaped_data_path : zk->getChildren(path2))
{
String data_path = unescapeForFileName(escaped_data_path);
replicated_tables->addDataPath(table_shared_id, data_path);
@ -356,13 +395,13 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
}
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + host_id;
zookeeper->createIfNotExists(path, file_path);
zk->createIfNotExists(path, file_path);
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
@ -378,20 +417,20 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
return;
replicated_access.emplace();
auto zookeeper = get_zookeeper();
auto zk = getZooKeeperNoLock();
String path = zookeeper_path + "/repl_access";
for (const String & escaped_access_zk_path : zookeeper->getChildren(path))
for (const String & escaped_access_zk_path : zk->getChildren(path))
{
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
String path2 = path + "/" + escaped_access_zk_path;
for (const String & type_str : zookeeper->getChildren(path2))
for (const String & type_str : zk->getChildren(path2))
{
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
String path3 = path2 + "/" + type_str;
for (const String & host_id : zookeeper->getChildren(path3))
for (const String & host_id : zk->getChildren(path3))
{
String file_path = zookeeper->get(path3 + "/" + host_id);
String file_path = zk->get(path3 + "/" + host_id);
replicated_access->addFilePath(access_zk_path, type, host_id, file_path);
}
}
@ -401,11 +440,11 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String full_path = zookeeper_path + "/file_names/" + escapeForFileName(file_info.file_name);
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
zookeeper->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
zk->create(full_path, size_and_checksum, zkutil::CreateMode::Persistent);
if (!file_info.size)
{
@ -414,7 +453,7 @@ void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is
}
full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
auto code = zookeeper->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(full_path, serializeFileInfo(file_info), zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, full_path);
@ -426,15 +465,15 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
if (!file_info.size)
return; /// we don't keep FileInfos for empty files, nothing to update
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
String full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
for (size_t attempt = 0; attempt < NUM_ATTEMPTS; ++attempt)
{
Coordination::Stat stat;
auto new_info = deserializeFileInfo(zookeeper->get(full_path, &stat));
auto new_info = deserializeFileInfo(zk->get(full_path, &stat));
new_info.archive_suffix = file_info.archive_suffix;
auto code = zookeeper->trySet(full_path, serializeFileInfo(new_info), stat.version);
auto code = zk->trySet(full_path, serializeFileInfo(new_info), stat.version);
if (code == Coordination::Error::ZOK)
return;
bool is_last_attempt = (attempt == NUM_ATTEMPTS - 1);
@ -445,16 +484,16 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
std::vector<FileInfo> file_infos;
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
for (const String & escaped_name : escaped_names)
{
String size_and_checksum = zookeeper->get(zookeeper_path + "/file_names/" + escaped_name);
String size_and_checksum = zk->get(zookeeper_path + "/file_names/" + escaped_name);
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = unescapeForFileName(escaped_name);
file_infos.emplace_back(std::move(file_info));
}
@ -463,8 +502,8 @@ std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
Strings BackupCoordinationRemote::listFiles(const String & directory, bool recursive) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -496,8 +535,8 @@ Strings BackupCoordinationRemote::listFiles(const String & directory, bool recur
bool BackupCoordinationRemote::hasFiles(const String & directory) const
{
auto zookeeper = get_zookeeper();
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
auto zk = getZooKeeper();
Strings escaped_names = zk->getChildren(zookeeper_path + "/file_names");
String prefix = directory;
if (!prefix.empty() && !prefix.ends_with('/'))
@ -515,42 +554,42 @@ bool BackupCoordinationRemote::hasFiles(const String & directory) const
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
UInt64 size = deserializeSizeAndChecksum(size_and_checksum).first;
FileInfo file_info;
if (size) /// we don't keep FileInfos for empty files
file_info = deserializeFileInfo(zookeeper->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info = deserializeFileInfo(zk->get(zookeeper_path + "/file_infos/" + size_and_checksum));
file_info.file_name = file_name;
return file_info;
}
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const SizeAndChecksum & size_and_checksum) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String file_info_str;
if (!zookeeper->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
if (!zk->tryGet(zookeeper_path + "/file_infos/" + serializeSizeAndChecksum(size_and_checksum), file_info_str))
return std::nullopt;
return deserializeFileInfo(file_info_str);
}
std::optional<SizeAndChecksum> BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String size_and_checksum;
if (!zookeeper->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
if (!zk->tryGet(zookeeper_path + "/file_names/" + escapeForFileName(file_name), size_and_checksum))
return std::nullopt;
return deserializeSizeAndChecksum(size_and_checksum);
}
String BackupCoordinationRemote::getNextArchiveSuffix()
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/archive_suffixes/a";
String path_created;
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::PersistentSequential, path_created);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, path);
return formatArchiveSuffix(extractCounterFromSequentialNodeName(path_created));
@ -558,16 +597,11 @@ String BackupCoordinationRemote::getNextArchiveSuffix()
Strings BackupCoordinationRemote::getAllArchiveSuffixes() const
{
auto zookeeper = get_zookeeper();
Strings node_names = zookeeper->getChildren(zookeeper_path + "/archive_suffixes");
auto zk = getZooKeeper();
Strings node_names = zk->getChildren(zookeeper_path + "/archive_suffixes");
for (auto & node_name : node_names)
node_name = formatArchiveSuffix(extractCounterFromSequentialNodeName(node_name));
return node_names;
}
void BackupCoordinationRemote::drop()
{
removeAllNodes();
}
}

View File

@ -3,7 +3,7 @@
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationReplicatedAccess.h>
#include <Backups/BackupCoordinationReplicatedTables.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -13,13 +13,13 @@ namespace DB
class BackupCoordinationRemote : public IBackupCoordination
{
public:
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~BackupCoordinationRemote() override;
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(
const String & table_shared_id,
@ -56,9 +56,9 @@ public:
String getNextArchiveSuffix() override;
Strings getAllArchiveSuffixes() const override;
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
void createRootNodes();
void removeAllNodes();
void prepareReplicatedTables() const;
@ -66,10 +66,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
const bool remove_zk_nodes_in_destructor;
BackupCoordinationStatusSync status_sync;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
mutable std::optional<BackupCoordinationReplicatedTables> replicated_tables;
mutable std::optional<BackupCoordinationReplicatedAccess> replicated_access;
};

View File

@ -0,0 +1,13 @@
#include <Backups/BackupCoordinationStage.h>
#include <fmt/format.h>
namespace DB
{
String BackupCoordinationStage::formatGatheringMetadata(size_t pass)
{
return fmt::format("{} ({})", GATHERING_METADATA, pass);
}
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <base/types.h>
namespace DB
{
namespace BackupCoordinationStage
{
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * GATHERING_METADATA = "gathering metadata";
String formatGatheringMetadata(size_t pass);
/// Making temporary hard links and prepare backup entries.
constexpr const char * EXTRACTING_DATA_FROM_TABLES = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * RUNNING_POST_TASKS = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * WRITING_BACKUP = "writing backup";
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * FINDING_TABLES_IN_BACKUP = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * CREATING_DATABASES = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * CREATING_TABLES = "creating tables";
/// Inserting restored data to tables.
constexpr const char * INSERTING_DATA_TO_TABLES = "inserting data to tables";
/// Coordination stage meaning that a host finished its work.
constexpr const char * COMPLETED = "completed";
}
}

View File

@ -0,0 +1,201 @@
#include <Backups/BackupCoordinationStageSync.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStageSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStageSync::set(const String & current_host, const String & new_stage, const String & message)
{
auto zookeeper = get_zookeeper();
/// Make an ephemeral node so the initiator can track if the current host is still working.
String alive_node_path = zookeeper_path + "/alive|" + current_host;
auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral);
if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS)
throw zkutil::KeeperException(code, alive_node_path);
zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, "");
zookeeper->create(zookeeper_path + "/current|" + current_host + "|" + new_stage, message, zkutil::CreateMode::Persistent);
}
void BackupCoordinationStageSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
WriteBufferFromOwnString buf;
writeStringBinary(current_host, buf);
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
Strings BackupCoordinationStageSync::wait(const Strings & all_hosts, const String & stage_to_wait)
{
return waitImpl(all_hosts, stage_to_wait, {});
}
Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return waitImpl(all_hosts, stage_to_wait, timeout);
}
namespace
{
struct UnreadyHostState
{
bool started = false;
bool alive = false;
};
}
struct BackupCoordinationStageSync::State
{
Strings results;
std::map<String, UnreadyHostState> unready_hosts;
std::optional<std::pair<String, Exception>> error;
std::optional<String> host_terminated;
};
BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState(
zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const
{
std::unordered_set<std::string_view> zk_nodes_set{zk_nodes.begin(), zk_nodes.end()};
State state;
if (zk_nodes_set.contains("error"))
{
ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
String host;
readStringBinary(host, buf);
state.error = std::make_pair(host, readException(buf, fmt::format("Got error from {}", host)));
return state;
}
for (const auto & host : all_hosts)
{
if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait))
{
UnreadyHostState unready_host_state;
unready_host_state.started = zk_nodes_set.contains("started|" + host);
unready_host_state.alive = zk_nodes_set.contains("alive|" + host);
state.unready_hosts.emplace(host, unready_host_state);
if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated)
state.host_terminated = host;
}
}
if (state.host_terminated || !state.unready_hosts.empty())
return state;
state.results.reserve(all_hosts.size());
for (const auto & host : all_hosts)
state.results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait));
return state;
}
Strings BackupCoordinationStageSync::waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const
{
if (all_hosts.empty())
return {};
/// Wait until all hosts are ready or an error happens or time is out.
auto zookeeper = get_zookeeper();
/// Set by ZooKepper when list of zk nodes have changed.
auto watch = std::make_shared<Poco::Event>();
bool use_timeout = timeout.has_value();
std::chrono::steady_clock::time_point end_of_timeout;
if (use_timeout)
end_of_timeout = std::chrono::steady_clock::now() + std::chrono::duration_cast<std::chrono::steady_clock::duration>(*timeout);
State state;
String previous_unready_host; /// Used for logging: we don't want to log the same unready host again.
for (;;)
{
/// Get zk nodes and subscribe on their changes.
Strings zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch);
/// Read and analyze the current state of zk nodes.
state = readCurrentState(zookeeper, zk_nodes, all_hosts, stage_to_wait);
if (state.error || state.host_terminated || state.unready_hosts.empty())
break; /// Error happened or everything is ready.
/// Log that we will wait for another host.
const auto & unready_host = state.unready_hosts.begin()->first;
if (unready_host != previous_unready_host)
{
LOG_TRACE(log, "Waiting for host {}", unready_host);
previous_unready_host = unready_host;
}
/// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed.
{
if (use_timeout)
{
auto current_time = std::chrono::steady_clock::now();
if ((current_time > end_of_timeout)
|| !watch->tryWait(std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time).count()))
break;
}
else
{
watch->wait();
}
}
}
/// Rethrow an error raised originally on another host.
if (state.error)
state.error->second.rethrow();
/// Another host terminated without errors.
if (state.host_terminated)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated);
/// Something's unready, timeout is probably not enough.
if (!state.unready_hosts.empty())
{
const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin();
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long (> {}){}",
unready_host,
to_string(*timeout),
unready_host_state.started ? "" : ": Operation didn't start");
}
return state.results;
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts so all hosts would come to a specific stage at around the same time.
class BackupCoordinationStageSync
{
public:
BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the stage of the current host and signal other hosts if there were other hosts waiting for that.
void set(const String & current_host, const String & new_stage, const String & message);
void setError(const String & current_host, const Exception & exception);
/// Sets the stage of the current host and waits until all hosts come to the same stage.
/// The function returns the messages all hosts set when they come to the required stage.
Strings wait(const Strings & all_hosts, const String & stage_to_wait);
/// Almost the same as setAndWait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout);
private:
void createRootNodes();
struct State;
State readCurrentState(zkutil::ZooKeeperPtr zookeeper, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const;
Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional<std::chrono::milliseconds> timeout) const;
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}

View File

@ -1,182 +0,0 @@
#include <Backups/BackupCoordinationStatusSync.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <base/chrono_io.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
}
BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, log(log_)
{
createRootNodes();
}
void BackupCoordinationStatusSync::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
}
void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message)
{
auto zookeeper = get_zookeeper();
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
}
void BackupCoordinationStatusSync::setError(const String & current_host, const Exception & exception)
{
auto zookeeper = get_zookeeper();
Exception exception2 = exception;
exception2.addMessage("Host {}", current_host);
WriteBufferFromOwnString buf;
writeException(exception2, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
}
Strings BackupCoordinationStatusSync::wait(const Strings & all_hosts, const String & status_to_wait)
{
return waitImpl(all_hosts, status_to_wait, {});
}
Strings BackupCoordinationStatusSync::waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
{
return waitImpl(all_hosts, status_to_wait, timeout_ms);
}
Strings BackupCoordinationStatusSync::waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms)
{
if (all_hosts.empty())
return {};
/// Wait for other hosts.
Strings ready_hosts_results;
ready_hosts_results.resize(all_hosts.size());
std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
for (size_t i = 0; i != all_hosts.size(); ++i)
unready_hosts[all_hosts[i]].push_back(i);
std::optional<Exception> error;
auto zookeeper = get_zookeeper();
/// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`.
auto process_zk_nodes = [&](const Strings & zk_nodes)
{
for (const String & zk_node : zk_nodes)
{
if (zk_node.starts_with("remove_watch-"))
continue;
if (zk_node == "error")
{
ReadBufferFromOwnString buf{zookeeper->get(zookeeper_path + "/error")};
error = readException(buf, "", true);
break;
}
size_t separator_pos = zk_node.find('|');
if (separator_pos == String::npos)
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
String host = zk_node.substr(0, separator_pos);
String status = zk_node.substr(separator_pos + 1);
auto it = unready_hosts.find(host);
if ((it != unready_hosts.end()) && (status == status_to_wait))
{
String result = zookeeper->get(zookeeper_path + "/" + zk_node);
for (size_t i : it->second)
ready_hosts_results[i] = result;
unready_hosts.erase(it);
}
}
};
/// Wait until all hosts are ready or an error happens or time is out.
std::atomic<bool> watch_set = false;
std::condition_variable watch_triggered_event;
auto watch_callback = [&](const Coordination::WatchResponse &)
{
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
watch_triggered_event.notify_all();
};
auto watch_triggered = [&] { return !watch_set; };
bool use_timeout = timeout_ms.has_value();
std::chrono::milliseconds timeout{timeout_ms.value_or(0)};
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration elapsed;
std::mutex dummy_mutex;
String previous_unready_host;
while (!unready_hosts.empty() && !error)
{
watch_set = true;
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
process_zk_nodes(nodes);
if (!unready_hosts.empty() && !error)
{
const auto & unready_host = unready_hosts.begin()->first;
if (unready_host != previous_unready_host)
{
LOG_TRACE(log, "Waiting for host {}", unready_host);
previous_unready_host = unready_host;
}
std::unique_lock dummy_lock{dummy_mutex};
if (use_timeout)
{
elapsed = std::chrono::steady_clock::now() - start_time;
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
break;
}
else
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
}
if (watch_set)
{
/// Remove watch by triggering it.
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
std::unique_lock dummy_lock{dummy_mutex};
watch_triggered_event.wait(dummy_lock, watch_triggered);
}
if (error)
error->rethrow();
if (!unready_hosts.empty())
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Waited for host {} too long ({})",
unready_hosts.begin()->first,
to_string(elapsed));
}
return ready_hosts_results;
}
}

View File

@ -1,37 +0,0 @@
#pragma once
#include <Common/ZooKeeper/Common.h>
namespace DB
{
/// Used to coordinate hosts so all hosts would come to a specific status at around the same time.
class BackupCoordinationStatusSync
{
public:
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
/// Sets the status of the current host and signal other hosts if there were other hosts waiting for that.
void set(const String & current_host, const String & new_status, const String & message);
void setError(const String & current_host, const Exception & exception);
/// Sets the status of the current host and waits until all hosts come to the same status.
/// The function returns the messages all hosts set when they come to the required status.
Strings wait(const Strings & all_hosts, const String & status_to_wait);
/// Almost the same as setAndWait() but this one stops waiting and throws an exception after a specific amount of time.
Strings waitFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms);
static constexpr const char * kErrorStatus = "error";
private:
void createRootNodes();
Strings waitImpl(const Strings & all_hosts, const String & status_to_wait, std::optional<UInt64> timeout_ms);
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
Poco::Logger * log;
};
}

View File

@ -1,6 +1,7 @@
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupUtils.h>
#include <Backups/DDLAdjustingForBackupVisitor.h>
#include <Databases/IDatabase.h>
@ -31,20 +32,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
constexpr const char * kGatheringMetadataStatus = "gathering metadata";
/// Making temporary hard links and prepare backup entries.
constexpr const char * kExtractingDataFromTablesStatus = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
constexpr const char * kRunningPostTasksStatus = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
constexpr const char * kWritingBackupStatus = "writing backup";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -90,7 +82,8 @@ BackupEntriesCollector::BackupEntriesCollector(
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, context(context_)
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 300000))
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))
, log(&Poco::Logger::get("BackupEntriesCollector"))
{
}
@ -100,7 +93,7 @@ BackupEntriesCollector::~BackupEntriesCollector() = default;
BackupEntries BackupEntriesCollector::run()
{
/// run() can be called onle once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -123,36 +116,40 @@ BackupEntries BackupEntriesCollector::run()
makeBackupEntriesForTablesDefs();
/// Make backup entries for the data of the found tables.
setStatus(kExtractingDataFromTablesStatus);
setStage(Stage::EXTRACTING_DATA_FROM_TABLES);
makeBackupEntriesForTablesData();
/// Run all the tasks added with addPostCollectingTask().
setStatus(kRunningPostTasksStatus);
setStage(Stage::RUNNING_POST_TASKS);
runPostTasks();
/// No more backup entries or tasks are allowed after this point.
setStatus(kWritingBackupStatus);
setStage(Stage::WRITING_BACKUP);
return std::move(backup_entries);
}
Strings BackupEntriesCollector::setStatus(const String & new_status, const String & message)
Strings BackupEntriesCollector::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
backup_coordination->setStatus(backup_settings.host_id, new_status, message);
backup_coordination->setStage(backup_settings.host_id, new_stage, message);
if (new_status.starts_with(kGatheringMetadataStatus))
if (new_stage == Stage::formatGatheringMetadata(1))
{
auto now = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout);
return backup_coordination->waitStatusFor(
all_hosts, new_status, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - now).count());
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitStatus(all_hosts, new_status);
return backup_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -173,18 +170,18 @@ void BackupEntriesCollector::calculateRootPathInBackup()
/// Finds databases and tables which we will put to the backup.
void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
{
consistent_metadata_snapshot_start_time = std::chrono::steady_clock::now();
auto end_of_timeout = consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout;
setStatus(fmt::format("{} ({})", kGatheringMetadataStatus, 1));
setStage(Stage::formatGatheringMetadata(1));
consistent_metadata_snapshot_end_time = std::chrono::steady_clock::now() + consistent_metadata_snapshot_timeout;
for (size_t pass = 1;; ++pass)
{
String new_status = fmt::format("{} ({})", kGatheringMetadataStatus, pass + 1);
String next_stage = Stage::formatGatheringMetadata(pass + 1);
std::optional<Exception> inconsistency_error;
if (tryGatherMetadataAndCompareWithPrevious(inconsistency_error))
{
/// Gathered metadata and checked consistency, cool! But we have to check that other hosts cope with that too.
auto all_hosts_results = setStatus(new_status, "consistent");
auto all_hosts_results = setStage(next_stage, "consistent");
std::optional<String> host_with_inconsistency;
std::optional<String> inconsistency_error_on_other_host;
@ -210,13 +207,13 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
else
{
/// Failed to gather metadata or something wasn't consistent. We'll let other hosts know that and try again.
setStatus(new_status, inconsistency_error->displayText());
setStage(next_stage, inconsistency_error->displayText());
}
/// Two passes is minimum (we need to compare with table names with previous ones to be sure we don't miss anything).
if (pass >= 2)
{
if (std::chrono::steady_clock::now() > end_of_timeout)
if (std::chrono::steady_clock::now() > consistent_metadata_snapshot_end_time)
inconsistency_error->rethrow();
else
LOG_WARNING(log, "{}", inconsistency_error->displayText());
@ -239,6 +236,7 @@ bool BackupEntriesCollector::tryGatherMetadataAndCompareWithPrevious(std::option
table_infos.clear();
gatherDatabasesMetadata();
gatherTablesMetadata();
lockTablesForReading();
}
catch (Exception & e)
{
@ -526,12 +524,11 @@ void BackupEntriesCollector::lockTablesForReading()
for (auto & [table_name, table_info] : table_infos)
{
auto storage = table_info.storage;
TableLockHolder table_lock;
if (storage)
{
try
{
table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
}
catch (Exception & e)
{
@ -712,7 +709,7 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN
void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
backup_entries.emplace_back(file_name, backup_entry);
}
@ -724,21 +721,21 @@ void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryP
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, backup_entries_);
}
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
insertAtEnd(backup_entries, std::move(backup_entries_));
}
void BackupEntriesCollector::addPostTask(std::function<void()> task)
{
if (current_status == kWritingBackupStatus)
if (current_stage == Stage::WRITING_BACKUP)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
post_tasks.push(std::move(task));
}

View File

@ -86,12 +86,13 @@ private:
void runPostTasks();
Strings setStatus(const String & new_status, const String & message = "");
Strings setStage(const String & new_stage, const String & message = "");
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Poco::Logger * log;
@ -129,8 +130,8 @@ private:
std::optional<ASTs> partitions;
};
String current_status;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time;
String current_stage;
std::chrono::steady_clock::time_point consistent_metadata_snapshot_end_time;
std::unordered_map<String, DatabaseInfo> database_infos;
std::unordered_map<QualifiedTableName, TableInfo> table_infos;
std::vector<std::pair<String, String>> previous_databases_metadata;

View File

@ -8,21 +8,22 @@ class SeekableReadBuffer;
class WriteBuffer;
/// Represents operations of loading from disk or downloading for reading a backup.
class IBackupReader /// BackupReaderFile, BackupReaderDisk, BackupReaderS3
class IBackupReader /// BackupReaderFile, BackupReaderDisk
{
public:
virtual ~IBackupReader() = default;
virtual bool fileExists(const String & file_name) = 0;
virtual size_t getFileSize(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) = 0;
};
/// Represents operations of storing to disk or uploading for writing a backup.
class IBackupWriter /// BackupWriterFile, BackupWriterDisk, BackupWriterS3
class IBackupWriter /// BackupWriterFile, BackupWriterDisk
{
public:
virtual ~IBackupWriter() = default;
virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual bool fileContentsEqual(const String & file_name, const String & expected_file_contents) = 0;
virtual std::unique_ptr<WriteBuffer> writeFile(const String & file_name) = 0;
virtual void removeFiles(const Strings & file_names) = 0;

View File

@ -17,7 +17,7 @@ bool BackupReaderDisk::fileExists(const String & file_name)
return disk->exists(path / file_name);
}
size_t BackupReaderDisk::getFileSize(const String & file_name)
UInt64 BackupReaderDisk::getFileSize(const String & file_name)
{
return disk->getFileSize(path / file_name);
}
@ -38,6 +38,11 @@ bool BackupWriterDisk::fileExists(const String & file_name)
return disk->exists(path / file_name);
}
UInt64 BackupWriterDisk::getFileSize(const String & file_name)
{
return disk->getFileSize(path / file_name);
}
bool BackupWriterDisk::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!disk->exists(path / file_name))

View File

@ -15,7 +15,7 @@ public:
~BackupReaderDisk() override;
bool fileExists(const String & file_name) override;
size_t getFileSize(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
private:
@ -30,6 +30,7 @@ public:
~BackupWriterDisk() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -18,7 +18,7 @@ bool BackupReaderFile::fileExists(const String & file_name)
return fs::exists(path / file_name);
}
size_t BackupReaderFile::getFileSize(const String & file_name)
UInt64 BackupReaderFile::getFileSize(const String & file_name)
{
return fs::file_size(path / file_name);
}
@ -39,6 +39,11 @@ bool BackupWriterFile::fileExists(const String & file_name)
return fs::exists(path / file_name);
}
UInt64 BackupWriterFile::getFileSize(const String & file_name)
{
return fs::file_size(path / file_name);
}
bool BackupWriterFile::fileContentsEqual(const String & file_name, const String & expected_file_contents)
{
if (!fs::exists(path / file_name))

View File

@ -13,7 +13,7 @@ public:
~BackupReaderFile() override;
bool fileExists(const String & file_name) override;
size_t getFileSize(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
private:
@ -27,6 +27,7 @@ public:
~BackupWriterFile() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void removeFiles(const Strings & file_names) override;

View File

@ -219,10 +219,7 @@ void BackupImpl::open(const ContextPtr & context)
void BackupImpl::close()
{
std::lock_guard lock{mutex};
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
closeArchives();
if (!is_internal_backup && writer && !writing_finalized)
removeAllFilesAfterFailure();
@ -232,10 +229,29 @@ void BackupImpl::close()
coordination.reset();
}
time_t BackupImpl::getTimestamp() const
void BackupImpl::closeArchives()
{
archive_readers.clear();
for (auto & archive_writer : archive_writers)
archive_writer = {"", nullptr};
}
size_t BackupImpl::getNumFiles() const
{
std::lock_guard lock{mutex};
return timestamp;
return num_files;
}
UInt64 BackupImpl::getUncompressedSize() const
{
std::lock_guard lock{mutex};
return uncompressed_size;
}
UInt64 BackupImpl::getCompressedSize() const
{
std::lock_guard lock{mutex};
return compressed_size;
}
void BackupImpl::writeBackupMetadata()
@ -290,6 +306,7 @@ void BackupImpl::writeBackupMetadata()
if (info.pos_in_archive != static_cast<size_t>(-1))
config->setUInt64(prefix + "pos_in_archive", info.pos_in_archive);
}
increaseUncompressedSize(info);
++index;
}
@ -306,6 +323,8 @@ void BackupImpl::writeBackupMetadata()
out = writer->writeFile(".backup");
out->write(str.data(), str.size());
out->finalize();
increaseUncompressedSize(str.size());
}
void BackupImpl::readBackupMetadata()
@ -315,6 +334,7 @@ void BackupImpl::readBackupMetadata()
{
if (!reader->fileExists(archive_params.archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
setCompressedSize();
in = getArchiveReader("")->readFile(".backup");
}
else
@ -326,6 +346,7 @@ void BackupImpl::readBackupMetadata()
String str;
readStringUntilEOF(str, *in);
increaseUncompressedSize(str.size());
std::istringstream stream(str); // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Poco::AutoPtr<Poco::Util::XMLConfiguration> config{new Poco::Util::XMLConfiguration()};
config->load(stream);
@ -382,8 +403,12 @@ void BackupImpl::readBackupMetadata()
}
coordination->addFileInfo(info);
increaseUncompressedSize(info);
}
}
if (!use_archives)
setCompressedSize();
}
void BackupImpl::checkBackupDoesntExist() const
@ -750,6 +775,8 @@ void BackupImpl::finalizeWriting()
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
writeBackupMetadata();
closeArchives();
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name);
}
@ -758,12 +785,32 @@ void BackupImpl::finalizeWriting()
}
void BackupImpl::increaseUncompressedSize(UInt64 file_size)
{
uncompressed_size += file_size;
++num_files;
}
void BackupImpl::increaseUncompressedSize(const FileInfo & info)
{
if ((info.size > info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name)))
increaseUncompressedSize(info.size - info.base_size);
}
void BackupImpl::setCompressedSize()
{
if (use_archives)
compressed_size = writer ? writer->getFileSize(archive_params.archive_name) : reader->getFileSize(archive_params.archive_name);
else
compressed_size = uncompressed_size;
}
String BackupImpl::getArchiveNameWithSuffix(const String & suffix) const
{
return archive_params.archive_name + (suffix.empty() ? "" : ".") + suffix;
}
std::shared_ptr<IArchiveReader> BackupImpl::getArchiveReader(const String & suffix) const
{
auto it = archive_readers.find(suffix);
@ -796,6 +843,7 @@ std::shared_ptr<IArchiveWriter> BackupImpl::getArchiveWriter(const String & suff
return new_archive_writer;
}
void BackupImpl::removeAllFilesAfterFailure()
{
if (is_internal_backup)

View File

@ -55,8 +55,11 @@ public:
const String & getName() const override { return backup_name; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override;
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
size_t getNumFiles() const override;
UInt64 getUncompressedSize() const override;
UInt64 getCompressedSize() const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
bool fileExists(const String & file_name) const override;
@ -76,6 +79,7 @@ private:
void open(const ContextPtr & context);
void close();
void closeArchives();
/// Writes the file ".backup" containing backup's metadata.
void writeBackupMetadata();
@ -96,6 +100,13 @@ private:
std::shared_ptr<IArchiveReader> getArchiveReader(const String & suffix) const;
std::shared_ptr<IArchiveWriter> getArchiveWriter(const String & suffix);
/// Increases `uncompressed_size` by a specific value and `num_files` by 1.
void increaseUncompressedSize(UInt64 file_size);
void increaseUncompressedSize(const FileInfo & info);
/// Calculates and sets `compressed_size`.
void setCompressedSize();
const String backup_name;
const ArchiveParams archive_params;
const bool use_archives;
@ -108,6 +119,9 @@ private:
mutable std::mutex mutex;
std::optional<UUID> uuid;
time_t timestamp = 0;
size_t num_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
UInt64 version;
std::optional<BackupInfo> base_backup_info;
std::shared_ptr<const IBackup> base_backup;

View File

@ -60,6 +60,7 @@ namespace
/// List of backup settings except base_backup_name and cluster_host_ids.
#define LIST_OF_BACKUP_SETTINGS(M) \
M(String, id) \
M(String, compression_method) \
M(Int64, compression_level) \
M(String, password) \

View File

@ -11,6 +11,9 @@ class ASTBackupQuery;
/// Settings specified in the "SETTINGS" clause of a BACKUP query.
struct BackupSettings
{
/// ID of the backup operation, to identify it in the system.backups table. Auto-generated if not set.
String id;
/// Base backup, if it's set an incremental backup will be built. That means only differences made after the base backup will be put
/// into a new backup.
std::optional<BackupInfo> base_backup_info;

View File

@ -15,18 +15,18 @@ std::string_view toString(BackupStatus backup_status)
{
switch (backup_status)
{
case BackupStatus::MAKING_BACKUP:
return "MAKING_BACKUP";
case BackupStatus::BACKUP_COMPLETE:
return "BACKUP_COMPLETE";
case BackupStatus::FAILED_TO_BACKUP:
return "FAILED_TO_BACKUP";
case BackupStatus::CREATING_BACKUP:
return "CREATING_BACKUP";
case BackupStatus::BACKUP_CREATED:
return "BACKUP_CREATED";
case BackupStatus::BACKUP_FAILED:
return "BACKUP_FAILED";
case BackupStatus::RESTORING:
return "RESTORING";
case BackupStatus::RESTORED:
return "RESTORED";
case BackupStatus::FAILED_TO_RESTORE:
return "FAILED_TO_RESTORE";
case BackupStatus::RESTORE_FAILED:
return "RESTORE_FAILED";
default:
break;
}

View File

@ -9,14 +9,14 @@ namespace DB
enum class BackupStatus
{
/// Statuses of making backups
MAKING_BACKUP,
BACKUP_COMPLETE,
FAILED_TO_BACKUP,
CREATING_BACKUP,
BACKUP_CREATED,
BACKUP_FAILED,
/// Status of restoring
RESTORING,
RESTORED,
FAILED_TO_RESTORE,
RESTORE_FAILED,
MAX,
};

View File

@ -5,6 +5,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/IBackupEntry.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupCoordinationRemote.h>
#include <Backups/BackupCoordinationLocal.h>
#include <Backups/RestoreCoordinationRemote.h>
@ -18,7 +19,6 @@
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
@ -27,28 +27,95 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
using OperationID = BackupsWorker::OperationID;
namespace Stage = BackupCoordinationStage;
namespace
{
/// Coordination status meaning that a host finished its work.
constexpr const char * kCompletedCoordinationStatus = "completed";
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendErrorToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
if (!coordination_zk_path.empty())
{
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
}
else
{
return std::make_shared<BackupCoordinationLocal>();
}
}
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
{
if (!coordination_zk_path.empty())
{
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<RestoreCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
}
else
{
return std::make_shared<RestoreCoordinationLocal>();
}
}
/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
{
if (!coordination)
return;
try
{
coordination->setErrorStatus(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
if (coordination)
coordination->setError(current_host, exception);
}
catch (...)
{
}
}
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
{
try
{
throw;
}
catch (const Exception & e)
{
sendExceptionToCoordination(coordination, current_host, e);
}
catch (...)
{
coordination->setError(current_host, Exception{getCurrentExceptionCode(), getCurrentExceptionMessage(true, true)});
}
}
bool isFinalStatus(BackupStatus status)
{
return (status == BackupStatus::BACKUP_CREATED) || (status == BackupStatus::BACKUP_FAILED) || (status == BackupStatus::RESTORED)
|| (status == BackupStatus::RESTORE_FAILED);
}
bool isErrorStatus(BackupStatus status)
{
return (status == BackupStatus::BACKUP_FAILED) || (status == BackupStatus::RESTORE_FAILED);
}
/// Used to change num_active_backups.
size_t getNumActiveBackupsChange(BackupStatus status)
{
return status == BackupStatus::CREATING_BACKUP;
}
/// Used to change num_active_restores.
size_t getNumActiveRestoresChange(BackupStatus status)
{
return status == BackupStatus::RESTORING;
}
}
@ -60,7 +127,8 @@ BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threa
/// We set max_free_threads = 0 because we don't want to keep any threads if there is no BACKUP or RESTORE query running right now.
}
UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
OperationID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
{
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*backup_or_restore_query);
if (backup_query.kind == ASTBackupQuery::Kind::BACKUP)
@ -70,379 +138,525 @@ UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutable
}
UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
{
auto backup_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto backup_settings = BackupSettings::fromBackupQuery(*backup_query);
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
bool on_cluster = !backup_query->cluster.empty();
if (!backup_settings.backup_uuid)
backup_settings.backup_uuid = UUIDHelpers::generateV4();
UUID backup_uuid = *backup_settings.backup_uuid;
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
if (on_cluster || backup_settings.async)
/// `backup_id` will be used as a key to the `infos` map, so it should be unique.
OperationID backup_id;
if (backup_settings.internal)
backup_id = "internal-" + toString(UUIDHelpers::generateV4()); /// Always generate `backup_id` for internal backup to avoid collision if both internal and non-internal backups are on the same host
else if (!backup_settings.id.empty())
backup_id = backup_settings.id;
else
backup_id = toString(*backup_settings.backup_uuid);
std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
/// if it's not created here. However to handle errors better it's better to make a coordination here because this way
/// if an exception will be thrown in startMakingBackup() other hosts will know about that.
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
}
addInfo(backup_uuid, backup_info.toString(), BackupStatus::MAKING_BACKUP, backup_settings.internal);
try
{
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
addInfo(backup_id, backup_info.toString(), backup_settings.internal, BackupStatus::CREATING_BACKUP);
auto job = [this,
backup_uuid,
/// Prepare context to use.
ContextPtr context_in_use = context;
ContextMutablePtr mutable_context;
bool on_cluster = !backup_query->cluster.empty();
if (on_cluster || backup_settings.async)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = mutable_context = Context::createCopy(context);
}
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_query, backup_id, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context]
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_info,
backup_coordination,
context_in_use,
mutable_context,
/* called_async= */ true);
});
}
else
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_info,
on_cluster,
backup_coordination,
context_in_use,
mutable_context](bool async) mutable
mutable_context,
/* called_async= */ false);
}
return backup_id;
}
catch (...)
{
std::optional<CurrentThread::QueryScope> query_scope;
std::shared_ptr<IBackupCoordination> backup_coordination;
SCOPE_EXIT_SAFE(if (backup_coordination && !backup_settings.internal) backup_coordination->drop(););
try
{
if (async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context_in_use->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context_in_use->getMacros()->expand(backup_query->cluster);
cluster = context_in_use->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_settings.coordination_zk_path.empty())
{
backup_coordination = std::make_shared<BackupCoordinationRemote>(
backup_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
backup_coordination = std::make_shared<BackupCoordinationLocal>();
}
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context_in_use;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitStatus(all_hosts, kCompletedCoordinationStatus);
}
else
{
backup_query->setCurrentDatabase(context_in_use->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStatus(backup_settings.host_id, kCompletedCoordinationStatus, "");
}
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
backup->finalizeWriting();
/// Close the backup.
backup.reset();
setStatus(backup_uuid, BackupStatus::BACKUP_COMPLETE);
}
catch (...)
{
/// Something bad happened, the backup has not built.
setStatus(backup_uuid, BackupStatus::FAILED_TO_BACKUP);
sendErrorToCoordination(backup_coordination, backup_settings.host_id);
if (!async)
throw;
}
};
if (backup_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
return backup_uuid;
/// Something bad happened, the backup has not built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
throw;
}
}
UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
void BackupsWorker::doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
BackupSettings backup_settings,
const BackupInfo & backup_info,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async)
{
std::optional<CurrentThread::QueryScope> query_scope;
try
{
if (called_async)
{
query_scope.emplace(mutable_context);
setThreadName("BackupWorker");
}
bool on_cluster = !backup_query->cluster.empty();
assert(mutable_context || (!on_cluster && !called_async));
/// Checks access rights if this is not ON CLUSTER query.
/// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.)
auto required_access = getRequiredAccessToBackup(backup_query->elements);
if (!on_cluster)
context->checkAccess(required_access);
ClusterPtr cluster;
if (on_cluster)
{
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
if (backup_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(*backup_settings.backup_uuid);
}
}
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
/// Opens a backup for writing.
BackupFactory::CreateParams backup_create_params;
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
backup_create_params.context = context;
backup_create_params.backup_info = backup_info;
backup_create_params.base_backup_info = backup_settings.base_backup_info;
backup_create_params.compression_method = backup_settings.compression_method;
backup_create_params.compression_level = backup_settings.compression_level;
backup_create_params.password = backup_settings.password;
backup_create_params.is_internal_backup = backup_settings.internal;
backup_create_params.backup_coordination = backup_coordination;
backup_create_params.backup_uuid = backup_settings.backup_uuid;
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = backup_settings.shard_num;
params.only_replica_num = backup_settings.replica_num;
params.access_to_check = required_access;
backup_settings.copySettingsToQuery(*backup_query);
// executeDDLQueryOnCluster() will return without waiting for completion
mutable_context->setSetting("distributed_ddl_task_timeout", Field{0});
mutable_context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
backup_query->setCurrentDatabase(context->getCurrentDatabase());
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
}
size_t num_files = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
{
backup->finalizeWriting();
num_files = backup->getNumFiles();
uncompressed_size = backup->getUncompressedSize();
compressed_size = backup->getCompressedSize();
}
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
}
catch (...)
{
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startMakingBackup().
throw;
}
}
}
OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
{
UUID restore_uuid = UUIDHelpers::generateV4();
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
auto restore_settings = RestoreSettings::fromRestoreQuery(*restore_query);
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
bool on_cluster = !restore_query->cluster.empty();
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
if (restore_settings.async || on_cluster)
/// `restore_id` will be used as a key to the `infos` map, so it should be unique.
OperationID restore_id;
if (restore_settings.internal)
restore_id = "internal-" + toString(UUIDHelpers::generateV4()); /// Always generate `restore_id` for internal restore to avoid collision if both internal and non-internal restores are on the same host
else if (!restore_settings.id.empty())
restore_id = restore_settings.id;
else
restore_id = toString(UUIDHelpers::generateV4());
std::shared_ptr<IRestoreCoordination> restore_coordination;
if (restore_settings.internal)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = Context::createCopy(context);
/// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
/// if it's not created here. However to handle errors better it's better to make a coordination here because this way
/// if an exception will be thrown in startRestoring() other hosts will know about that.
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
}
addInfo(restore_uuid, backup_info.toString(), BackupStatus::RESTORING, restore_settings.internal);
try
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
addInfo(restore_id, backup_info.toString(), restore_settings.internal, BackupStatus::RESTORING);
auto job = [this,
restore_uuid,
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
bool on_cluster = !restore_query->cluster.empty();
if (restore_settings.async || on_cluster)
{
/// For ON CLUSTER queries we will need to change some settings.
/// For ASYNC queries we have to clone the context anyway.
context_in_use = Context::createCopy(context);
}
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, restore_settings, backup_info, restore_coordination, context_in_use] {
doRestore(
restore_query,
restore_id,
restore_settings,
backup_info,
restore_coordination,
context_in_use,
/* called_async= */ true);
});
}
else
{
doRestore(
restore_query,
restore_id,
restore_settings,
backup_info,
on_cluster,
context_in_use](bool async) mutable
restore_coordination,
context_in_use,
/* called_async= */ false);
}
return restore_id;
}
catch (...)
{
std::optional<CurrentThread::QueryScope> query_scope;
std::shared_ptr<IRestoreCoordination> restore_coordination;
SCOPE_EXIT_SAFE(if (restore_coordination && !restore_settings.internal) restore_coordination->drop(););
try
{
if (async)
{
query_scope.emplace(context_in_use);
setThreadName("RestoreWorker");
}
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context_in_use;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context_in_use->getCurrentDatabase();
/// Checks access rights if this is ON CLUSTER query.
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
if (on_cluster)
{
restore_query->cluster = context_in_use->getMacros()->expand(restore_query->cluster);
cluster = context_in_use->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different set of tables and so the required access rights can differ too.
/// So the right way is pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use};
dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);
}
}
/// Make a restore coordination.
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context_in_use->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(restore_uuid);
}
if (!restore_settings.coordination_zk_path.empty())
{
restore_coordination = std::make_shared<RestoreCoordinationRemote>(
restore_settings.coordination_zk_path,
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
}
else
{
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
}
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
restore_settings.copySettingsToQuery(*restore_query);
// executeDDLQueryOnCluster() will return without waiting for completion
context_in_use->setSetting("distributed_ddl_task_timeout", Field{0});
context_in_use->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(restore_query, context_in_use, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitStatus(all_hosts, kCompletedCoordinationStatus);
}
else
{
restore_query->setCurrentDatabase(current_database);
/// Restore metadata and prepare data restoring tasks.
DataRestoreTasks data_restore_tasks;
{
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context_in_use};
data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE);
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStatus(restore_settings.host_id, kCompletedCoordinationStatus, "");
}
setStatus(restore_uuid, BackupStatus::RESTORED);
}
catch (...)
{
/// Something bad happened, the backup has not built.
setStatus(restore_uuid, BackupStatus::FAILED_TO_RESTORE);
sendErrorToCoordination(restore_coordination, restore_settings.host_id);
if (!async)
throw;
}
};
if (restore_settings.async)
backups_thread_pool.scheduleOrThrowOnError([job]() mutable { job(true); });
else
job(false);
return restore_uuid;
/// Something bad happened, the backup has not built.
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
throw;
}
}
void BackupsWorker::addInfo(const UUID & uuid, const String & backup_name, BackupStatus status, bool internal)
void BackupsWorker::doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
RestoreSettings restore_settings,
const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
{
std::optional<CurrentThread::QueryScope> query_scope;
try
{
if (called_async)
{
query_scope.emplace(context);
setThreadName("RestoreWorker");
}
/// Open the backup for reading.
BackupFactory::CreateParams backup_open_params;
backup_open_params.open_mode = IBackup::OpenMode::READ;
backup_open_params.context = context;
backup_open_params.backup_info = backup_info;
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
setNumFilesAndSize(restore_id, backup->getNumFiles(), backup->getUncompressedSize(), backup->getCompressedSize());
String current_database = context->getCurrentDatabase();
/// Checks access rights if this is ON CLUSTER query.
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();
if (on_cluster)
{
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
/// We cannot just use access checking provided by the function executeDDLQueryOnCluster(): it would be incorrect
/// because different replicas can contain different set of tables and so the required access rights can differ too.
/// So the right way is pass through the entire cluster and check access for each host.
auto addresses = cluster->filterAddressesByShardOrReplica(restore_settings.shard_num, restore_settings.replica_num);
for (const auto * address : addresses)
{
restore_settings.host_id = address->toString();
auto restore_elements = restore_query->elements;
String addr_database = address->default_database.empty() ? current_database : address->default_database;
for (auto & element : restore_elements)
element.setCurrentDatabase(addr_database);
RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context};
dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);
}
}
/// Make a restore coordination.
if (on_cluster && restore_settings.coordination_zk_path.empty())
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(UUIDHelpers::generateV4());
}
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
/// Do RESTORE.
if (on_cluster)
{
DDLQueryOnClusterParams params;
params.cluster = cluster;
params.only_shard_num = restore_settings.shard_num;
params.only_replica_num = restore_settings.replica_num;
restore_settings.copySettingsToQuery(*restore_query);
// executeDDLQueryOnCluster() will return without waiting for completion
context->setSetting("distributed_ddl_task_timeout", Field{0});
context->setSetting("distributed_ddl_output_mode", Field{"none"});
executeDDLQueryOnCluster(restore_query, context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
}
else
{
restore_query->setCurrentDatabase(current_database);
/// Restore metadata and prepare data restoring tasks.
DataRestoreTasks data_restore_tasks;
{
RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination,
backup, context};
data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE);
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
setStatus(restore_id, BackupStatus::RESTORED);
}
catch (...)
{
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}
else
{
/// setStatus() and sendCurrentExceptionToCoordination() will be called by startRestoring().
throw;
}
}
}
void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status)
{
Info info;
info.uuid = uuid;
info.backup_name = backup_name;
info.status = status;
info.status_changed_time = time(nullptr);
info.id = id;
info.name = name;
info.internal = internal;
info.status = status;
info.start_time = std::chrono::system_clock::now();
if (isFinalStatus(status))
info.end_time = info.start_time;
std::lock_guard lock{infos_mutex};
infos[uuid] = std::move(info);
auto it = infos.find(id);
if (it != infos.end())
{
/// It's better not allow to overwrite the current status if it's in progress.
auto current_status = it->second.status;
if (!isFinalStatus(current_status))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot start a backup or restore: ID {} is already in use", id);
}
infos[id] = std::move(info);
num_active_backups += getNumActiveBackupsChange(status);
num_active_restores += getNumActiveRestoresChange(status);
}
void BackupsWorker::setStatus(const UUID & uuid, BackupStatus status)
void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw_if_error)
{
std::lock_guard lock{infos_mutex};
auto & info = infos.at(uuid);
info.status = status;
info.status_changed_time = time(nullptr);
if (status == BackupStatus::BACKUP_COMPLETE)
auto it = infos.find(id);
if (it == infos.end())
{
LOG_INFO(log, "{} {} was created successfully", (info.internal ? "Internal backup" : "Backup"), info.backup_name);
}
else if (status == BackupStatus::RESTORED)
{
LOG_INFO(log, "Restored from {} {} successfully", (info.internal ? "internal backup" : "backup"), info.backup_name);
}
else if ((status == BackupStatus::FAILED_TO_BACKUP) || (status == BackupStatus::FAILED_TO_RESTORE))
{
String start_of_message;
if (status == BackupStatus::FAILED_TO_BACKUP)
start_of_message = fmt::format("Failed to create {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
if (throw_if_error)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
else
start_of_message = fmt::format("Failed to restore from {} {}", (info.internal ? "internal backup" : "backup"), info.backup_name);
tryLogCurrentException(log, start_of_message);
return;
}
auto & info = it->second;
auto old_status = info.status;
info.status = status;
if (isFinalStatus(status))
info.end_time = std::chrono::system_clock::now();
if (isErrorStatus(status))
{
info.error_message = getCurrentExceptionMessage(false);
info.exception = std::current_exception();
}
num_active_backups += getNumActiveBackupsChange(status) - getNumActiveBackupsChange(old_status);
num_active_restores += getNumActiveRestoresChange(status) - getNumActiveRestoresChange(old_status);
}
void BackupsWorker::wait(const UUID & backup_or_restore_uuid, bool rethrow_exception)
void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size)
{
std::lock_guard lock{infos_mutex};
auto it = infos.find(id);
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
auto & info = it->second;
info.num_files = num_files;
info.uncompressed_size = uncompressed_size;
info.compressed_size = compressed_size;
}
void BackupsWorker::wait(const OperationID & id, bool rethrow_exception)
{
std::unique_lock lock{infos_mutex};
status_changed.wait(lock, [&]
{
auto it = infos.find(backup_or_restore_uuid);
auto it = infos.find(id);
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
const auto & info = it->second;
auto current_status = info.status;
if (rethrow_exception && ((current_status == BackupStatus::FAILED_TO_BACKUP) || (current_status == BackupStatus::FAILED_TO_RESTORE)))
if (rethrow_exception && isErrorStatus(current_status))
std::rethrow_exception(info.exception);
return (current_status == BackupStatus::BACKUP_COMPLETE) || (current_status == BackupStatus::RESTORED);
return isFinalStatus(current_status);
});
}
BackupsWorker::Info BackupsWorker::getInfo(const UUID & backup_or_restore_uuid) const
BackupsWorker::Info BackupsWorker::getInfo(const OperationID & id) const
{
std::lock_guard lock{infos_mutex};
auto it = infos.find(backup_or_restore_uuid);
auto it = infos.find(id);
if (it == infos.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id);
return it->second;
}
@ -451,20 +665,24 @@ std::vector<BackupsWorker::Info> BackupsWorker::getAllInfos() const
std::vector<Info> res_infos;
std::lock_guard lock{infos_mutex};
for (const auto & info : infos | boost::adaptors::map_values)
res_infos.push_back(info);
{
if (!info.internal)
res_infos.push_back(info);
}
return res_infos;
}
void BackupsWorker::shutdown()
{
size_t num_active_backups = backups_thread_pool.active();
size_t num_active_restores = restores_thread_pool.active();
if (!num_active_backups && !num_active_restores)
return;
LOG_INFO(log, "Waiting for {} backup and {} restore tasks to be finished", num_active_backups, num_active_restores);
bool has_active_backups_and_restores = (num_active_backups || num_active_restores);
if (has_active_backups_and_restores)
LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores);
backups_thread_pool.wait();
restores_thread_pool.wait();
LOG_INFO(log, "All backup and restore tasks have finished");
if (has_active_backups_and_restores)
LOG_INFO(log, "All backup and restore tasks have finished");
}
}

View File

@ -11,6 +11,13 @@ namespace Poco::Util { class AbstractConfiguration; }
namespace DB
{
class ASTBackupQuery;
struct BackupSettings;
struct RestoreSettings;
struct BackupInfo;
class IBackupCoordination;
class IRestoreCoordination;
/// Manager of backups and restores: executes backups and restores' threads in the background.
/// Keeps information about backups and restores started in this session.
class BackupsWorker
@ -21,47 +28,75 @@ public:
/// Waits until all tasks have been completed.
void shutdown();
/// Starts executing a BACKUP or RESTORE query. Returns UUID of the operation.
UUID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID.
using OperationID = String;
/// Starts executing a BACKUP or RESTORE query. Returns ID of the operation.
OperationID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
/// Waits until a BACKUP or RESTORE query started by start() is finished.
/// The function returns immediately if the operation is already finished.
void wait(const UUID & backup_or_restore_uuid, bool rethrow_exception = true);
void wait(const OperationID & backup_or_restore_id, bool rethrow_exception = true);
/// Information about executing a BACKUP or RESTORE query started by calling start().
struct Info
{
UUID uuid;
/// Backup's or restore's operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID.
OperationID id;
/// Backup's name, a string like "Disk('backups', 'my_backup')"
String backup_name;
String name;
BackupStatus status;
time_t status_changed_time;
String error_message;
std::exception_ptr exception;
/// Whether this operation is internal, i.e. caused by another BACKUP or RESTORE operation.
/// For example BACKUP ON CLUSTER executes an internal BACKUP commands per each node.
/// This operation is internal and should not be shown in system.backups
bool internal = false;
/// Status of backup or restore operation.
BackupStatus status;
/// Number of files in the backup (including backup's metadata; only unique files are counted).
size_t num_files = 0;
/// Size of all files in the backup (including backup's metadata; only unique files are counted).
UInt64 uncompressed_size = 0;
/// Size of the backup if it's stored as an archive; or the same as `uncompressed_size` if the backup is stored as a folder.
UInt64 compressed_size = 0;
/// Set only if there was an error.
std::exception_ptr exception;
String error_message;
std::chrono::system_clock::time_point start_time;
std::chrono::system_clock::time_point end_time;
};
Info getInfo(const UUID & backup_or_restore_uuid) const;
Info getInfo(const OperationID & id) const;
std::vector<Info> getAllInfos() const;
private:
UUID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
UUID startRestoring(const ASTPtr & query, ContextMutablePtr context);
OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void addInfo(const UUID & uuid, const String & backup_name, BackupStatus status, bool internal);
void setStatus(const UUID & uuid, BackupStatus status);
void doBackup(const std::shared_ptr<ASTBackupQuery> & backup_query, const OperationID & backup_id, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const std::shared_ptr<ASTBackupQuery> & restore_query, const OperationID & restore_id, RestoreSettings restore_settings, const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context, bool called_async);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 uncompressed_size, UInt64 compressed_size);
ThreadPool backups_thread_pool;
ThreadPool restores_thread_pool;
std::unordered_map<UUID, Info> infos;
std::unordered_map<OperationID, Info> infos;
std::condition_variable status_changed;
std::atomic<size_t> num_active_backups = 0;
std::atomic<size_t> num_active_restores = 0;
mutable std::mutex infos_mutex;
Poco::Logger * log;
};

View File

@ -36,6 +36,15 @@ public:
/// Returns UUID of the backup.
virtual UUID getUUID() const = 0;
/// Returns the number of unique files in the backup.
virtual size_t getNumFiles() const = 0;
/// Returns the total size of unique files in the backup.
virtual UInt64 getUncompressedSize() const = 0;
/// Returns the compressed size of the backup. If the backup is not stored as an archive it returns the same as getUncompressedSize().
virtual UInt64 getCompressedSize() const = 0;
/// Returns names of entries stored in a specified directory in the backup.
/// If `directory` is empty or '/' the functions returns entries in the backup's root.
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;

View File

@ -18,11 +18,11 @@ class IBackupCoordination
public:
virtual ~IBackupCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
struct PartNameAndChecksum
{
@ -115,9 +115,6 @@ public:
/// Returns the list of all the archive suffixes which were generated.
virtual Strings getAllArchiveSuffixes() const = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -16,11 +16,11 @@ class IRestoreCoordination
public:
virtual ~IRestoreCoordination() = default;
/// Sets the current status and waits for other hosts to come to this status too.
virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0;
virtual void setErrorStatus(const String & current_host, const Exception & exception) = 0;
virtual Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) = 0;
virtual Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) = 0;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
static constexpr const char * kErrorStatus = "error";
@ -34,9 +34,6 @@ public:
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
/// The function returns false if this access storage is being already restored by another replica.
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
/// Removes remotely stored information.
virtual void drop() {}
};
}

View File

@ -7,20 +7,20 @@ namespace DB
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStatus(const String &, const String &, const String &)
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
{
}
void RestoreCoordinationLocal::setErrorStatus(const String &, const Exception &)
void RestoreCoordinationLocal::setError(const String &, const Exception &)
{
}
Strings RestoreCoordinationLocal::waitStatus(const Strings &, const String &)
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
{
return {};
}
Strings RestoreCoordinationLocal::waitStatusFor(const Strings &, const String &, UInt64)
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -18,11 +18,11 @@ public:
RestoreCoordinationLocal();
~RestoreCoordinationLocal() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;

View File

@ -6,57 +6,86 @@
namespace DB
{
RestoreCoordinationRemote::RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
RestoreCoordinationRemote::RestoreCoordinationRemote(
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
: zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("RestoreCoordination"))
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
{
createRootNodes();
stage_sync.emplace(
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination"));
}
RestoreCoordinationRemote::~RestoreCoordinationRemote() = default;
RestoreCoordinationRemote::~RestoreCoordinationRemote()
{
try
{
if (remove_zk_nodes_in_destructor)
removeAllNodes();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
zkutil::ZooKeeperPtr RestoreCoordinationRemote::getZooKeeper() const
{
std::lock_guard lock{mutex};
if (!zookeeper || zookeeper->expired())
{
zookeeper = get_zookeeper();
/// It's possible that we connected to different [Zoo]Keeper instance
/// so we may read a bit stale state.
zookeeper->sync(zookeeper_path);
}
return zookeeper;
}
void RestoreCoordinationRemote::createRootNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
auto zk = getZooKeeper();
zk->createAncestors(zookeeper_path);
zk->createIfNotExists(zookeeper_path, "");
zk->createIfNotExists(zookeeper_path + "/repl_databases_tables_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_tables_data_acquired", "");
zk->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
}
void RestoreCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
void RestoreCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
{
status_sync.set(current_host, new_status, message);
stage_sync->set(current_host, new_stage, message);
}
void RestoreCoordinationRemote::setErrorStatus(const String & current_host, const Exception & exception)
void RestoreCoordinationRemote::setError(const String & current_host, const Exception & exception)
{
status_sync.setError(current_host, exception);
stage_sync->setError(current_host, exception);
}
Strings RestoreCoordinationRemote::waitStatus(const Strings & all_hosts, const String & status_to_wait)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
{
return status_sync.wait(all_hosts, status_to_wait);
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings RestoreCoordinationRemote::waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms)
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return status_sync.waitFor(all_hosts, status_to_wait, timeout_ms);
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_databases_tables_acquired/" + escapeForFileName(database_zk_path);
zookeeper->createIfNotExists(path, "");
zk->createIfNotExists(path, "");
path += "/" + escapeForFileName(table_name);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -65,10 +94,10 @@ bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const S
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_tables_data_acquired/" + escapeForFileName(table_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -77,10 +106,10 @@ bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const St
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
{
auto zookeeper = get_zookeeper();
auto zk = getZooKeeper();
String path = zookeeper_path + "/repl_access_storages_acquired/" + escapeForFileName(access_storage_zk_path);
auto code = zookeeper->tryCreate(path, "", zkutil::CreateMode::Persistent);
auto code = zk->tryCreate(path, "", zkutil::CreateMode::Persistent);
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
throw zkutil::KeeperException(code, path);
@ -89,13 +118,15 @@ bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & ac
void RestoreCoordinationRemote::removeAllNodes()
{
auto zookeeper = get_zookeeper();
zookeeper->removeRecursive(zookeeper_path);
}
/// Usually this function is called by the initiator when a restore operation is complete so we don't need the coordination anymore.
///
/// However there can be a rare situation when this function is called after an error occurs on the initiator of a query
/// while some hosts are still restoring something. Removing all the nodes will remove the parent node of the restore coordination
/// at `zookeeper_path` which might cause such hosts to stop with exception "ZNONODE". Or such hosts might still do some part
/// of their restore work before that.
void RestoreCoordinationRemote::drop()
{
removeAllNodes();
auto zk = getZooKeeper();
zk->removeRecursive(zookeeper_path);
}
}

View File

@ -1,7 +1,7 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStatusSync.h>
#include <Backups/BackupCoordinationStageSync.h>
namespace DB
@ -11,14 +11,14 @@ namespace DB
class RestoreCoordinationRemote : public IRestoreCoordination
{
public:
RestoreCoordinationRemote(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
~RestoreCoordinationRemote() override;
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
void setStatus(const String & current_host, const String & new_status, const String & message) override;
void setErrorStatus(const String & current_host, const Exception & exception) override;
Strings waitStatus(const Strings & all_hosts, const String & status_to_wait) override;
Strings waitStatusFor(const Strings & all_hosts, const String & status_to_wait, UInt64 timeout_ms) override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -31,10 +31,8 @@ public:
/// The function returns false if this access storage is being already restored by another replica.
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
/// Removes remotely stored information.
void drop() override;
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
void createRootNodes();
void removeAllNodes();
@ -42,7 +40,12 @@ private:
const String zookeeper_path;
const zkutil::GetZooKeeper get_zookeeper;
BackupCoordinationStatusSync status_sync;
const bool remove_zk_nodes_in_destructor;
std::optional<BackupCoordinationStageSync> stage_sync;
mutable std::mutex mutex;
mutable zkutil::ZooKeeperPtr zookeeper;
};
}

View File

@ -143,6 +143,7 @@ namespace
/// List of restore settings except base_backup_name and cluster_host_ids.
#define LIST_OF_RESTORE_SETTINGS(M) \
M(String, id) \
M(String, password) \
M(Bool, structure_only) \
M(RestoreTableCreationMode, create_table) \

View File

@ -41,6 +41,9 @@ using RestoreUDFCreationMode = RestoreAccessCreationMode;
/// Settings specified in the "SETTINGS" clause of a RESTORE query.
struct RestoreSettings
{
/// ID of the restore operation, to identify it in the system.backups table. Auto-generated if not set.
String id;
/// Base backup, with this setting we can override the location of the base backup while restoring.
/// Any incremental backup keeps inside the information about its base backup, so using this setting is optional.
std::optional<BackupInfo> base_backup_info;

View File

@ -1,5 +1,6 @@
#include <Backups/RestorerFromBackup.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/BackupCoordinationStage.h>
#include <Backups/BackupSettings.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupEntry.h>
@ -38,20 +39,10 @@ namespace ErrorCodes
}
namespace Stage = BackupCoordinationStage;
namespace
{
/// Finding databases and tables in the backup which we're going to restore.
constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
constexpr const char * kCreatingDatabasesStatus = "creating databases";
/// Creating tables or finding them and checking their definition.
constexpr const char * kCreatingTablesStatus = "creating tables";
/// Inserting restored data to tables.
constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables";
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
@ -102,6 +93,7 @@ RestorerFromBackup::RestorerFromBackup(
, restore_coordination(restore_coordination_)
, backup(backup_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
, log(&Poco::Logger::get("RestorerFromBackup"))
{
@ -112,7 +104,7 @@ RestorerFromBackup::~RestorerFromBackup() = default;
RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
{
/// run() can be called onle once.
if (!current_status.empty())
if (!current_stage.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
/// Find other hosts working along with us to execute this ON CLUSTER query.
@ -126,7 +118,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
findRootPathsInBackup();
/// Find all the databases and tables which we will read from the backup.
setStatus(kFindingTablesInBackupStatus);
setStage(Stage::FINDING_TABLES_IN_BACKUP);
findDatabasesAndTablesInBackup();
/// Check access rights.
@ -136,27 +128,31 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
return {};
/// Create databases using the create queries read from the backup.
setStatus(kCreatingDatabasesStatus);
setStage(Stage::CREATING_DATABASES);
createDatabases();
/// Create tables using the create queries read from the backup.
setStatus(kCreatingTablesStatus);
setStage(Stage::CREATING_TABLES);
createTables();
/// All what's left is to insert data to tables.
/// No more data restoring tasks are allowed after this point.
setStatus(kInsertingDataToTablesStatus);
setStage(Stage::INSERTING_DATA_TO_TABLES);
return getDataRestoreTasks();
}
void RestorerFromBackup::setStatus(const String & new_status, const String & message)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
LOG_TRACE(log, "{}", toUpperFirst(new_stage));
current_stage = new_stage;
if (restore_coordination)
{
restore_coordination->setStatus(restore_settings.host_id, new_status, message);
restore_coordination->waitStatus(all_hosts, new_status);
restore_coordination->setStage(restore_settings.host_id, new_stage, message);
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(all_hosts, new_stage);
}
}
@ -814,14 +810,14 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
data_restore_tasks.push_back(std::move(new_task));
}
void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
{
if (current_status == kInsertingDataToTablesStatus)
if (current_stage == Stage::INSERTING_DATA_TO_TABLES)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
insertAtEnd(data_restore_tasks, std::move(new_tasks));
}

View File

@ -73,6 +73,7 @@ private:
std::shared_ptr<IRestoreCoordination> restore_coordination;
BackupPtr backup;
ContextMutablePtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds create_table_timeout;
Poco::Logger * log;
@ -100,7 +101,7 @@ private:
DataRestoreTasks getDataRestoreTasks();
void setStatus(const String & new_status, const String & message = "");
void setStage(const String & new_stage, const String & message = "");
struct DatabaseInfo
{
@ -124,7 +125,7 @@ private:
std::vector<QualifiedTableName> findTablesWithoutDependencies() const;
String current_status;
String current_stage;
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<QualifiedTableName, TableInfo> table_infos;
std::vector<DataRestoreTask> data_restore_tasks;

View File

@ -144,8 +144,8 @@ endif ()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp)
list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h Functions/FunctionsLogical.h)
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp)
list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h)
list (APPEND dbms_sources
AggregateFunctions/IAggregateFunction.cpp

View File

@ -69,6 +69,7 @@
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
#include <boost/algorithm/string/replace.hpp>
#include <IO/ForkWriteBuffer.h>
namespace fs = std::filesystem;
@ -403,7 +404,6 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initOutputFormat(block, parsed_query);
@ -414,7 +414,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
if (need_render_progress && (stdout_is_a_tty || is_interactive) && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput();
try
@ -434,7 +434,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
{
if (select_into_file)
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
progress_indication.writeProgress();
}
@ -511,7 +511,7 @@ try
String current_format = format;
select_into_file = false;
select_into_file_and_stdout = false;
/// The query can specify output format or output file.
if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(parsed_query.get()))
{
@ -554,6 +554,13 @@ try
compression_level
);
if (query_with_output->is_into_outfile_with_stdout)
{
select_into_file_and_stdout = true;
out_file_buf = std::make_unique<ForkWriteBuffer>(std::vector<WriteBufferPtr>{std::move(out_file_buf),
std::make_shared<WriteBufferFromFileDescriptor>(STDOUT_FILENO)});
}
// We are writing to file, so default format is the same as in non-interactive mode.
if (is_interactive && is_default_format)
current_format = "TabSeparated";
@ -576,9 +583,14 @@ try
if (has_vertical_output_suffix)
current_format = "Vertical";
/// It is not clear how to write progress intermixed with data with parallel formatting.
bool logs_into_stdout = server_logs_file == "-";
bool extras_into_stdout = need_render_progress || logs_into_stdout;
bool select_only_into_file = select_into_file && !select_into_file_and_stdout;
/// It is not clear how to write progress and logs
/// intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!need_render_progress || select_into_file)
if (!extras_into_stdout || select_only_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else

View File

@ -181,6 +181,7 @@ protected:
String format; /// Query results output format.
bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering.
bool select_into_file_and_stdout = false; /// If writing result INTO OUTFILE AND STDOUT. It affects progress rendering.
bool is_default_format = true; /// false, if format is set in the config or command line.
size_t format_max_block_size = 0; /// Max block size for console output.
String insert_format; /// Format of INSERT data that is read from stdin in batch mode.

View File

@ -298,7 +298,7 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
{
while (mask)
{
size_t index = __builtin_ctzll(mask);
size_t index = std::countr_zero(mask);
res_data.push_back(data_pos[index]);
#ifdef __BMI__
mask = _blsr_u64(mask);

View File

@ -240,7 +240,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
size_t res_chars_size = res->chars.size();
while (mask)
{
size_t index = __builtin_ctzll(mask);
size_t index = std::countr_zero(mask);
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n);
res_chars_size += n;

View File

@ -2,13 +2,14 @@
#include <Columns/ColumnVector.h>
#include <Common/typeid_cast.h>
#include <Common/HashTable/HashSet.h>
#include <bit>
#include "ColumnsCommon.h"
namespace DB
{
#if defined(__SSE2__) && defined(__POPCNT__)
#if defined(__SSE2__)
/// Transform 64-byte mask to 64-bit mask.
static UInt64 toBits64(const Int8 * bytes64)
{
@ -41,11 +42,11 @@ size_t countBytesInFilter(const UInt8 * filt, size_t start, size_t end)
const Int8 * end_pos = pos + (end - start);
#if defined(__SSE2__) && defined(__POPCNT__)
#if defined(__SSE2__)
const Int8 * end_pos64 = pos + (end - start) / 64 * 64;
for (; pos < end_pos64; pos += 64)
count += __builtin_popcountll(toBits64(pos));
count += std::popcount(toBits64(pos));
/// TODO Add duff device for tail?
#endif
@ -74,11 +75,11 @@ size_t countBytesInFilterWithNull(const IColumn::Filter & filt, const UInt8 * nu
const Int8 * pos2 = reinterpret_cast<const Int8 *>(null_map) + start;
const Int8 * end_pos = pos + (end - start);
#if defined(__SSE2__) && defined(__POPCNT__)
#if defined(__SSE2__)
const Int8 * end_pos64 = pos + (end - start) / 64 * 64;
for (; pos < end_pos64; pos += 64, pos2 += 64)
count += __builtin_popcountll(toBits64(pos) & ~toBits64(pos2));
count += std::popcount(toBits64(pos) & ~toBits64(pos2));
/// TODO Add duff device for tail?
#endif
@ -259,7 +260,7 @@ namespace
{
while (mask)
{
size_t index = __builtin_ctzll(mask);
size_t index = std::countr_zero(mask);
copy_array(offsets_pos + index);
#ifdef __BMI__
mask = _blsr_u64(mask);

View File

@ -36,7 +36,7 @@ inline UInt64 bytes64MaskToBits64Mask(const UInt8 * bytes64)
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64)), zero32))) & 0xffffffff)
| (static_cast<UInt64>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64+32)), zero32))) << 32);
#elif defined(__SSE2__) && defined(__POPCNT__)
#elif defined(__SSE2__)
static const __m128i zero16 = _mm_setzero_si128();
UInt64 res =
(static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(

View File

@ -130,7 +130,7 @@ public:
private:
ReadBuffer & in;
/// The physical location of the current cell.
Locus locus;
Locus locus{};
/// The current position in the file as a cell number.
BucketIndex current_bucket_index = 0;
/// The number of bytes read.

View File

@ -0,0 +1,266 @@
#pragma once
#include <base/types.h>
#include <boost/core/noncopyable.hpp>
#include <mutex>
#include <memory>
#include <list>
#include <condition_variable>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
/*
* Controls how many threads can be allocated for a query (or another activity).
* There is a limited amount of slots for threads. It can be set with `setMaxConcurrency(limit)`.
*
* Lifecycle of a slot: free -> granted -> acquired -> free.
* free: slot is available to be allocated by any query.
* granted: slot is allocated by specific query, but not yet acquired by any thread.
* acquired: slot is allocated by specific query and acquired by a thread.
*
* USAGE:
* 1. Create an allocation for a query:
* `auto slots = ConcurrencyControl::instance().allocate(min, max);`
* It will allocate at least `min` and at most `max` slots.
* Note that `min` slots are granted immediately, but other `max - min` may be granted later.
* 2. For every thread a slot has to be acquired from that allocation:
* `while (auto slot = slots->tryAcquire()) createYourThread([slot = std::move(slot)] { ... });`
* This snippet can be used at query startup and for upscaling later.
* (both functions are non-blocking)
*
* Released slots are distributed between waiting allocations in a round-robin manner to provide fairness.
* Oversubscription is possible: total amount of allocated slots can exceed `setMaxConcurrency(limit)`
* because `min` amount of slots is allocated for each query unconditionally.
*/
class ConcurrencyControl : boost::noncopyable
{
public:
struct Allocation;
using AllocationPtr = std::shared_ptr<Allocation>;
using SlotCount = UInt64;
using Waiters = std::list<Allocation *>;
static constexpr SlotCount Unlimited = std::numeric_limits<SlotCount>::max();
// Scoped guard for acquired slot, see Allocation::tryAcquire()
struct Slot : boost::noncopyable
{
~Slot()
{
allocation->release();
}
private:
friend struct Allocation; // for ctor
explicit Slot(AllocationPtr && allocation_)
: allocation(std::move(allocation_))
{}
AllocationPtr allocation;
};
// FIXME: have to be unique_ptr, but ThreadFromGlobalPool does not support move semantics yet
using SlotPtr = std::shared_ptr<Slot>;
// Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max)
struct Allocation : std::enable_shared_from_this<Allocation>, boost::noncopyable
{
~Allocation()
{
// We have to lock parent's mutex to avoid race with grant()
// NOTE: shortcut can be added, but it requires Allocation::mutex lock even to check if shortcut is possible
parent.free(this);
}
// Take one already granted slot if available. Lock-free iff there is no granted slot.
[[nodiscard]] SlotPtr tryAcquire()
{
SlotCount value = granted.load();
while (value)
{
if (granted.compare_exchange_strong(value, value - 1))
{
std::unique_lock lock{mutex};
return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
}
}
return {}; // avoid unnecessary locking
}
SlotCount grantedCount() const
{
return granted;
}
private:
friend struct Slot; // for release()
friend class ConcurrencyControl; // for grant(), free() and ctor
Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {})
: parent(parent_)
, limit(limit_)
, allocated(granted_)
, granted(granted_)
, waiter(waiter_)
{
if (allocated < limit)
*waiter = this;
}
auto cancel()
{
std::unique_lock lock{mutex};
return std::pair{allocated - released,
allocated < limit ?
std::optional<Waiters::iterator>(waiter) :
std::optional<Waiters::iterator>()};
}
// Grant single slot to allocation, returns true iff more slot(s) are required
bool grant()
{
std::unique_lock lock{mutex};
granted++;
allocated++;
return allocated < limit;
}
// Release one slot and grant it to other allocation if required
void release()
{
parent.release(1);
std::unique_lock lock{mutex};
released++;
if (released > allocated)
abort();
}
ConcurrencyControl & parent;
const SlotCount limit;
std::mutex mutex; // the following values must be accessed under this mutex
SlotCount allocated; // allocated total (including already `released`)
SlotCount released = 0;
std::atomic<SlotCount> granted; // allocated, but not yet acquired
const Waiters::iterator waiter; // iterator to itself in Waiters list; valid iff allocated < limit
};
public:
ConcurrencyControl()
: cur_waiter(waiters.end())
{}
// WARNING: all Allocation objects MUST be destructed before ConcurrencyControl
// NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries
~ConcurrencyControl()
{
if (!waiters.empty())
abort();
}
// Allocate at least `min` and at most `max` slots.
// If not all `max` slots were successfully allocated, a subscription for later allocation is created
// Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread.
[[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max)
{
if (min > max)
throw DB::Exception("ConcurrencyControl: invalid allocation requirements", DB::ErrorCodes::LOGICAL_ERROR);
std::unique_lock lock{mutex};
// Acquire as much slots as we can, but not lower than `min`
SlotCount granted = std::max(min, std::min(max, available(lock)));
cur_concurrency += granted;
// Create allocation and start waiting if more slots are required
if (granted < max)
return AllocationPtr(new Allocation(*this, max, granted,
waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
else
return AllocationPtr(new Allocation(*this, max, granted));
}
void setMaxConcurrency(SlotCount value)
{
std::unique_lock lock{mutex};
max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
schedule(lock);
}
static ConcurrencyControl & instance()
{
static ConcurrencyControl result;
return result;
}
private:
friend struct Allocation; // for free() and release()
void free(Allocation * allocation)
{
// Allocation is allowed to be canceled even if there are:
// - `amount`: granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
// - `waiter`: active waiting for more slots to be allocated
// Thus Allocation destruction may require the following lock, to avoid race conditions
std::unique_lock lock{mutex};
auto [amount, waiter] = allocation->cancel();
cur_concurrency -= amount;
if (waiter)
{
if (cur_waiter == *waiter)
cur_waiter = waiters.erase(*waiter);
else
waiters.erase(*waiter);
}
schedule(lock);
}
void release(SlotCount amount)
{
std::unique_lock lock{mutex};
cur_concurrency -= amount;
schedule(lock);
}
// Round-robin scheduling of available slots among waiting allocations
void schedule(std::unique_lock<std::mutex> &)
{
while (cur_concurrency < max_concurrency && !waiters.empty())
{
cur_concurrency++;
if (cur_waiter == waiters.end())
cur_waiter = waiters.begin();
Allocation * allocation = *cur_waiter;
if (allocation->grant())
++cur_waiter;
else
cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
}
}
SlotCount available(std::unique_lock<std::mutex> &)
{
if (cur_concurrency < max_concurrency)
return max_concurrency - cur_concurrency;
else
return 0;
}
std::mutex mutex;
Waiters waiters;
Waiters::iterator cur_waiter; // round-robin pointer
SlotCount max_concurrency = Unlimited;
SlotCount cur_concurrency = 0;
};

View File

@ -3,6 +3,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
#include <bit>
#include <new>
#include <variant>
@ -21,17 +22,17 @@ struct StringKey24
inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
{
assert(n != 0);
return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
return {reinterpret_cast<const char *>(&n), 8ul - (std::countl_zero(n) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
{
assert(n.items[1] != 0);
return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.items[1]) >> 3)};
return {reinterpret_cast<const char *>(&n), 16ul - (std::countl_zero(n.items[1]) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
{
assert(n.c != 0);
return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
return {reinterpret_cast<const char *>(&n), 24ul - (std::countl_zero(n.c) >> 3)};
}
struct StringHashTableHash

View File

@ -11,6 +11,7 @@
#include <IO/WriteHelpers.h>
#include <Core/Defines.h>
#include <bit>
#include <cmath>
#include <cstring>
@ -205,7 +206,7 @@ struct TrailingZerosCounter<UInt32>
{
static int apply(UInt32 val)
{
return __builtin_ctz(val);
return std::countr_zero(val);
}
};
@ -214,7 +215,7 @@ struct TrailingZerosCounter<UInt64>
{
static int apply(UInt64 val)
{
return __builtin_ctzll(val);
return std::countr_zero(val);
}
};

View File

@ -5,6 +5,7 @@
#include <Common/formatIPv6.h>
#include <cstring>
#include <bit>
namespace DB
@ -89,7 +90,7 @@ bool matchIPv6Subnet(const uint8_t * addr, const uint8_t * cidr_addr, UInt8 pref
if (mask)
{
auto offset = __builtin_ctz(mask);
auto offset = std::countr_zero(mask);
if (prefix / 8 != offset)
return prefix / 8 < offset;

View File

@ -3,12 +3,18 @@
// MemoryTrackerBlockerInThread
thread_local uint64_t MemoryTrackerBlockerInThread::counter = 0;
thread_local VariableContext MemoryTrackerBlockerInThread::level = VariableContext::Global;
MemoryTrackerBlockerInThread::MemoryTrackerBlockerInThread(VariableContext level_)
: previous_level(level)
{
++counter;
level = level_;
}
MemoryTrackerBlockerInThread::MemoryTrackerBlockerInThread() : MemoryTrackerBlockerInThread(VariableContext::User)
{
}
MemoryTrackerBlockerInThread::~MemoryTrackerBlockerInThread()
{
--counter;

View File

@ -11,9 +11,12 @@ private:
static thread_local VariableContext level;
VariableContext previous_level;
public:
/// level_ - block in level and above
explicit MemoryTrackerBlockerInThread(VariableContext level_ = VariableContext::User);
explicit MemoryTrackerBlockerInThread(VariableContext level_);
public:
explicit MemoryTrackerBlockerInThread();
~MemoryTrackerBlockerInThread();
MemoryTrackerBlockerInThread(const MemoryTrackerBlockerInThread &) = delete;
@ -23,4 +26,6 @@ public:
{
return counter > 0 && current_level >= level;
}
friend class MemoryTracker;
};

View File

@ -78,7 +78,7 @@ private:
constexpr uint64_t nextAlphaSize(uint64_t x)
{
constexpr uint64_t alpha_map_elements_per_counter = 6;
return 1ULL << (sizeof(uint64_t) * 8 - __builtin_clzll(x * alpha_map_elements_per_counter));
return 1ULL << (sizeof(uint64_t) * 8 - std::countl_zero(x * alpha_map_elements_per_counter));
}
public:

View File

@ -1,6 +1,5 @@
#include <Common/StackTrace.h>
#include <Core/Defines.h>
#include <Common/Dwarf.h>
#include <Common/Elf.h>
#include <Common/SymbolIndex.h>
@ -8,6 +7,7 @@
#include <base/CachedFn.h>
#include <base/demangle.h>
#include <atomic>
#include <cstring>
#include <filesystem>
#include <sstream>
@ -19,6 +19,32 @@
# include <libunwind.h>
#endif
namespace
{
/// Currently this variable is set up once on server startup.
/// But we use atomic just in case, so it is possible to be modified at runtime.
std::atomic<bool> show_addresses = true;
bool shouldShowAddress(const void * addr)
{
/// If the address is less than 4096, most likely it is a nullptr dereference with offset,
/// and showing this offset is secure nevertheless.
/// NOTE: 4096 is the page size on x86 and it can be different on other systems,
/// but for the purpose of this branch, it does not matter.
if (reinterpret_cast<uintptr_t>(addr) < 4096)
return true;
return show_addresses.load(std::memory_order_relaxed);
}
}
void StackTrace::setShowAddresses(bool show)
{
show_addresses.store(show, std::memory_order_relaxed);
}
std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const ucontext_t & context)
{
std::stringstream error; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
@ -30,7 +56,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused
/// Print info about address and reason.
if (nullptr == info.si_addr)
error << "Address: NULL pointer.";
else
else if (shouldShowAddress(info.si_addr))
error << "Address: " << info.si_addr;
#if defined(__x86_64__) && !defined(OS_FREEBSD) && !defined(OS_DARWIN) && !defined(__arm__) && !defined(__powerpc__)
@ -372,7 +398,9 @@ static void toStringEveryLineImpl(
else
out << "?";
out << " @ " << physical_addr;
if (shouldShowAddress(physical_addr))
out << " @ " << physical_addr;
out << " in " << (object ? object->name : "?");
for (size_t j = 0; j < inline_frames.size(); ++j)
@ -393,10 +421,13 @@ static void toStringEveryLineImpl(
for (size_t i = offset; i < size; ++i)
{
const void * addr = frame_pointers[i];
out << i << ". " << addr;
if (shouldShowAddress(addr))
{
out << i << ". " << addr;
callback(out.str());
out.str({});
callback(out.str());
out.str({});
}
}
#endif
}

View File

@ -67,6 +67,11 @@ public:
void toStringEveryLine(std::function<void(const std::string &)> callback) const;
/// Displaying the addresses can be disabled for security reasons.
/// If you turn off addresses, it will be more secure, but we will be unable to help you with debugging.
/// Please note: addresses are also available in the system.stack_trace and system.trace_log tables.
static void setShowAddresses(bool show);
protected:
void tryCapture();

View File

@ -79,7 +79,7 @@ void SystemLogBase<LogElement>::add(const LogElement & element)
/// The size of allocation can be in order of a few megabytes.
/// But this should not be accounted for query memory usage.
/// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker;
/// Should not log messages under mutex.
bool queue_is_half_full = false;

View File

@ -15,20 +15,31 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
constexpr size_t StringHashTablePadRequirement = 8;
/// TLDList
TLDList::TLDList(size_t size)
: tld_container(size)
, pool(std::make_unique<Arena>(10 << 20))
{}
bool TLDList::insert(StringRef host)
, memory_pool(std::make_unique<Arena>())
{
bool inserted;
tld_container.emplace(DB::ArenaKeyHolder{host, *pool}, inserted);
return inserted;
/// StringHashTable requires padded to 8 bytes key,
/// and Arena (memory_pool here) does satisfies this,
/// since it has padding with 15 bytes at the right.
///
/// However, StringHashTable may reference -1 byte of the key,
/// so left padding is also required:
memory_pool->alignedAlloc(StringHashTablePadRequirement, StringHashTablePadRequirement);
}
bool TLDList::has(StringRef host) const
void TLDList::insert(const String & host, TLDType type)
{
return tld_container.has(host);
StringRef owned_host{memory_pool->insert(host.data(), host.size()), host.size()};
tld_container[owned_host] = type;
}
TLDType TLDList::lookup(StringRef host) const
{
if (auto it = tld_container.find(host); it != nullptr)
return it->getMapped();
return TLDType::TLD_NONE;
}
/// TLDListsHolder
@ -57,32 +68,44 @@ void TLDListsHolder::parseConfig(const std::string & top_level_domains_path, con
size_t TLDListsHolder::parseAndAddTldList(const std::string & name, const std::string & path)
{
std::unordered_set<std::string> tld_list_tmp;
std::unordered_map<std::string, TLDType> tld_list_tmp;
ReadBufferFromFile in(path);
String line;
String buffer;
while (!in.eof())
{
readEscapedStringUntilEOL(line, in);
readEscapedStringUntilEOL(buffer, in);
if (!in.eof())
++in.position();
std::string_view line(buffer);
/// Skip comments
if (line.size() > 2 && line[0] == '/' && line[1] == '/')
if (line.starts_with("//"))
continue;
line = trim(line, [](char c) { return std::isspace(c); });
line = line.substr(0, line.rend() - std::find_if_not(line.rbegin(), line.rend(), ::isspace));
/// Skip empty line
if (line.empty())
continue;
tld_list_tmp.emplace(line);
/// Validate special symbols.
if (line.starts_with("*."))
{
line = line.substr(2);
tld_list_tmp.emplace(line, TLDType::TLD_ANY);
}
else if (line[0] == '!')
{
line = line.substr(1);
tld_list_tmp.emplace(line, TLDType::TLD_EXCLUDE);
}
else
tld_list_tmp.emplace(line, TLDType::TLD_REGULAR);
}
if (!in.eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not all list had been read", name);
TLDList tld_list(tld_list_tmp.size());
for (const auto & host : tld_list_tmp)
for (const auto & [host, type] : tld_list_tmp)
{
StringRef host_ref{host.data(), host.size()};
tld_list.insert(host_ref);
tld_list.insert(host, type);
}
size_t tld_list_size = tld_list.size();

View File

@ -2,7 +2,7 @@
#include <base/defines.h>
#include <base/StringRef.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/HashTable/StringHashMap.h>
#include <Common/Arena.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <mutex>
@ -12,25 +12,35 @@
namespace DB
{
enum TLDType
{
/// Does not exist marker
TLD_NONE,
/// For regular lines
TLD_REGULAR,
/// For asterisk (*)
TLD_ANY,
/// For exclamation mark (!)
TLD_EXCLUDE,
};
/// Custom TLD List
///
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashSet.
/// Unlike tldLookup (which uses gperf) this one uses plain StringHashMap.
class TLDList
{
public:
using Container = StringHashSet<>;
using Container = StringHashMap<TLDType>;
explicit TLDList(size_t size);
/// Return true if the tld_container does not contains such element.
bool insert(StringRef host);
/// Check is there such TLD
bool has(StringRef host) const;
void insert(const String & host, TLDType type);
TLDType lookup(StringRef host) const;
size_t size() const { return tld_container.size(); }
private:
Container tld_container;
std::unique_ptr<Arena> pool;
std::unique_ptr<Arena> memory_pool;
};
class TLDListsHolder
@ -48,6 +58,11 @@ public:
/// - "//" -- comment,
/// - empty lines will be ignored.
///
/// Treats the following special symbols:
/// - "*"
/// - "!"
///
/// Format : https://github.com/publicsuffix/list/wiki/Format
/// Example: https://publicsuffix.org/list/public_suffix_list.dat
///
/// Return size of the list.

View File

@ -2,6 +2,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <widechar_width.h>
#include <bit>
namespace DB
@ -124,7 +125,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
if (non_regular_width_mask)
{
auto num_regular_chars = __builtin_ctz(non_regular_width_mask);
auto num_regular_chars = std::countr_zero(non_regular_width_mask);
width += num_regular_chars;
i += num_regular_chars;
break;

View File

@ -83,7 +83,7 @@ inline size_t countCodePoints(const UInt8 * data, size_t size)
const auto threshold = vdupq_n_s8(0xBF);
for (; data < src_end_sse; data += bytes_sse)
res += __builtin_popcountll(get_nibble_mask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
res += std::popcount(get_nibble_mask(vcgtq_s8(vld1q_s8(reinterpret_cast<const int8_t *>(data)), threshold)));
res >>= 2;
#endif

View File

@ -2,6 +2,7 @@
#include <base/types.h>
#include <Common/Exception.h>
#include <Coordination/KeeperConstants.h>
#include <vector>
#include <memory>
@ -57,6 +58,8 @@ struct Stat
int32_t dataLength{0}; /// NOLINT
int32_t numChildren{0}; /// NOLINT
int64_t pzxid{0};
bool operator==(const Stat &) const = default;
};
enum class Error : int32_t
@ -109,7 +112,6 @@ bool isUserError(Error code);
const char * errorMessage(Error code);
struct Request;
using RequestPtr = std::shared_ptr<Request>;
using Requests = std::vector<RequestPtr>;
@ -516,6 +518,8 @@ public:
const Requests & requests,
MultiCallback callback) = 0;
virtual DB::KeeperApiVersion getApiVersion() = 0;
/// Expire session and finish all pending requests
virtual void finalize(const String & reason) = 0;
};

View File

@ -90,6 +90,11 @@ public:
void finalize(const String & reason) override;
DB::KeeperApiVersion getApiVersion() override
{
return KeeperApiVersion::ZOOKEEPER_COMPATIBLE;
}
struct Node
{
String data;

Some files were not shown because too many files have changed in this diff Show More