mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-02 20:42:04 +00:00
Merge branch 'master' into avoid_printing_settings
This commit is contained in:
commit
d979493fcf
31
.github/workflows/backport_branches.yml
vendored
31
.github/workflows/backport_branches.yml
vendored
@ -350,6 +350,36 @@ jobs:
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -560,6 +590,7 @@ jobs:
|
||||
FinishCheck:
|
||||
needs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatefulTestDebug
|
||||
|
31
.github/workflows/release_branches.yml
vendored
31
.github/workflows/release_branches.yml
vendored
@ -427,6 +427,36 @@ jobs:
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -1815,6 +1845,7 @@ jobs:
|
||||
FinishCheck:
|
||||
needs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- FunctionalStatelessTestDebug0
|
||||
- FunctionalStatelessTestDebug1
|
||||
|
@ -223,11 +223,25 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
|
||||
else()
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
|
||||
endif()
|
||||
# Provides faster linking and lower binary size.
|
||||
# Tradeoff is the inability to debug some source files with e.g. gdb
|
||||
# (empty stack frames and no local variables)."
|
||||
option(OMIT_HEAVY_DEBUG_SYMBOLS
|
||||
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
|
||||
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set(USE_DEBUG_HELPERS ON)
|
||||
endif()
|
||||
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
|
||||
|
@ -62,9 +62,10 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
|
||||
# By default, prefer clang on Linux
|
||||
# But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER.
|
||||
if (OS MATCHES "Linux"
|
||||
# some build systems may use CC/CXX env variables
|
||||
AND "$ENV{CC}" STREQUAL ""
|
||||
AND "$ENV{CXX}" STREQUAL "")
|
||||
AND "$ENV{CXX}" STREQUAL ""
|
||||
AND NOT DEFINED CMAKE_C_COMPILER
|
||||
AND NOT DEFINED CMAKE_CXX_COMPILER)
|
||||
find_program(CLANG_PATH clang)
|
||||
if (CLANG_PATH)
|
||||
set(CMAKE_C_COMPILER "clang" CACHE INTERNAL "")
|
||||
@ -87,8 +88,7 @@ if (OS MATCHES "Linux"
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "")
|
||||
elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)")
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "")
|
||||
else ()
|
||||
else ()
|
||||
message (FATAL_ERROR "Unsupported architecture: ${ARCH}")
|
||||
endif ()
|
||||
|
||||
endif()
|
||||
|
@ -37,7 +37,7 @@ The following versions of ClickHouse server are currently being supported with s
|
||||
|
||||
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
|
||||
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com).
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
|
||||
|
||||
### When Should I Report a Vulnerability?
|
||||
|
||||
|
@ -93,6 +93,7 @@
|
||||
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
|
||||
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
|
||||
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
|
||||
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
|
||||
# define NO_SANITIZE_UNDEFINED
|
||||
# define NO_SANITIZE_ADDRESS
|
||||
|
@ -462,5 +462,7 @@ foreach (TOOL ${PARQUET_TOOLS})
|
||||
endforeach ()
|
||||
|
||||
# The library is large - avoid bloat.
|
||||
target_compile_options (_arrow PRIVATE -g0)
|
||||
target_compile_options (_parquet PRIVATE -g0)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options (_arrow PRIVATE -g0)
|
||||
target_compile_options (_parquet PRIVATE -g0)
|
||||
endif()
|
||||
|
@ -114,7 +114,9 @@ endif()
|
||||
target_link_libraries(_aws_s3 PRIVATE _aws_s3_checksums)
|
||||
|
||||
# The library is large - avoid bloat.
|
||||
target_compile_options (_aws_s3 PRIVATE -g0)
|
||||
target_compile_options (_aws_s3_checksums PRIVATE -g0)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options (_aws_s3 PRIVATE -g0)
|
||||
target_compile_options (_aws_s3_checksums PRIVATE -g0)
|
||||
endif()
|
||||
|
||||
add_library(ch_contrib::aws_s3 ALIAS _aws_s3)
|
||||
|
@ -171,6 +171,8 @@ target_include_directories (_curl SYSTEM PUBLIC
|
||||
target_link_libraries (_curl PRIVATE OpenSSL::SSL)
|
||||
|
||||
# The library is large - avoid bloat (XXX: is it?)
|
||||
target_compile_options (_curl PRIVATE -g0)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options (_curl PRIVATE -g0)
|
||||
endif()
|
||||
|
||||
add_library (ch_contrib::curl ALIAS _curl)
|
||||
|
@ -268,10 +268,13 @@ endif()
|
||||
add_library (_vectorscan ${SRCS})
|
||||
|
||||
target_compile_options (_vectorscan PRIVATE
|
||||
-g0 # library has too much debug information
|
||||
-fno-sanitize=undefined # assume the library takes care of itself
|
||||
-O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system
|
||||
)
|
||||
# library has too much debug information
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options (_vectorscan PRIVATE -g0)
|
||||
endif()
|
||||
|
||||
# Include version header manually generated by running the original build system
|
||||
target_include_directories (_vectorscan SYSTEM PRIVATE common)
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
## What is ClickHouse?
|
||||
|
||||
ClickHouse is an open-source column-oriented database management system that allows the generation of analytical data reports in real-time.
|
||||
We are the creators of the popular open-source column-oriented DBMS (columnar database management system) for online analytical processing (OLAP) which allows users to generate analytical reports using SQL queries in real-time.
|
||||
|
||||
ClickHouse manages extremely large volumes of data. It currently powers [Yandex.Metrica](https://metrica.yandex.com/), the world’s [second-largest](http://w3techs.com/technologies/overview/traffic_analysis/all) web analytics platform, with over 13 trillion database records and over 20 billion events a day, generating customized reports on-the-fly, directly from non-aggregated data. This system was successfully implemented at [CERN’s LHCb experiment](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) to store and process metadata on 10bn events with over 1000 attributes per event registered in 2011.
|
||||
ClickHouse works 100-1000x faster than traditional database management systems, and processes hundreds of millions to over a billion rows and tens of gigabytes of data per server per second. With a widespread user base around the globe, the technology has received praise for its reliability, ease of use, and fault tolerance.
|
||||
|
||||
For more information and documentation see https://clickhouse.com/.
|
||||
|
||||
@ -52,7 +52,10 @@ You can expose your ClickHouse running in docker by [mapping a particular port](
|
||||
```bash
|
||||
docker run -d -p 18123:8123 -p19000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server
|
||||
echo 'SELECT version()' | curl 'http://localhost:18123/' --data-binary @-
|
||||
20.12.3.3
|
||||
```
|
||||
|
||||
```response
|
||||
22.6.3.35
|
||||
```
|
||||
|
||||
or by allowing the container to use [host ports directly](https://docs.docker.com/network/host/) using `--network=host` (also allows archiving better network performance):
|
||||
@ -60,7 +63,10 @@ or by allowing the container to use [host ports directly](https://docs.docker.co
|
||||
```bash
|
||||
docker run -d --network=host --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server
|
||||
echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-
|
||||
20.12.3.3
|
||||
```
|
||||
|
||||
```response
|
||||
22.6.3.35
|
||||
```
|
||||
|
||||
### Volumes
|
||||
|
@ -47,7 +47,8 @@ function configure()
|
||||
|
||||
# we mount tests folder from repo to /usr/share
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
ln -s /usr/share/clickhouse-test/ci/download_previous_release.py /usr/bin/download_previous_release
|
||||
ln -s /usr/share/clickhouse-test/ci/download_release_packets.py /usr/bin/download_release_packets
|
||||
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
|
||||
|
||||
# avoid too slow startup
|
||||
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
|
||||
@ -228,7 +229,7 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_
|
||||
# Sanitizer asserts
|
||||
grep -Fa "==================" /var/log/clickhouse-server/stderr.log | grep -v "in query:" >> /test_output/tmp
|
||||
grep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
|
||||
zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
@ -267,16 +268,31 @@ zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
|
||||
echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Get previous release tag"
|
||||
previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag)
|
||||
echo $previous_release_tag
|
||||
|
||||
echo "Clone previous release repository"
|
||||
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
|
||||
|
||||
echo "Download previous release server"
|
||||
mkdir previous_release_package_folder
|
||||
clickhouse-client --query="SELECT version()" | download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|
||||
echo $previous_release_tag | download_release_packets && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
|
||||
|
||||
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
# Check if we cloned previous release repository successfully
|
||||
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
|
||||
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
|
||||
|
||||
# Uninstall current packages
|
||||
@ -310,7 +326,7 @@ then
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
@ -336,6 +352,7 @@ then
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38629
|
||||
echo "Check for Error messages in server log:"
|
||||
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
@ -358,6 +375,7 @@ then
|
||||
-e "and a merge is impossible: we didn't find" \
|
||||
-e "found in queue and some source parts for it was lost" \
|
||||
-e "is lost forever." \
|
||||
-e "pp.proj, errno: 21" \
|
||||
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
@ -368,7 +386,7 @@ then
|
||||
# Sanitizer asserts
|
||||
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
|
||||
zgrep -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
@ -400,8 +418,6 @@ then
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
else
|
||||
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
|
||||
fi
|
||||
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
@ -28,18 +28,20 @@ def get_options(i, backward_compatibility_check):
|
||||
if i % 2 == 1:
|
||||
options.append(" --database=test_{}".format(i))
|
||||
|
||||
if i % 5 == 1:
|
||||
if i % 3 == 1:
|
||||
client_options.append("join_use_nulls=1")
|
||||
|
||||
if i % 15 == 1:
|
||||
client_options.append("join_algorithm='parallel_hash'")
|
||||
|
||||
if i % 15 == 6:
|
||||
client_options.append("join_algorithm='partial_merge'")
|
||||
|
||||
if i % 15 == 11:
|
||||
client_options.append("join_algorithm='auto'")
|
||||
client_options.append("max_rows_in_join=1000")
|
||||
if i % 2 == 1:
|
||||
join_alg_num = i // 2
|
||||
if join_alg_num % 4 == 0:
|
||||
client_options.append("join_algorithm='parallel_hash'")
|
||||
if join_alg_num % 4 == 1:
|
||||
client_options.append("join_algorithm='partial_merge'")
|
||||
if join_alg_num % 4 == 2:
|
||||
client_options.append("join_algorithm='full_sorting_merge'")
|
||||
if join_alg_num % 4 == 3:
|
||||
client_options.append("join_algorithm='auto'")
|
||||
client_options.append('max_rows_in_join=1000')
|
||||
|
||||
if i == 13:
|
||||
client_options.append("memory_tracker_fault_probability=0.001")
|
||||
|
34
docs/changelogs/v22.6.3.35-stable.md
Normal file
34
docs/changelogs/v22.6.3.35-stable.md
Normal file
@ -0,0 +1,34 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2022
|
||||
---
|
||||
|
||||
# 2022 Changelog
|
||||
|
||||
### ClickHouse release v22.6.3.35-stable FIXME as compared to v22.6.2.12-stable
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#38812](https://github.com/ClickHouse/ClickHouse/issues/38812): Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Backported in [#38883](https://github.com/ClickHouse/ClickHouse/issues/38883): Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#38690](https://github.com/ClickHouse/ClickHouse/issues/38690): Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#38500](https://github.com/ClickHouse/ClickHouse/issues/38500): Do not allow recursive usage of OvercommitTracker during logging. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794) cc @tavplubix @davenger. [#38246](https://github.com/ClickHouse/ClickHouse/pull/38246) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#38712](https://github.com/ClickHouse/ClickHouse/issues/38712): Fix incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Backported in [#38594](https://github.com/ClickHouse/ClickHouse/issues/38594): Fix parts removal (will be left forever if they had not been removed on server shutdown) after incorrect server shutdown. [#38486](https://github.com/ClickHouse/ClickHouse/pull/38486) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#38597](https://github.com/ClickHouse/ClickHouse/issues/38597): Fix table creation to avoid replication issues with pre-22.4 replicas. [#38541](https://github.com/ClickHouse/ClickHouse/pull/38541) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#38687](https://github.com/ClickHouse/ClickHouse/issues/38687): Now it's possible to start a clickhouse-server and attach/detach tables even for tables with the incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#38665](https://github.com/ClickHouse/ClickHouse/issues/38665): Adapt some more nodes to avoid issues with pre-22.4 replicas. [#38627](https://github.com/ClickHouse/ClickHouse/pull/38627) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#38778](https://github.com/ClickHouse/ClickHouse/issues/38778): `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#38782](https://github.com/ClickHouse/ClickHouse/issues/38782): Fix use-after-free for Map combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Fix test for system table count in diag tool [#38236](https://github.com/ClickHouse/ClickHouse/pull/38236) ([Dale McDiarmid](https://github.com/gingerwizard)).
|
||||
* Integration tests volume [#38291](https://github.com/ClickHouse/ClickHouse/pull/38291) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update docker-compose to try get rid of v1 errors [#38394](https://github.com/ClickHouse/ClickHouse/pull/38394) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix backports diff [#38703](https://github.com/ClickHouse/ClickHouse/pull/38703) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -5,34 +5,6 @@ sidebar_label: Build on Linux for AARCH64 (ARM64)
|
||||
|
||||
# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture
|
||||
|
||||
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture.
|
||||
This is intended for continuous integration checks that run on Linux servers.
|
||||
If you use AArch64 machine and want to build ClickHouse for AArch64, build as usual.
|
||||
|
||||
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
|
||||
|
||||
## Install Clang-14 or newer
|
||||
|
||||
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
|
||||
```
|
||||
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
|
||||
```
|
||||
|
||||
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
mkdir -p build-aarch64/cmake/toolchain/linux-aarch64
|
||||
wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en' -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
|
||||
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
|
||||
```
|
||||
|
||||
## Build ClickHouse {#build-clickhouse}
|
||||
|
||||
``` bash
|
||||
cd ClickHouse
|
||||
mkdir build-arm64
|
||||
CC=clang-14 CXX=clang++-14 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake
|
||||
ninja -C build-arm64
|
||||
```
|
||||
|
||||
The resulting binary will run only on Linux with the AARCH64 CPU architecture.
|
||||
If you use x86_64 machine and want cross-compile for AArch64, add the following flag to `cmake`: `-DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake`
|
||||
|
@ -17,10 +17,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
...
|
||||
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
|
||||
SETTINGS
|
||||
[connection_pool_size=16, ]
|
||||
[connection_max_tries=3, ]
|
||||
[connection_wait_timeout=5, ] /* 0 -- do not wait */
|
||||
[connection_auto_close=true ]
|
||||
[ connection_pool_size=16, ]
|
||||
[ connection_max_tries=3, ]
|
||||
[ connection_wait_timeout=5, ]
|
||||
[ connection_auto_close=true, ]
|
||||
[ connect_timeout=10, ]
|
||||
[ read_write_timeout=300 ]
|
||||
;
|
||||
```
|
||||
|
||||
@ -144,6 +146,36 @@ Possible values:
|
||||
|
||||
Default value: `16`.
|
||||
|
||||
### connection_wait_timeout {#connection-wait-timeout}
|
||||
|
||||
Timeout (in seconds) for waiting for free connection (in case of there is already connection_pool_size active connections), 0 - do not wait.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `5`.
|
||||
|
||||
### connect_timeout {#connect-timeout}
|
||||
|
||||
Connect timeout (in seconds).
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `10`.
|
||||
|
||||
### read_write_timeout {#read-write-timeout}
|
||||
|
||||
Read/write timeout (in seconds).
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
|
||||
Default value: `300`.
|
||||
|
||||
## See Also {#see-also}
|
||||
|
||||
- [The mysql table function](../../../sql-reference/table-functions/mysql.md)
|
||||
|
@ -131,7 +131,7 @@ Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data typ
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
The behavior of functions for negative agruments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
The behavior of functions for negative arguments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -8,11 +8,11 @@ sidebar_label: INDEX
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata.
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata.
|
||||
|
||||
- `ALTER TABLE [db].name DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](../../../../sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.
|
||||
- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](../../../../sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.
|
||||
|
||||
The first two commands are lightweight in a sense that they only change metadata or remove files.
|
||||
|
||||
|
@ -27,7 +27,7 @@ The following operations with [partitions](../../../engines/table-engines/merget
|
||||
## DETACH PARTITION\|PART
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DETACH PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
|
||||
@ -48,7 +48,7 @@ This query is replicated – it moves the data to the `detached` directory on al
|
||||
## DROP PARTITION\|PART
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DROP PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Deletes the specified partition from the table. This query tags the partition as inactive and deletes data completely, approximately in 10 minutes.
|
||||
@ -67,7 +67,7 @@ ALTER TABLE mt DROP PART 'all_4_4_0';
|
||||
## DROP DETACHED PARTITION\|PART
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Removes the specified part or all parts of the specified partition from `detached`.
|
||||
@ -76,7 +76,7 @@ Read more about setting the partition expression in a section [How to specify th
|
||||
## ATTACH PARTITION\|PART
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name ATTACH PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] ATTACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Adds data to the table from the `detached` directory. It is possible to add data for an entire partition or for a separate part. Examples:
|
||||
@ -99,7 +99,7 @@ You can put data to the `detached` directory on one replica and use the `ALTER .
|
||||
## ATTACH PARTITION FROM
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
|
||||
ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM table1
|
||||
```
|
||||
|
||||
This query copies the data partition from `table1` to `table2`.
|
||||
@ -113,7 +113,7 @@ For the query to run successfully, the following conditions must be met:
|
||||
## REPLACE PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
|
||||
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
|
||||
```
|
||||
|
||||
This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`. Note that data won’t be deleted from `table1`.
|
||||
@ -126,7 +126,7 @@ For the query to run successfully, the following conditions must be met:
|
||||
## MOVE PARTITION TO TABLE
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
ALTER TABLE table_source [ON CLUSTER cluster] MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
```
|
||||
|
||||
This query moves the data partition from the `table_source` to `table_dest` with deleting the data from `table_source`.
|
||||
@ -141,7 +141,7 @@ For the query to run successfully, the following conditions must be met:
|
||||
## CLEAR COLUMN IN PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] CLEAR COLUMN column_name IN PARTITION partition_expr
|
||||
```
|
||||
|
||||
Resets all values in the specified column in a partition. If the `DEFAULT` clause was determined when creating a table, this query sets the column value to a specified default value.
|
||||
@ -155,7 +155,7 @@ ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902
|
||||
## FREEZE PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name']
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name']
|
||||
```
|
||||
|
||||
This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once.
|
||||
@ -197,7 +197,7 @@ For more information about backups and restoring data, see the [Data Backup](../
|
||||
## UNFREEZE PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'
|
||||
```
|
||||
|
||||
Removes `freezed` partitions with the specified name from the disk. If the `PARTITION` clause is omitted, the query removes the backup of all partitions at once.
|
||||
@ -205,7 +205,7 @@ Removes `freezed` partitions with the specified name from the disk. If the `PART
|
||||
## CLEAR INDEX IN PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] CLEAR INDEX index_name IN PARTITION partition_expr
|
||||
```
|
||||
|
||||
The query works similar to `CLEAR COLUMN`, but it resets an index instead of a column data.
|
||||
@ -213,7 +213,7 @@ The query works similar to `CLEAR COLUMN`, but it resets an index instead of a c
|
||||
## FETCH PARTITION|PART
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
|
||||
```
|
||||
|
||||
Downloads a partition from another server. This query only works for the replicated tables.
|
||||
@ -250,7 +250,7 @@ Although the query is called `ALTER TABLE`, it does not change the table structu
|
||||
Moves partitions or data parts to another volume or disk for `MergeTree`-engine tables. See [Using Multiple Block Devices for Data Storage](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes).
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'
|
||||
```
|
||||
|
||||
The `ALTER TABLE t MOVE` query:
|
||||
@ -273,7 +273,7 @@ Manipulates data in the specifies partition matching the specified filtering exp
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
|
||||
```
|
||||
|
||||
### Example
|
||||
@ -293,7 +293,7 @@ Deletes data in the specifies partition matching the specified filtering express
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table DELETE [IN PARTITION partition_id] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE [IN PARTITION partition_id] WHERE filter_expr
|
||||
```
|
||||
|
||||
### Example
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: UPDATE
|
||||
# ALTER TABLE … UPDATE Statements
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr
|
||||
```
|
||||
|
||||
Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
@ -15,7 +15,7 @@ The `EXCHANGE` query is supported by the [Atomic](../../engines/database-engines
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
EXCHANGE TABLES|DICTIONARIES [db0.]name_A AND [db1.]name_B
|
||||
EXCHANGE TABLES|DICTIONARIES [db0.]name_A AND [db1.]name_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
## EXCHANGE TABLES
|
||||
@ -25,7 +25,7 @@ Exchanges the names of two tables.
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
EXCHANGE TABLES [db0.]table_A AND [db1.]table_B
|
||||
EXCHANGE TABLES [db0.]table_A AND [db1.]table_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
## EXCHANGE DICTIONARIES
|
||||
@ -35,7 +35,7 @@ Exchanges the names of two dictionaries.
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B
|
||||
EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
@ -25,7 +25,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[SETTINGS ...]
|
||||
[UNION ...]
|
||||
[INTO OUTFILE filename [COMPRESSION type] ]
|
||||
[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ]
|
||||
[FORMAT format]
|
||||
```
|
||||
|
||||
|
@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE
|
||||
|
||||
`INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side.
|
||||
|
||||
Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause.
|
||||
Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause. The compression level for a certain compression type can be specified in a `LEVEL` clause.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
|
||||
```
|
||||
|
||||
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
|
||||
|
||||
`level` is a numeric literal. Positive integers in following ranges are supported: `1-12` for `lz4` type, `1-22` for `zstd` type and `1-9` for other compression types.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
|
||||
|
@ -70,13 +70,29 @@ https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html
|
||||
|
||||
```text
|
||||
aggregate_function (column_name)
|
||||
OVER ([PARTITION BY groupping_column] [ORDER BY sorting_column]
|
||||
[ROWS or RANGE expression_to_bounds_of_frame])
|
||||
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
|
||||
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
|
||||
FROM table_name
|
||||
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
|
||||
```
|
||||
|
||||
- `PARTITION BY` - defines how to break a resultset into groups.
|
||||
- `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
|
||||
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
|
||||
- `WINDOW` - allows to reuse a window definition with multiple exressions.
|
||||
|
||||
### Functions
|
||||
|
||||
These functions can be used only as a window function.
|
||||
|
||||
`row_number()` - Number the current row within its partition starting from 1.
|
||||
`first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
|
||||
`last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
|
||||
`nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
|
||||
`rank()` - Rank the current row within its partition with gaps.
|
||||
`dense_rank()` - Rank the current row within its partition without gaps.
|
||||
`lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
|
||||
`leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
|
||||
|
||||
```text
|
||||
PARTITION
|
||||
@ -101,7 +117,8 @@ aggregate_function (column_name)
|
||||
CREATE TABLE wf_partition
|
||||
(
|
||||
`part_key` UInt64,
|
||||
`value` UInt64
|
||||
`value` UInt64,
|
||||
`order` UInt64
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
@ -271,6 +288,89 @@ ORDER BY
|
||||
│ 1 │ 4 │ 4 │ [3,4,5] │
|
||||
│ 1 │ 5 │ 5 │ [4,5] │
|
||||
└──────────┴───────┴───────┴──────────────┘
|
||||
|
||||
-- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4
|
||||
SELECT
|
||||
part_key,
|
||||
value,
|
||||
order,
|
||||
groupArray(value) OVER w1 AS frame_values,
|
||||
row_number() OVER w1 AS rn_1,
|
||||
sum(1) OVER w1 AS rn_2,
|
||||
row_number() OVER w2 AS rn_3,
|
||||
sum(1) OVER w2 AS rn_4
|
||||
FROM wf_frame
|
||||
WINDOW
|
||||
w1 AS (PARTITION BY part_key ORDER BY order DESC),
|
||||
w2 AS (PARTITION BY part_key ORDER BY order DESC
|
||||
Rows BETWEEN 1 PRECEDING AND CURRENT ROW)
|
||||
ORDER BY
|
||||
part_key ASC,
|
||||
value ASC;
|
||||
┌─part_key─┬─value─┬─order─┬─frame_values─┬─rn_1─┬─rn_2─┬─rn_3─┬─rn_4─┐
|
||||
│ 1 │ 1 │ 1 │ [5,4,3,2,1] │ 5 │ 5 │ 5 │ 2 │
|
||||
│ 1 │ 2 │ 2 │ [5,4,3,2] │ 4 │ 4 │ 4 │ 2 │
|
||||
│ 1 │ 3 │ 3 │ [5,4,3] │ 3 │ 3 │ 3 │ 2 │
|
||||
│ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │
|
||||
│ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │
|
||||
└──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘
|
||||
|
||||
-- first_value and last_value respect the frame
|
||||
SELECT
|
||||
groupArray(value) OVER w1 AS frame_values_1,
|
||||
first_value(value) OVER w1 AS first_value_1,
|
||||
last_value(value) OVER w1 AS last_value_1,
|
||||
groupArray(value) OVER w2 AS frame_values_2,
|
||||
first_value(value) OVER w2 AS first_value_2,
|
||||
last_value(value) OVER w2 AS last_value_2
|
||||
FROM wf_frame
|
||||
WINDOW
|
||||
w1 AS (PARTITION BY part_key ORDER BY order ASC),
|
||||
w2 AS (PARTITION BY part_key ORDER BY order ASC Rows BETWEEN 1 PRECEDING AND CURRENT ROW)
|
||||
ORDER BY
|
||||
part_key ASC,
|
||||
value ASC;
|
||||
┌─frame_values_1─┬─first_value_1─┬─last_value_1─┬─frame_values_2─┬─first_value_2─┬─last_value_2─┐
|
||||
│ [1] │ 1 │ 1 │ [1] │ 1 │ 1 │
|
||||
│ [1,2] │ 1 │ 2 │ [1,2] │ 1 │ 2 │
|
||||
│ [1,2,3] │ 1 │ 3 │ [2,3] │ 2 │ 3 │
|
||||
│ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │
|
||||
│ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │
|
||||
└────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘
|
||||
|
||||
-- second value within the frame
|
||||
SELECT
|
||||
groupArray(value) OVER w1 AS frame_values_1,
|
||||
nth_value(value, 2) OVER w1 AS second_value
|
||||
FROM wf_frame
|
||||
WINDOW w1 AS (PARTITION BY part_key ORDER BY order ASC Rows BETWEEN 3 PRECEDING AND CURRENT ROW)
|
||||
ORDER BY
|
||||
part_key ASC,
|
||||
value ASC
|
||||
┌─frame_values_1─┬─second_value─┐
|
||||
│ [1] │ 0 │
|
||||
│ [1,2] │ 2 │
|
||||
│ [1,2,3] │ 2 │
|
||||
│ [1,2,3,4] │ 2 │
|
||||
│ [2,3,4,5] │ 3 │
|
||||
└────────────────┴──────────────┘
|
||||
|
||||
-- second value within the frame + Null for missing values
|
||||
SELECT
|
||||
groupArray(value) OVER w1 AS frame_values_1,
|
||||
nth_value(toNullable(value), 2) OVER w1 AS second_value
|
||||
FROM wf_frame
|
||||
WINDOW w1 AS (PARTITION BY part_key ORDER BY order ASC Rows BETWEEN 3 PRECEDING AND CURRENT ROW)
|
||||
ORDER BY
|
||||
part_key ASC,
|
||||
value ASC
|
||||
┌─frame_values_1─┬─second_value─┐
|
||||
│ [1] │ ᴺᵁᴸᴸ │
|
||||
│ [1,2] │ 2 │
|
||||
│ [1,2,3] │ 2 │
|
||||
│ [1,2,3,4] │ 2 │
|
||||
│ [2,3,4,5] │ 3 │
|
||||
└────────────────┴──────────────┘
|
||||
```
|
||||
|
||||
## Real world examples
|
||||
|
@ -17,10 +17,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
...
|
||||
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
|
||||
SETTINGS
|
||||
[connection_pool_size=16, ]
|
||||
[connection_max_tries=3, ]
|
||||
[connection_wait_timeout=5, ] /* 0 -- не ждать */
|
||||
[connection_auto_close=true ]
|
||||
[ connection_pool_size=16, ]
|
||||
[ connection_max_tries=3, ]
|
||||
[ connection_wait_timeout=5, ]
|
||||
[ connection_auto_close=true, ]
|
||||
[ connect_timeout=10, ]
|
||||
[ read_write_timeout=300 ]
|
||||
;
|
||||
```
|
||||
|
||||
@ -144,7 +146,37 @@ SELECT * FROM mysql_table
|
||||
|
||||
Значение по умолчанию: `16`.
|
||||
|
||||
### connection_wait_timeout {#connection-wait-timeout}
|
||||
|
||||
Задает таймаут (в секундах) ожидания свободного подключения (в случае, если уже есть активные подключения connection_pool_size), 0 - не ждать.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: `5`.
|
||||
|
||||
### connect_timeout {#connect-timeout}
|
||||
|
||||
Задает таймаут ожидания подключения (в секундах).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: `10`.
|
||||
|
||||
### read_write_timeout {#read-write-timeout}
|
||||
|
||||
Задает таймаут ожидания ввода/вывода (в секундах).
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- Положительное целое число.
|
||||
|
||||
Значение по умолчанию: `300`.
|
||||
|
||||
## См. также {#see-also}
|
||||
|
||||
- [Табличная функция mysql](../../../engines/table-engines/integrations/mysql.md)
|
||||
- [Использование MySQL в качестве источника для внешнего словаря](../../../engines/table-engines/integrations/mysql.md#dicts-external_dicts_dict_sources-mysql)
|
||||
- [Табличная функция mysql](../../../sql-reference/table-functions/mysql.md)
|
||||
- [Использование MySQL в качестве источника для внешнего словаря](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
|
||||
|
@ -75,8 +75,9 @@ DROP COLUMN [IF EXISTS] name
|
||||
|
||||
Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно.
|
||||
|
||||
:::danger "Предупреждение"
|
||||
:::warning "Предупреждение"
|
||||
Вы не можете удалить столбец, используемый в [материализованном представлениии](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка.
|
||||
:::
|
||||
|
||||
Пример:
|
||||
|
||||
|
@ -9,9 +9,9 @@ sidebar_label: "Манипуляции с индексами"
|
||||
Добавить или удалить индекс можно с помощью операций
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]
|
||||
ALTER TABLE [db.]name DROP INDEX name
|
||||
ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name
|
||||
ALTER TABLE [db.]table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]
|
||||
ALTER TABLE [db.]table_name [ON CLUSTER cluster] DROP INDEX name
|
||||
ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name IN PARTITION partition_name
|
||||
```
|
||||
|
||||
Поддерживается только таблицами семейства `*MergeTree`.
|
||||
|
@ -25,7 +25,7 @@ sidebar_label: PARTITION
|
||||
## DETACH PARTITION\|PART {#alter_detach-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DETACH PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Перемещает заданную партицию в директорию `detached`. Сервер не будет знать об этой партиции до тех пор, пока вы не выполните запрос [ATTACH](#alter_attach-partition).
|
||||
@ -46,7 +46,7 @@ ALTER TABLE mt DETACH PART 'all_2_2_0';
|
||||
## DROP PARTITION\|PART {#alter_drop-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DROP PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Удаляет партицию. Партиция помечается как неактивная и будет полностью удалена примерно через 10 минут.
|
||||
@ -65,7 +65,7 @@ ALTER TABLE mt DROP PART 'all_4_4_0';
|
||||
## DROP DETACHED PARTITION\|PART {#alter_drop-detached}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Удаляет из `detached` кусок или все куски, принадлежащие партиции.
|
||||
@ -74,7 +74,7 @@ ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
|
||||
## ATTACH PARTITION\|PART {#alter_attach-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name ATTACH PARTITION|PART partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] ATTACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Добавляет данные в таблицу из директории `detached`. Можно добавить данные как для целой партиции, так и для отдельного куска. Примеры:
|
||||
@ -97,7 +97,7 @@ ALTER TABLE visits ATTACH PART 201901_2_2_0;
|
||||
## ATTACH PARTITION FROM {#alter_attach-partition-from}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
|
||||
ALTER TABLE table2 [ON CLUSTER cluster] ATTACH PARTITION partition_expr FROM table1
|
||||
```
|
||||
|
||||
Копирует партицию из таблицы `table1` в таблицу `table2`.
|
||||
@ -113,7 +113,7 @@ ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
|
||||
## REPLACE PARTITION {#alter_replace-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
|
||||
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
|
||||
```
|
||||
|
||||
Копирует партицию из таблицы `table1` в таблицу `table2` с заменой существующих данных в `table2`. Данные из `table1` не удаляются.
|
||||
@ -128,7 +128,7 @@ ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
|
||||
## MOVE PARTITION TO TABLE {#alter_move_to_table-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
ALTER TABLE table_source [ON CLUSTER cluster] MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
```
|
||||
|
||||
Перемещает партицию из таблицы `table_source` в таблицу `table_dest` (добавляет к существующим данным в `table_dest`) с удалением данных из таблицы `table_source`.
|
||||
@ -143,7 +143,7 @@ ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
|
||||
## CLEAR COLUMN IN PARTITION {#alter_clear-column-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] CLEAR COLUMN column_name IN PARTITION partition_expr
|
||||
```
|
||||
|
||||
Сбрасывает все значения в столбце для заданной партиции. Если для столбца определено значение по умолчанию (в секции `DEFAULT`), то будет выставлено это значение.
|
||||
@ -157,7 +157,7 @@ ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902
|
||||
## CLEAR INDEX IN PARTITION {#alter_clear-index-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] CLEAR INDEX index_name IN PARTITION partition_expr
|
||||
```
|
||||
|
||||
Работает как `CLEAR COLUMN`, но сбрасывает индексы вместо данных в столбцах.
|
||||
@ -165,7 +165,7 @@ ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
|
||||
## FREEZE PARTITION {#alter_freeze-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name']
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name']
|
||||
```
|
||||
|
||||
Создаёт резервную копию для заданной партиции. Если выражение `PARTITION` опущено, резервные копии будут созданы для всех партиций.
|
||||
@ -205,7 +205,7 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr] [WITH NAME 'backup_name
|
||||
## UNFREEZE PARTITION {#alter_unfreeze-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name'
|
||||
```
|
||||
|
||||
Удаляет с диска "замороженные" партиции с указанным именем. Если секция `PARTITION` опущена, запрос удаляет резервную копию всех партиций сразу.
|
||||
@ -213,7 +213,7 @@ ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name
|
||||
## FETCH PARTITION\|PART {#alter_fetch-partition}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper'
|
||||
```
|
||||
|
||||
Загружает партицию с другого сервера. Этот запрос работает только для реплицированных таблиц.
|
||||
@ -250,7 +250,7 @@ ALTER TABLE users ATTACH PART 201901_2_2_0;
|
||||
Перемещает партицию или кусок данных на другой том или диск для таблиц с движком `MergeTree`. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes).
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'
|
||||
```
|
||||
|
||||
Запрос `ALTER TABLE t MOVE`:
|
||||
@ -273,7 +273,7 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
|
||||
```
|
||||
|
||||
### Пример
|
||||
@ -293,7 +293,7 @@ ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2;
|
||||
Синтаксис:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table DELETE [IN PARTITION partition_id] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE [IN PARTITION partition_id] WHERE filter_expr
|
||||
```
|
||||
|
||||
### Пример
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: UPDATE
|
||||
# ALTER TABLE … UPDATE {#alter-table-update-statements}
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
|
||||
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr
|
||||
```
|
||||
|
||||
Манипулирует данными, соответствующими заданному выражению фильтрации. Реализовано как [мутация](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
@ -14,7 +14,7 @@ sidebar_label: EXCHANGE
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
EXCHANGE TABLES|DICTIONARIES [db0.]name_A AND [db1.]name_B
|
||||
EXCHANGE TABLES|DICTIONARIES [db0.]name_A AND [db1.]name_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
## EXCHANGE TABLES {#exchange_tables}
|
||||
@ -24,7 +24,7 @@ EXCHANGE TABLES|DICTIONARIES [db0.]name_A AND [db1.]name_B
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
EXCHANGE TABLES [db0.]table_A AND [db1.]table_B
|
||||
EXCHANGE TABLES [db0.]table_A AND [db1.]table_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
## EXCHANGE DICTIONARIES {#exchange_dictionaries}
|
||||
@ -34,7 +34,7 @@ EXCHANGE TABLES [db0.]table_A AND [db1.]table_B
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B
|
||||
EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B [ON CLUSTER cluster]
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
@ -24,7 +24,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
|
||||
[LIMIT [n, ]m] [WITH TIES]
|
||||
[SETTINGS ...]
|
||||
[UNION ALL ...]
|
||||
[INTO OUTFILE filename [COMPRESSION type] ]
|
||||
[INTO OUTFILE filename [COMPRESSION type [LEVEL level]] ]
|
||||
[FORMAT format]
|
||||
```
|
||||
|
||||
|
@ -6,16 +6,18 @@ sidebar_label: INTO OUTFILE
|
||||
|
||||
Секция `INTO OUTFILE` перенаправляет результат запроса `SELECT` в файл на стороне **клиента**.
|
||||
|
||||
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`.
|
||||
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`. Уровень сжатия для конкретного алгоритма может быть задан в секции `LEVEL`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
|
||||
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type [LEVEL level]]
|
||||
```
|
||||
|
||||
`file_name` и `type` задаются в виде строковых литералов. Поддерживаются форматы сжатия: `'none`', `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
|
||||
|
||||
`level` задается в виде числового литерала. Поддерживаются положительные значения в следующих диапазонах: `1-12` для формата `lz4`, `1-22` для формата `zstd` и `1-9` для остальных форматов.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
- Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md) вернет ошибку.
|
||||
|
@ -5,12 +5,12 @@ sidebar_position: 101
|
||||
---
|
||||
# 我能把 ClickHouse 当做Key-value 键值存储来使用吗? {#can-i-use-clickhouse-as-a-key-value-storage}.
|
||||
|
||||
简短的回答是 **不能** 。关键值的工作量是在列表中的最高位置时,**不能**{.text-danger}使用ClickHouse的情况。它是一个[OLAP](../../faq/general/olap.md)系统,毕竟有很多优秀的键值存储系统在那里。
|
||||
简短的回答是 **不能** 。键值类型负载是ClickHouse最<span class="text-danger">**不适合**</span>的多种场景之一。ClickHouse 毕竟只是一个[OLAP](../../faq/general/olap.md)系统,对于这类负载来说,目前还是有很多优秀的键值存储系统可供选择。
|
||||
|
||||
然而,可能在某些情况下,使用ClickHouse进行类似键值的查询仍然是有意义的。通常,是一些低预算的产品,主要的工作负载是分析性的,很适合ClickHouse,但也有一些次要的过程需要一个键值模式,请求吞吐量不是很高,没有严格的延迟要求。如果你有无限的预算,你会为这样的次要工作负载安装一个次要的键值数据库,但实际上,多维护一个存储系统(监控、备份等)会有额外的成本,这可能是值得避免的。
|
||||
然而,在某些情况下,使用ClickHouse进行类似键值的查询仍然是有意义的。通常,一些主要的工作负载是分析性的比较适合使用Clickhouse低预算的产品中,也有一些次要的操作是需要使用键值模式的,同时这些操作的请求吞吐量不会很高,也没有严格的延迟要求。如果你有无限的预算,你会为这样的次要工作负载安装一个次要的键值数据库,但实际上,多维护一个存储系统(监控、备份等)会有额外的成本,这是可以考虑避免的。
|
||||
|
||||
如果你决定违背建议,对ClickHouse运行一些类似键值的查询,这里有一些提示。
|
||||
如果你决定不遵从这些建议,想要使用ClickHouse运行一些类似键值的查询,那么这里有一些提示。
|
||||
|
||||
- ClickHouse中点查询昂贵的关键原因是其稀疏的主索引[MergeTree表引擎家族](../../engines/table-engines/mergetree-family/mergetree.md)。这个索引不能指向每一行具体的数据,相反,它指向每N行,系统必须从邻近的N行扫描到所需的行,沿途读取过多的数据。在一个键值场景中,通过`index_granularity`的设置来减少N的值可能是有用的。
|
||||
- ClickHouse将每一列保存在一组单独的文件中,所以要组装一个完整的行,它需要通过这些文件中的每一个。它们的数量随着列数的增加而线性增加,所以在键值场景中,可能值得避免使用许多列,并将所有的有效数据放在一个单一的`String`列中,并以某种序列化格式(如JSON、Protobuf或任何有效的格式)进行编码。
|
||||
- ClickHouse中点查询开销大的关键原因是MergeTree表引擎家族[MergeTree表引擎家族](../../engines/table-engines/mergetree-family/mergetree.md)采用的稀疏主索引。这个索引不能指向每一行具体的数据,相反,它指向每N行,系统必须从邻近的N行扫描到所需的行,沿途读取过多的数据。在一个键值场景中,通过`index_granularity`的设置来减少N的值可能是有用的。
|
||||
- ClickHouse将每一列保存在一组单独的文件中,所以要组装一个完整的行,它需要访问文件组中的每一个文件。访问数据数量会随着列数的增加而线性增加,所以在键值场景中,需要避免使用许多列,并将所有的有效数据放在一个单一的`String`列中,并以某种序列化格式(如JSON、Protobuf或任何有效的格式)进行编码。
|
||||
- 还有一种方法,使用[Join](../../engines/table-engines/special/join.md)表引擎代替正常的`MergeTree`表和[joinGet](../../sql-reference/functions/other-functions.md#joinget) 函数来检索数据。它可以提供更好的查询性能,但可能有一些可用性和可靠性问题。下面是一个[使用实例](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51)。
|
||||
|
@ -321,7 +321,7 @@ AccessRestorerFromBackup::AccessRestorerFromBackup(
|
||||
|
||||
AccessRestorerFromBackup::~AccessRestorerFromBackup() = default;
|
||||
|
||||
void AccessRestorerFromBackup::addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs)
|
||||
void AccessRestorerFromBackup::addDataPath(const String & data_path)
|
||||
{
|
||||
if (!data_paths.emplace(data_path).second)
|
||||
return;
|
||||
@ -334,8 +334,8 @@ void AccessRestorerFromBackup::addDataPath(const String & data_path, const Quali
|
||||
for (const String & filename : filenames)
|
||||
{
|
||||
if (!filename.starts_with("access") || !filename.ends_with(".txt"))
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't match the wildcard \"access*.txt\"",
|
||||
table_name_for_logs.getFullName(), String{data_path_in_backup_fs / filename});
|
||||
throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "File name {} doesn't match the wildcard \"access*.txt\"",
|
||||
String{data_path_in_backup_fs / filename});
|
||||
}
|
||||
|
||||
::sort(filenames.begin(), filenames.end());
|
||||
|
@ -17,7 +17,6 @@ using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
class IBackupEntry;
|
||||
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
|
||||
struct RestoreSettings;
|
||||
struct QualifiedTableName;
|
||||
|
||||
|
||||
/// Makes a backup of access entities of a specified type.
|
||||
@ -36,7 +35,7 @@ public:
|
||||
~AccessRestorerFromBackup();
|
||||
|
||||
/// Adds a data path to loads access entities from.
|
||||
void addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs);
|
||||
void addDataPath(const String & data_path);
|
||||
|
||||
/// Checks that the current user can do restoring.
|
||||
AccessRightsElements getRequiredAccess() const;
|
||||
|
@ -531,6 +531,9 @@ void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, c
|
||||
auto entities = readAllWithIDs(type);
|
||||
boost::range::remove_erase_if(entities, [](const std::pair<UUID, AccessEntityPtr> & x) { return !x.second->isBackupAllowed(); });
|
||||
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto backup_entry = makeBackupEntryForAccess(
|
||||
entities,
|
||||
data_path_in_backup,
|
||||
|
@ -627,6 +627,9 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
|
||||
auto entities = readAllWithIDs(type);
|
||||
boost::range::remove_erase_if(entities, [](const std::pair<UUID, AccessEntityPtr> & x) { return !x.second->isBackupAllowed(); });
|
||||
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto backup_entry_with_path = makeBackupEntryForAccess(
|
||||
entities,
|
||||
data_path_in_backup,
|
||||
@ -634,21 +637,18 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
|
||||
backup_entries_collector.getContext()->getAccessControl());
|
||||
|
||||
auto backup_coordination = backup_entries_collector.getBackupCoordination();
|
||||
backup_coordination->addReplicatedAccessPath(zookeeper_path, backup_entry_with_path.first);
|
||||
String current_host_id = backup_entries_collector.getBackupSettings().host_id;
|
||||
backup_coordination->setReplicatedAccessHost(zookeeper_path, current_host_id);
|
||||
backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, current_host_id, backup_entry_with_path.first);
|
||||
|
||||
backup_entries_collector.addPostTask(
|
||||
[backup_entry = backup_entry_with_path.second,
|
||||
zookeeper_path = zookeeper_path,
|
||||
type,
|
||||
current_host_id,
|
||||
&backup_entries_collector,
|
||||
backup_coordination]
|
||||
{
|
||||
if (current_host_id != backup_coordination->getReplicatedAccessHost(zookeeper_path))
|
||||
return;
|
||||
|
||||
for (const String & path : backup_coordination->getReplicatedAccessPaths(zookeeper_path))
|
||||
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type, current_host_id))
|
||||
backup_entries_collector.addBackupEntry(path, backup_entry);
|
||||
});
|
||||
}
|
||||
|
@ -1,392 +0,0 @@
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <base/chrono_io.h>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_BACKUP_TABLE;
|
||||
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
struct LessReplicaName
|
||||
{
|
||||
bool operator()(const std::shared_ptr<const String> & left, const std::shared_ptr<const String> & right) { return *left < *right; }
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
class BackupCoordinationReplicatedPartNames::CoveredPartsFinder
|
||||
{
|
||||
public:
|
||||
explicit CoveredPartsFinder(const String & table_name_for_logs_) : table_name_for_logs(table_name_for_logs_) {}
|
||||
|
||||
void addPartName(const String & new_part_name, const std::shared_ptr<const String> & replica_name)
|
||||
{
|
||||
addPartName(MergeTreePartInfo::fromPartName(new_part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING), replica_name);
|
||||
}
|
||||
|
||||
void addPartName(MergeTreePartInfo && new_part_info, const std::shared_ptr<const String> & replica_name)
|
||||
{
|
||||
auto new_min_block = new_part_info.min_block;
|
||||
auto new_max_block = new_part_info.max_block;
|
||||
auto & parts = partitions[new_part_info.partition_id];
|
||||
|
||||
/// Find the first part with max_block >= `part_info.min_block`.
|
||||
auto first_it = parts.lower_bound(new_min_block);
|
||||
if (first_it == parts.end())
|
||||
{
|
||||
/// All max_blocks < part_info.min_block, so we can safely add the `part_info` to the list of parts.
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
/// part_info.min_block <= current_info.max_block
|
||||
const auto & part = first_it->second;
|
||||
if (new_max_block < part.info.min_block)
|
||||
{
|
||||
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
|
||||
/// so we can safely add the `part_info` to the list of parts.
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
return;
|
||||
}
|
||||
|
||||
/// (part_info.min_block <= current_info.max_block) AND (part_info.max_block >= current_info.min_block), parts intersect.
|
||||
|
||||
if (part.info.contains(new_part_info))
|
||||
{
|
||||
/// `part_info` is already contained in another part.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// Probably `part_info` is going to replace multiple parts, find the range of parts to replace.
|
||||
auto last_it = first_it;
|
||||
while (last_it != parts.end())
|
||||
{
|
||||
const auto & part = last_it->second;
|
||||
if (part.info.min_block > new_max_block)
|
||||
break;
|
||||
if (!new_part_info.contains(part.info))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Intersected parts detected in the table {}: {} on replica {} and {} on replica {}. It should be investigated",
|
||||
table_name_for_logs,
|
||||
part.info.getPartName(),
|
||||
*part.replica_name,
|
||||
new_part_info.getPartName(),
|
||||
*replica_name);
|
||||
}
|
||||
++last_it;
|
||||
}
|
||||
|
||||
/// `part_info` will replace multiple parts [first_it..last_it)
|
||||
parts.erase(first_it, last_it);
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
}
|
||||
|
||||
bool isCoveredByAnotherPart(const String & part_name) const
|
||||
{
|
||||
return isCoveredByAnotherPart(MergeTreePartInfo::fromPartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING));
|
||||
}
|
||||
|
||||
bool isCoveredByAnotherPart(const MergeTreePartInfo & part_info) const
|
||||
{
|
||||
auto partition_it = partitions.find(part_info.partition_id);
|
||||
if (partition_it == partitions.end())
|
||||
return false;
|
||||
|
||||
const auto & parts = partition_it->second;
|
||||
|
||||
/// Find the first part with max_block >= `part_info.min_block`.
|
||||
auto it_part = parts.lower_bound(part_info.min_block);
|
||||
if (it_part == parts.end())
|
||||
{
|
||||
/// All max_blocks < part_info.min_block, so there is no parts covering `part_info`.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// part_info.min_block <= current_info.max_block
|
||||
const auto & existing_part = it_part->second;
|
||||
if (part_info.max_block < existing_part.info.min_block)
|
||||
{
|
||||
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
|
||||
/// so there is no parts covering `part_info`.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// (part_info.min_block <= current_info.max_block) AND (part_info.max_block >= current_info.min_block), parts intersect.
|
||||
|
||||
if (existing_part.info == part_info)
|
||||
{
|
||||
/// It's the same part, it's kind of covers itself, but we check in this function whether a part is covered by another part.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if `part_info` is covered by `current_info`.
|
||||
return existing_part.info.contains(part_info);
|
||||
}
|
||||
|
||||
private:
|
||||
struct PartInfo
|
||||
{
|
||||
MergeTreePartInfo info;
|
||||
std::shared_ptr<const String> replica_name;
|
||||
};
|
||||
|
||||
using Parts = std::map<Int64 /* max_block */, PartInfo>;
|
||||
std::unordered_map<String, Parts> partitions;
|
||||
const String table_name_for_logs;
|
||||
};
|
||||
|
||||
|
||||
BackupCoordinationReplicatedPartNames::BackupCoordinationReplicatedPartNames() = default;
|
||||
BackupCoordinationReplicatedPartNames::~BackupCoordinationReplicatedPartNames() = default;
|
||||
|
||||
void BackupCoordinationReplicatedPartNames::addPartNames(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
||||
{
|
||||
if (part_names_prepared)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
|
||||
|
||||
auto & table_info = table_infos[table_shared_id];
|
||||
if (!table_info.covered_parts_finder)
|
||||
table_info.covered_parts_finder = std::make_unique<CoveredPartsFinder>(table_name_for_logs);
|
||||
|
||||
auto replica_name_ptr = std::make_shared<String>(replica_name);
|
||||
|
||||
for (const auto & part_name_and_checksum : part_names_and_checksums)
|
||||
{
|
||||
const auto & part_name = part_name_and_checksum.part_name;
|
||||
const auto & checksum = part_name_and_checksum.checksum;
|
||||
auto it = table_info.parts_replicas.find(part_name);
|
||||
if (it == table_info.parts_replicas.end())
|
||||
{
|
||||
it = table_info.parts_replicas.emplace(part_name, PartReplicas{}).first;
|
||||
it->second.checksum = checksum;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & other = it->second;
|
||||
if (other.checksum != checksum)
|
||||
{
|
||||
const String & other_replica_name = **other.replica_names.begin();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
|
||||
table_name_for_logs,
|
||||
replica_name,
|
||||
part_name,
|
||||
other_replica_name);
|
||||
}
|
||||
}
|
||||
|
||||
auto & replica_names = it->second.replica_names;
|
||||
|
||||
/// `replica_names` should be ordered because we need this vector to be in the same order on every replica.
|
||||
replica_names.insert(
|
||||
std::upper_bound(replica_names.begin(), replica_names.end(), replica_name_ptr, LessReplicaName{}), replica_name_ptr);
|
||||
|
||||
table_info.covered_parts_finder->addPartName(part_name, replica_name_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
preparePartNames();
|
||||
auto it = table_infos.find(table_shared_id);
|
||||
if (it == table_infos.end())
|
||||
return {};
|
||||
const auto & replicas_parts = it->second.replicas_parts;
|
||||
auto it2 = replicas_parts.find(replica_name);
|
||||
if (it2 == replicas_parts.end())
|
||||
return {};
|
||||
return it2->second;
|
||||
}
|
||||
|
||||
void BackupCoordinationReplicatedPartNames::preparePartNames() const
|
||||
{
|
||||
if (part_names_prepared)
|
||||
return;
|
||||
|
||||
size_t counter = 0;
|
||||
for (const auto & table_info : table_infos | boost::adaptors::map_values)
|
||||
{
|
||||
for (const auto & [part_name, part_replicas] : table_info.parts_replicas)
|
||||
{
|
||||
if (table_info.covered_parts_finder->isCoveredByAnotherPart(part_name))
|
||||
continue;
|
||||
size_t chosen_index = (counter++) % part_replicas.replica_names.size();
|
||||
const auto & chosen_replica_name = *part_replicas.replica_names[chosen_index];
|
||||
table_info.replicas_parts[chosen_replica_name].push_back(part_name);
|
||||
}
|
||||
}
|
||||
|
||||
part_names_prepared = true;
|
||||
}
|
||||
|
||||
|
||||
/// Helps to wait until all hosts come to a specified stage.
|
||||
BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, log(log_)
|
||||
{
|
||||
createRootNodes();
|
||||
}
|
||||
|
||||
void BackupCoordinationStatusSync::createRootNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
zookeeper->createIfNotExists(zookeeper_path, "");
|
||||
}
|
||||
|
||||
void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message)
|
||||
{
|
||||
setImpl(current_host, new_status, message, {}, {});
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
{
|
||||
return setImpl(current_host, new_status, message, all_hosts, {});
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
{
|
||||
return setImpl(current_host, new_status, message, all_hosts, timeout_ms);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional<UInt64> & timeout_ms)
|
||||
{
|
||||
/// Put new status to ZooKeeper.
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
|
||||
|
||||
if (all_hosts.empty() || (new_status == kErrorStatus))
|
||||
return {};
|
||||
|
||||
if ((all_hosts.size() == 1) && (all_hosts.front() == current_host))
|
||||
return {message};
|
||||
|
||||
/// Wait for other hosts.
|
||||
|
||||
Strings ready_hosts_results;
|
||||
ready_hosts_results.resize(all_hosts.size());
|
||||
|
||||
std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
|
||||
for (size_t i = 0; i != all_hosts.size(); ++i)
|
||||
unready_hosts[all_hosts[i]].push_back(i);
|
||||
|
||||
std::optional<String> host_with_error;
|
||||
std::optional<String> error_message;
|
||||
|
||||
/// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`.
|
||||
auto process_zk_nodes = [&](const Strings & zk_nodes)
|
||||
{
|
||||
for (const String & zk_node : zk_nodes)
|
||||
{
|
||||
if (zk_node.starts_with("remove_watch-"))
|
||||
continue;
|
||||
|
||||
size_t separator_pos = zk_node.find('|');
|
||||
if (separator_pos == String::npos)
|
||||
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
|
||||
String host = zk_node.substr(0, separator_pos);
|
||||
String status = zk_node.substr(separator_pos + 1);
|
||||
if (status == kErrorStatus)
|
||||
{
|
||||
host_with_error = host;
|
||||
error_message = zookeeper->get(zookeeper_path + "/" + zk_node);
|
||||
return;
|
||||
}
|
||||
auto it = unready_hosts.find(host);
|
||||
if ((it != unready_hosts.end()) && (status == new_status))
|
||||
{
|
||||
String result = zookeeper->get(zookeeper_path + "/" + zk_node);
|
||||
for (size_t i : it->second)
|
||||
ready_hosts_results[i] = result;
|
||||
unready_hosts.erase(it);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Wait until all hosts are ready or an error happens or time is out.
|
||||
std::atomic<bool> watch_set = false;
|
||||
std::condition_variable watch_triggered_event;
|
||||
|
||||
auto watch_callback = [&](const Coordination::WatchResponse &)
|
||||
{
|
||||
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
|
||||
watch_triggered_event.notify_all();
|
||||
};
|
||||
|
||||
auto watch_triggered = [&] { return !watch_set; };
|
||||
|
||||
bool use_timeout = timeout_ms.has_value();
|
||||
std::chrono::milliseconds timeout{timeout_ms.value_or(0)};
|
||||
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
|
||||
std::chrono::steady_clock::duration elapsed;
|
||||
std::mutex dummy_mutex;
|
||||
|
||||
while (!unready_hosts.empty() && !error_message)
|
||||
{
|
||||
watch_set = true;
|
||||
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
|
||||
process_zk_nodes(nodes);
|
||||
|
||||
if (!unready_hosts.empty() && !error_message)
|
||||
{
|
||||
LOG_TRACE(log, "Waiting for host {}", unready_hosts.begin()->first);
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
if (use_timeout)
|
||||
{
|
||||
elapsed = std::chrono::steady_clock::now() - start_time;
|
||||
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
|
||||
break;
|
||||
}
|
||||
else
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
}
|
||||
}
|
||||
|
||||
if (watch_set)
|
||||
{
|
||||
/// Remove watch by triggering it.
|
||||
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
}
|
||||
|
||||
if (error_message)
|
||||
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Error occurred on host {}: {}", *host_with_error, *error_message);
|
||||
|
||||
if (!unready_hosts.empty())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
|
||||
"Waited for host {} too long ({})",
|
||||
unready_hosts.begin()->first,
|
||||
to_string(elapsed));
|
||||
}
|
||||
|
||||
return ready_hosts_results;
|
||||
}
|
||||
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Common/ZooKeeper/Common.h>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Helper designed to be used in an implementation of the IBackupCoordination interface in the part related to replicated tables.
|
||||
class BackupCoordinationReplicatedPartNames
|
||||
{
|
||||
public:
|
||||
BackupCoordinationReplicatedPartNames();
|
||||
~BackupCoordinationReplicatedPartNames();
|
||||
|
||||
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
|
||||
|
||||
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
|
||||
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
|
||||
/// getPartNames().
|
||||
/// Checksums are used only to control that parts under the same names on different replicas are the same.
|
||||
void addPartNames(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums);
|
||||
|
||||
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
|
||||
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
|
||||
/// parts covered by another parts.
|
||||
Strings getPartNames(const String & table_shared_id, const String & replica_name) const;
|
||||
|
||||
private:
|
||||
void preparePartNames() const;
|
||||
|
||||
class CoveredPartsFinder;
|
||||
|
||||
struct PartReplicas
|
||||
{
|
||||
std::vector<std::shared_ptr<const String>> replica_names;
|
||||
UInt128 checksum;
|
||||
};
|
||||
|
||||
struct TableInfo
|
||||
{
|
||||
std::map<String /* part_name */, PartReplicas> parts_replicas; /// Should be ordered because we need this map to be in the same order on every replica.
|
||||
mutable std::unordered_map<String /* replica_name> */, Strings> replicas_parts;
|
||||
std::unique_ptr<CoveredPartsFinder> covered_parts_finder;
|
||||
};
|
||||
|
||||
std::map<String /* table_shared_id */, TableInfo> table_infos; /// Should be ordered because we need this map to be in the same order on every replica.
|
||||
mutable bool part_names_prepared = false;
|
||||
};
|
||||
|
||||
|
||||
/// Helps to wait until all hosts come to a specified stage.
|
||||
class BackupCoordinationStatusSync
|
||||
{
|
||||
public:
|
||||
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
|
||||
|
||||
void set(const String & current_host, const String & new_status, const String & message);
|
||||
Strings setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts);
|
||||
Strings setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms);
|
||||
|
||||
static constexpr const char * kErrorStatus = "error";
|
||||
|
||||
private:
|
||||
void createRootNodes();
|
||||
Strings setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional<UInt64> & timeout_ms);
|
||||
|
||||
String zookeeper_path;
|
||||
zkutil::GetZooKeeper get_zookeeper;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
@ -30,60 +30,52 @@ Strings BackupCoordinationLocal::setStatusAndWaitFor(const String &, const Strin
|
||||
void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
replicated_part_names.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums);
|
||||
replicated_tables.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return replicated_part_names.getPartNames(table_shared_id, replica_name);
|
||||
return replicated_tables.getPartNames(table_shared_id, replica_name);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
replicated_tables.addMutations(table_shared_id, table_name_for_logs, replica_name, mutations);
|
||||
}
|
||||
|
||||
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return replicated_tables.getMutations(table_shared_id, replica_name);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
replicated_data_paths[table_shared_id].push_back(data_path);
|
||||
replicated_tables.addDataPath(table_shared_id, data_path);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = replicated_data_paths.find(table_shared_id);
|
||||
if (it == replicated_data_paths.end())
|
||||
return {};
|
||||
return it->second;
|
||||
return replicated_tables.getDataPaths(table_shared_id);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationLocal::addReplicatedAccessPath(const String & access_zk_path, const String & file_path)
|
||||
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
replicated_access_paths[access_zk_path].push_back(file_path);
|
||||
replicated_access.addFilePath(access_zk_path, access_entity_type, host_id, file_path);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationLocal::getReplicatedAccessPaths(const String & access_zk_path) const
|
||||
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = replicated_access_paths.find(access_zk_path);
|
||||
if (it == replicated_access_paths.end())
|
||||
return {};
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void BackupCoordinationLocal::setReplicatedAccessHost(const String & access_zk_path, const String & host_id)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
replicated_access_hosts[access_zk_path] = host_id;
|
||||
}
|
||||
|
||||
String BackupCoordinationLocal::getReplicatedAccessHost(const String & access_zk_path) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = replicated_access_hosts.find(access_zk_path);
|
||||
if (it == replicated_access_hosts.end())
|
||||
return {};
|
||||
return it->second;
|
||||
return replicated_access.getFilePaths(access_zk_path, access_entity_type, host_id);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Backups/BackupCoordinationReplicatedAccess.h>
|
||||
#include <Backups/BackupCoordinationReplicatedTables.h>
|
||||
#include <base/defines.h>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
@ -12,7 +13,7 @@ namespace Poco { class Logger; }
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Stores backup contents information in memory.
|
||||
/// Implementation of the IBackupCoordination interface performing coordination in memory.
|
||||
class BackupCoordinationLocal : public IBackupCoordination
|
||||
{
|
||||
public:
|
||||
@ -27,14 +28,15 @@ public:
|
||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
|
||||
Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override;
|
||||
|
||||
void addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
|
||||
const std::vector<MutationInfo> & mutations) override;
|
||||
std::vector<MutationInfo> getReplicatedMutations(const String & table_shared_id, const String & replica_name) const override;
|
||||
|
||||
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
|
||||
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
|
||||
|
||||
void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override;
|
||||
Strings getReplicatedAccessPaths(const String & access_zk_path) const override;
|
||||
|
||||
void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override;
|
||||
String getReplicatedAccessHost(const String & access_zk_path) const override;
|
||||
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
|
||||
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
|
||||
|
||||
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
|
||||
void updateFileInfo(const FileInfo & file_info) override;
|
||||
@ -52,15 +54,12 @@ public:
|
||||
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
BackupCoordinationReplicatedPartNames replicated_part_names TSA_GUARDED_BY(mutex);
|
||||
std::unordered_map<String, Strings> replicated_data_paths TSA_GUARDED_BY(mutex);
|
||||
std::unordered_map<String, Strings> replicated_access_paths TSA_GUARDED_BY(mutex);
|
||||
std::unordered_map<String, String> replicated_access_hosts TSA_GUARDED_BY(mutex);
|
||||
BackupCoordinationReplicatedTables replicated_tables TSA_GUARDED_BY(mutex);
|
||||
BackupCoordinationReplicatedAccess replicated_access TSA_GUARDED_BY(mutex);
|
||||
std::map<String /* file_name */, SizeAndChecksum> file_names TSA_GUARDED_BY(mutex); /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0.
|
||||
std::map<SizeAndChecksum, FileInfo> file_infos TSA_GUARDED_BY(mutex); /// Information about files. Without empty files.
|
||||
Strings archive_suffixes TSA_GUARDED_BY(mutex);
|
||||
size_t current_archive_suffix TSA_GUARDED_BY(mutex) = 0;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Backups/BackupCoordinationDistributed.h>
|
||||
#include <Backups/BackupCoordinationRemote.h>
|
||||
#include <Access/Common/AccessEntityType.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -27,6 +28,7 @@ namespace
|
||||
using SizeAndChecksum = IBackupCoordination::SizeAndChecksum;
|
||||
using FileInfo = IBackupCoordination::FileInfo;
|
||||
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
|
||||
using MutationInfo = IBackupCoordination::MutationInfo;
|
||||
|
||||
struct ReplicatedPartNames
|
||||
{
|
||||
@ -63,6 +65,41 @@ namespace
|
||||
}
|
||||
};
|
||||
|
||||
struct ReplicatedMutations
|
||||
{
|
||||
std::vector<MutationInfo> mutations;
|
||||
String table_name_for_logs;
|
||||
|
||||
static String serialize(const std::vector<MutationInfo> & mutations_, const String & table_name_for_logs_)
|
||||
{
|
||||
WriteBufferFromOwnString out;
|
||||
writeBinary(mutations_.size(), out);
|
||||
for (const auto & mutation : mutations_)
|
||||
{
|
||||
writeBinary(mutation.id, out);
|
||||
writeBinary(mutation.entry, out);
|
||||
}
|
||||
writeBinary(table_name_for_logs_, out);
|
||||
return out.str();
|
||||
}
|
||||
|
||||
static ReplicatedMutations deserialize(const String & str)
|
||||
{
|
||||
ReadBufferFromString in{str};
|
||||
ReplicatedMutations res;
|
||||
size_t num;
|
||||
readBinary(num, in);
|
||||
res.mutations.resize(num);
|
||||
for (size_t i = 0; i != num; ++i)
|
||||
{
|
||||
readBinary(res.mutations[i].id, in);
|
||||
readBinary(res.mutations[i].entry, in);
|
||||
}
|
||||
readBinary(res.table_name_for_logs, in);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
String serializeFileInfo(const FileInfo & info)
|
||||
{
|
||||
WriteBufferFromOwnString out;
|
||||
@ -128,7 +165,7 @@ namespace
|
||||
constexpr size_t NUM_ATTEMPTS = 10;
|
||||
}
|
||||
|
||||
BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
|
||||
BackupCoordinationRemote::BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination"))
|
||||
@ -136,46 +173,46 @@ BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zook
|
||||
createRootNodes();
|
||||
}
|
||||
|
||||
BackupCoordinationDistributed::~BackupCoordinationDistributed() = default;
|
||||
BackupCoordinationRemote::~BackupCoordinationRemote() = default;
|
||||
|
||||
void BackupCoordinationDistributed::createRootNodes()
|
||||
void BackupCoordinationRemote::createRootNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
zookeeper->createIfNotExists(zookeeper_path, "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_mutations", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_host", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_paths", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_access", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/file_names", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/file_infos", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", "");
|
||||
}
|
||||
|
||||
void BackupCoordinationDistributed::removeAllNodes()
|
||||
void BackupCoordinationRemote::removeAllNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->removeRecursive(zookeeper_path);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message)
|
||||
void BackupCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
|
||||
{
|
||||
status_sync.set(current_host, new_status, message);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
Strings BackupCoordinationRemote::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
{
|
||||
return status_sync.setAndWait(current_host, new_status, message, all_hosts);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
Strings BackupCoordinationRemote::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
{
|
||||
return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::addReplicatedPartNames(
|
||||
void BackupCoordinationRemote::addReplicatedPartNames(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
@ -183,8 +220,8 @@ void BackupCoordinationDistributed::addReplicatedPartNames(
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (replicated_part_names)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()");
|
||||
if (replicated_tables)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedPartNames() must not be called after preparing");
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
@ -194,17 +231,49 @@ void BackupCoordinationDistributed::addReplicatedPartNames(
|
||||
zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
||||
Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
prepareReplicatedPartNames();
|
||||
return replicated_part_names->getPartNames(table_shared_id, replica_name);
|
||||
prepareReplicatedTables();
|
||||
return replicated_tables->getPartNames(table_shared_id, replica_name);
|
||||
}
|
||||
|
||||
void BackupCoordinationRemote::addReplicatedMutations(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<MutationInfo> & mutations)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (replicated_tables)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedMutations() must not be called after preparing");
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id);
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
path += "/" + escapeForFileName(replica_name);
|
||||
zookeeper->create(path, ReplicatedMutations::serialize(mutations, table_name_for_logs), zkutil::CreateMode::Persistent);
|
||||
}
|
||||
|
||||
std::vector<IBackupCoordination::MutationInfo> BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
prepareReplicatedTables();
|
||||
return replicated_tables->getMutations(table_shared_id, replica_name);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::addReplicatedDataPath(
|
||||
void BackupCoordinationRemote::addReplicatedDataPath(
|
||||
const String & table_shared_id, const String & data_path)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (replicated_tables)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedDataPath() must not be called after preparing");
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
@ -212,83 +281,120 @@ void BackupCoordinationDistributed::addReplicatedDataPath(
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_shared_id) const
|
||||
Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id);
|
||||
Strings children = zookeeper->getChildren(path);
|
||||
Strings data_paths;
|
||||
data_paths.reserve(children.size());
|
||||
for (const String & child : children)
|
||||
data_paths.push_back(unescapeForFileName(child));
|
||||
return data_paths;
|
||||
std::lock_guard lock{mutex};
|
||||
prepareReplicatedTables();
|
||||
return replicated_tables->getDataPaths(table_shared_id);
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::prepareReplicatedPartNames() const
|
||||
void BackupCoordinationRemote::prepareReplicatedTables() const
|
||||
{
|
||||
if (replicated_part_names)
|
||||
if (replicated_tables)
|
||||
return;
|
||||
|
||||
replicated_part_names.emplace();
|
||||
replicated_tables.emplace();
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_part_names";
|
||||
for (const String & escaped_table_zk_path : zookeeper->getChildren(path))
|
||||
|
||||
{
|
||||
String table_zk_path = unescapeForFileName(escaped_table_zk_path);
|
||||
String path2 = path + "/" + escaped_table_zk_path;
|
||||
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
|
||||
String path = zookeeper_path + "/repl_part_names";
|
||||
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
|
||||
{
|
||||
String replica_name = unescapeForFileName(escaped_replica_name);
|
||||
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
|
||||
replicated_part_names->addPartNames(table_zk_path, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
|
||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||
String path2 = path + "/" + escaped_table_shared_id;
|
||||
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
|
||||
{
|
||||
String replica_name = unescapeForFileName(escaped_replica_name);
|
||||
auto part_names = ReplicatedPartNames::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
|
||||
replicated_tables->addPartNames(table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
String path = zookeeper_path + "/repl_mutations";
|
||||
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
|
||||
{
|
||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||
String path2 = path + "/" + escaped_table_shared_id;
|
||||
for (const String & escaped_replica_name : zookeeper->getChildren(path2))
|
||||
{
|
||||
String replica_name = unescapeForFileName(escaped_replica_name);
|
||||
auto mutations = ReplicatedMutations::deserialize(zookeeper->get(path2 + "/" + escaped_replica_name));
|
||||
replicated_tables->addMutations(table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
String path = zookeeper_path + "/repl_data_paths";
|
||||
for (const String & escaped_table_shared_id : zookeeper->getChildren(path))
|
||||
{
|
||||
String table_shared_id = unescapeForFileName(escaped_table_shared_id);
|
||||
String path2 = path + "/" + escaped_table_shared_id;
|
||||
for (const String & escaped_data_path : zookeeper->getChildren(path2))
|
||||
{
|
||||
String data_path = unescapeForFileName(escaped_data_path);
|
||||
replicated_tables->addDataPath(table_shared_id, data_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::addReplicatedAccessPath(const String & access_zk_path, const String & file_path)
|
||||
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (replicated_access)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addReplicatedAccessFilePath() must not be called after preparing");
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_access_paths/" + escapeForFileName(access_zk_path);
|
||||
String path = zookeeper_path + "/repl_access/" + escapeForFileName(access_zk_path);
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
path += "/" + escapeForFileName(file_path);
|
||||
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
path += "/" + host_id;
|
||||
zookeeper->createIfNotExists(path, file_path);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::getReplicatedAccessPaths(const String & access_zk_path) const
|
||||
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_access_paths/" + escapeForFileName(access_zk_path);
|
||||
Strings children = zookeeper->getChildren(path);
|
||||
Strings file_paths;
|
||||
file_paths.reserve(children.size());
|
||||
for (const String & child : children)
|
||||
file_paths.push_back(unescapeForFileName(child));
|
||||
return file_paths;
|
||||
std::lock_guard lock{mutex};
|
||||
prepareReplicatedAccess();
|
||||
return replicated_access->getFilePaths(access_zk_path, access_entity_type, host_id);
|
||||
}
|
||||
|
||||
void BackupCoordinationDistributed::setReplicatedAccessHost(const String & access_zk_path, const String & host_id)
|
||||
void BackupCoordinationRemote::prepareReplicatedAccess() const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path);
|
||||
auto code = zookeeper->tryCreate(path, host_id, zkutil::CreateMode::Persistent);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||
throw zkutil::KeeperException(code, path);
|
||||
if (replicated_access)
|
||||
return;
|
||||
|
||||
if (code == Coordination::Error::ZNODEEXISTS)
|
||||
zookeeper->set(path, host_id);
|
||||
}
|
||||
|
||||
String BackupCoordinationDistributed::getReplicatedAccessHost(const String & access_zk_path) const
|
||||
{
|
||||
replicated_access.emplace();
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path);
|
||||
return zookeeper->get(path);
|
||||
|
||||
String path = zookeeper_path + "/repl_access";
|
||||
for (const String & escaped_access_zk_path : zookeeper->getChildren(path))
|
||||
{
|
||||
String access_zk_path = unescapeForFileName(escaped_access_zk_path);
|
||||
String path2 = path + "/" + escaped_access_zk_path;
|
||||
for (const String & type_str : zookeeper->getChildren(path2))
|
||||
{
|
||||
AccessEntityType type = AccessEntityTypeInfo::parseType(type_str);
|
||||
String path3 = path2 + "/" + type_str;
|
||||
for (const String & host_id : zookeeper->getChildren(path3))
|
||||
{
|
||||
String file_path = zookeeper->get(path3 + "/" + host_id);
|
||||
replicated_access->addFilePath(access_zk_path, type, host_id, file_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
|
||||
void BackupCoordinationRemote::addFileInfo(const FileInfo & file_info, bool & is_data_file_required)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
@ -310,7 +416,7 @@ void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool
|
||||
is_data_file_required = (code == Coordination::Error::ZOK) && (file_info.size > file_info.base_size);
|
||||
}
|
||||
|
||||
void BackupCoordinationDistributed::updateFileInfo(const FileInfo & file_info)
|
||||
void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
|
||||
{
|
||||
if (!file_info.size)
|
||||
return; /// we don't keep FileInfos for empty files, nothing to update
|
||||
@ -332,7 +438,7 @@ void BackupCoordinationDistributed::updateFileInfo(const FileInfo & file_info)
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<FileInfo> BackupCoordinationDistributed::getAllFileInfos() const
|
||||
std::vector<FileInfo> BackupCoordinationRemote::getAllFileInfos() const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
std::vector<FileInfo> file_infos;
|
||||
@ -350,7 +456,7 @@ std::vector<FileInfo> BackupCoordinationDistributed::getAllFileInfos() const
|
||||
return file_infos;
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::listFiles(const String & directory, bool recursive) const
|
||||
Strings BackupCoordinationRemote::listFiles(const String & directory, bool recursive) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
|
||||
@ -383,7 +489,7 @@ Strings BackupCoordinationDistributed::listFiles(const String & directory, bool
|
||||
return elements;
|
||||
}
|
||||
|
||||
bool BackupCoordinationDistributed::hasFiles(const String & directory) const
|
||||
bool BackupCoordinationRemote::hasFiles(const String & directory) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
Strings escaped_names = zookeeper->getChildren(zookeeper_path + "/file_names");
|
||||
@ -402,7 +508,7 @@ bool BackupCoordinationDistributed::hasFiles(const String & directory) const
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const String & file_name) const
|
||||
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const String & file_name) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String size_and_checksum;
|
||||
@ -416,7 +522,7 @@ std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const String
|
||||
return file_info;
|
||||
}
|
||||
|
||||
std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const SizeAndChecksum & size_and_checksum) const
|
||||
std::optional<FileInfo> BackupCoordinationRemote::getFileInfo(const SizeAndChecksum & size_and_checksum) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String file_info_str;
|
||||
@ -425,7 +531,7 @@ std::optional<FileInfo> BackupCoordinationDistributed::getFileInfo(const SizeAnd
|
||||
return deserializeFileInfo(file_info_str);
|
||||
}
|
||||
|
||||
std::optional<SizeAndChecksum> BackupCoordinationDistributed::getFileSizeAndChecksum(const String & file_name) const
|
||||
std::optional<SizeAndChecksum> BackupCoordinationRemote::getFileSizeAndChecksum(const String & file_name) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String size_and_checksum;
|
||||
@ -434,7 +540,7 @@ std::optional<SizeAndChecksum> BackupCoordinationDistributed::getFileSizeAndChec
|
||||
return deserializeSizeAndChecksum(size_and_checksum);
|
||||
}
|
||||
|
||||
String BackupCoordinationDistributed::getNextArchiveSuffix()
|
||||
String BackupCoordinationRemote::getNextArchiveSuffix()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/archive_suffixes/a";
|
||||
@ -445,7 +551,7 @@ String BackupCoordinationDistributed::getNextArchiveSuffix()
|
||||
return formatArchiveSuffix(extractCounterFromSequentialNodeName(path_created));
|
||||
}
|
||||
|
||||
Strings BackupCoordinationDistributed::getAllArchiveSuffixes() const
|
||||
Strings BackupCoordinationRemote::getAllArchiveSuffixes() const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
Strings node_names = zookeeper->getChildren(zookeeper_path + "/archive_suffixes");
|
||||
@ -454,7 +560,7 @@ Strings BackupCoordinationDistributed::getAllArchiveSuffixes() const
|
||||
return node_names;
|
||||
}
|
||||
|
||||
void BackupCoordinationDistributed::drop()
|
||||
void BackupCoordinationRemote::drop()
|
||||
{
|
||||
removeAllNodes();
|
||||
}
|
@ -1,18 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Backups/BackupCoordinationReplicatedAccess.h>
|
||||
#include <Backups/BackupCoordinationReplicatedTables.h>
|
||||
#include <Backups/BackupCoordinationStatusSync.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Stores backup temporary information in Zookeeper, used to perform BACKUP ON CLUSTER.
|
||||
class BackupCoordinationDistributed : public IBackupCoordination
|
||||
/// Implementation of the IBackupCoordination interface performing coordination via ZooKeeper. It's necessary for "BACKUP ON CLUSTER".
|
||||
class BackupCoordinationRemote : public IBackupCoordination
|
||||
{
|
||||
public:
|
||||
BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
|
||||
~BackupCoordinationDistributed() override;
|
||||
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
|
||||
~BackupCoordinationRemote() override;
|
||||
|
||||
void setStatus(const String & current_host, const String & new_status, const String & message) override;
|
||||
Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override;
|
||||
@ -26,14 +28,19 @@ public:
|
||||
|
||||
Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override;
|
||||
|
||||
void addReplicatedMutations(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<MutationInfo> & mutations) override;
|
||||
|
||||
std::vector<MutationInfo> getReplicatedMutations(const String & table_shared_id, const String & replica_name) const override;
|
||||
|
||||
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
|
||||
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
|
||||
|
||||
void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override;
|
||||
Strings getReplicatedAccessPaths(const String & access_zk_path) const override;
|
||||
|
||||
void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override;
|
||||
String getReplicatedAccessHost(const String & access_zk_path) const override;
|
||||
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
|
||||
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
|
||||
|
||||
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
|
||||
void updateFileInfo(const FileInfo & file_info) override;
|
||||
@ -53,7 +60,8 @@ public:
|
||||
private:
|
||||
void createRootNodes();
|
||||
void removeAllNodes();
|
||||
void prepareReplicatedPartNames() const;
|
||||
void prepareReplicatedTables() const;
|
||||
void prepareReplicatedAccess() const;
|
||||
|
||||
const String zookeeper_path;
|
||||
const zkutil::GetZooKeeper get_zookeeper;
|
||||
@ -61,7 +69,8 @@ private:
|
||||
BackupCoordinationStatusSync status_sync;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
mutable std::optional<BackupCoordinationReplicatedPartNames> replicated_part_names;
|
||||
mutable std::optional<BackupCoordinationReplicatedTables> replicated_tables;
|
||||
mutable std::optional<BackupCoordinationReplicatedAccess> replicated_access;
|
||||
};
|
||||
|
||||
}
|
33
src/Backups/BackupCoordinationReplicatedAccess.cpp
Normal file
33
src/Backups/BackupCoordinationReplicatedAccess.cpp
Normal file
@ -0,0 +1,33 @@
|
||||
#include <Backups/BackupCoordinationReplicatedAccess.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
BackupCoordinationReplicatedAccess::BackupCoordinationReplicatedAccess() = default;
|
||||
BackupCoordinationReplicatedAccess::~BackupCoordinationReplicatedAccess() = default;
|
||||
|
||||
void BackupCoordinationReplicatedAccess::addFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
|
||||
{
|
||||
auto & ref = file_paths_by_zk_path[std::make_pair(access_zk_path, access_entity_type)];
|
||||
ref.file_paths.emplace(file_path);
|
||||
|
||||
/// std::max() because the calculation must give the same result being repeated on a different replica.
|
||||
ref.host_to_store_access = std::max(ref.host_to_store_access, host_id);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationReplicatedAccess::getFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
|
||||
{
|
||||
auto it = file_paths_by_zk_path.find(std::make_pair(access_zk_path, access_entity_type));
|
||||
if (it == file_paths_by_zk_path.end())
|
||||
return {};
|
||||
|
||||
const auto & file_paths = it->second;
|
||||
if (file_paths.host_to_store_access != host_id)
|
||||
return {};
|
||||
|
||||
Strings res{file_paths.file_paths.begin(), file_paths.file_paths.end()};
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
49
src/Backups/BackupCoordinationReplicatedAccess.h
Normal file
49
src/Backups/BackupCoordinationReplicatedAccess.h
Normal file
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
enum class AccessEntityType;
|
||||
|
||||
/// This class is used by hosts to coordinate the access entities of ReplicatedAccessStorage they're writing to a backup.
|
||||
/// It's designed to make all hosts save the same access entities to the backup even in case the ReplicatedAccessStorage changes
|
||||
/// while the backup is being produced. This is important to make RESTORE more predicitible.
|
||||
///
|
||||
/// For example, let's consider three replicas having a ReplicatedAccessStorage on them.
|
||||
/// This class ensures that the following files in the backup are the same:
|
||||
/// /shards/1/replicas/1/data/system/users/access01.txt
|
||||
/// /shards/1/replicas/2/data/system/users/access01.txt
|
||||
/// /shards/1/replicas/3/data/system/users/access01.txt
|
||||
///
|
||||
/// To implement that this class chooses one host to write access entities for all the hosts so in fact all those files
|
||||
/// in the example above are written by the same host.
|
||||
|
||||
class BackupCoordinationReplicatedAccess
|
||||
{
|
||||
public:
|
||||
BackupCoordinationReplicatedAccess();
|
||||
~BackupCoordinationReplicatedAccess();
|
||||
|
||||
/// Adds a path to access*.txt file keeping access entities of a ReplicatedAccessStorage.
|
||||
void addFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path);
|
||||
|
||||
/// Returns all paths added by addFilePath() if `host_id` is a host chosen to store access.
|
||||
Strings getFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const;
|
||||
|
||||
private:
|
||||
using ZkPathAndEntityType = std::pair<String, AccessEntityType>;
|
||||
|
||||
struct FilePathsAndHost
|
||||
{
|
||||
std::unordered_set<String> file_paths;
|
||||
String host_to_store_access;
|
||||
};
|
||||
|
||||
std::map<ZkPathAndEntityType, FilePathsAndHost> file_paths_by_zk_path;
|
||||
};
|
||||
|
||||
}
|
335
src/Backups/BackupCoordinationReplicatedTables.cpp
Normal file
335
src/Backups/BackupCoordinationReplicatedTables.cpp
Normal file
@ -0,0 +1,335 @@
|
||||
#include <Backups/BackupCoordinationReplicatedTables.h>
|
||||
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
||||
#include <Storages/MergeTree/ReplicatedMergeTreeMutationEntry.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_BACKUP_TABLE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
struct LessReplicaName
|
||||
{
|
||||
bool operator()(const std::shared_ptr<const String> & left, const std::shared_ptr<const String> & right) { return *left < *right; }
|
||||
};
|
||||
}
|
||||
|
||||
using MutationInfo = IBackupCoordination::MutationInfo;
|
||||
|
||||
|
||||
class BackupCoordinationReplicatedTables::CoveredPartsFinder
|
||||
{
|
||||
public:
|
||||
explicit CoveredPartsFinder(const String & table_name_for_logs_) : table_name_for_logs(table_name_for_logs_) {}
|
||||
|
||||
void addPartInfo(MergeTreePartInfo && new_part_info, const std::shared_ptr<const String> & replica_name)
|
||||
{
|
||||
auto new_min_block = new_part_info.min_block;
|
||||
auto new_max_block = new_part_info.max_block;
|
||||
auto & parts = partitions[new_part_info.partition_id];
|
||||
|
||||
/// Find the first part with max_block >= `part_info.min_block`.
|
||||
auto first_it = parts.lower_bound(new_min_block);
|
||||
if (first_it == parts.end())
|
||||
{
|
||||
/// All max_blocks < part_info.min_block, so we can safely add the `part_info` to the list of parts.
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
/// part_info.min_block <= current_info.max_block
|
||||
const auto & part = first_it->second;
|
||||
if (new_max_block < part.info.min_block)
|
||||
{
|
||||
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
|
||||
/// so we can safely add the `part_info` to the list of parts.
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
return;
|
||||
}
|
||||
|
||||
/// (part_info.min_block <= current_info.max_block) AND (part_info.max_block >= current_info.min_block), parts intersect.
|
||||
|
||||
if (part.info.contains(new_part_info))
|
||||
{
|
||||
/// `part_info` is already contained in another part.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/// Probably `part_info` is going to replace multiple parts, find the range of parts to replace.
|
||||
auto last_it = first_it;
|
||||
while (last_it != parts.end())
|
||||
{
|
||||
const auto & part = last_it->second;
|
||||
if (part.info.min_block > new_max_block)
|
||||
break;
|
||||
if (!new_part_info.contains(part.info))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Intersected parts detected: {} on replica {} and {} on replica {}",
|
||||
part.info.getPartName(),
|
||||
*part.replica_name,
|
||||
new_part_info.getPartName(),
|
||||
*replica_name);
|
||||
}
|
||||
++last_it;
|
||||
}
|
||||
|
||||
/// `part_info` will replace multiple parts [first_it..last_it)
|
||||
parts.erase(first_it, last_it);
|
||||
parts.emplace(new_max_block, PartInfo{std::move(new_part_info), replica_name});
|
||||
}
|
||||
|
||||
bool isCoveredByAnotherPart(const String & part_name) const
|
||||
{
|
||||
return isCoveredByAnotherPart(MergeTreePartInfo::fromPartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING));
|
||||
}
|
||||
|
||||
bool isCoveredByAnotherPart(const MergeTreePartInfo & part_info) const
|
||||
{
|
||||
auto partition_it = partitions.find(part_info.partition_id);
|
||||
if (partition_it == partitions.end())
|
||||
return false;
|
||||
|
||||
const auto & parts = partition_it->second;
|
||||
|
||||
/// Find the first part with max_block >= `part_info.min_block`.
|
||||
auto it_part = parts.lower_bound(part_info.min_block);
|
||||
if (it_part == parts.end())
|
||||
{
|
||||
/// All max_blocks < part_info.min_block, so there is no parts covering `part_info`.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// part_info.min_block <= current_info.max_block
|
||||
const auto & existing_part = it_part->second;
|
||||
if (part_info.max_block < existing_part.info.min_block)
|
||||
{
|
||||
/// (prev_info.max_block < part_info.min_block) AND (part_info.max_block < current_info.min_block),
|
||||
/// so there is no parts covering `part_info`.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// (part_info.min_block <= current_info.max_block) AND (part_info.max_block >= current_info.min_block), parts intersect.
|
||||
|
||||
if (existing_part.info == part_info)
|
||||
{
|
||||
/// It's the same part, it's kind of covers itself, but we check in this function whether a part is covered by another part.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if `part_info` is covered by `current_info`.
|
||||
return existing_part.info.contains(part_info);
|
||||
}
|
||||
|
||||
private:
|
||||
struct PartInfo
|
||||
{
|
||||
MergeTreePartInfo info;
|
||||
std::shared_ptr<const String> replica_name;
|
||||
};
|
||||
|
||||
using Parts = std::map<Int64 /* max_block */, PartInfo>;
|
||||
std::unordered_map<String, Parts> partitions;
|
||||
const String table_name_for_logs;
|
||||
};
|
||||
|
||||
|
||||
BackupCoordinationReplicatedTables::BackupCoordinationReplicatedTables() = default;
|
||||
BackupCoordinationReplicatedTables::~BackupCoordinationReplicatedTables() = default;
|
||||
|
||||
void BackupCoordinationReplicatedTables::addPartNames(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums)
|
||||
{
|
||||
if (prepared)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after preparing");
|
||||
|
||||
auto & table_info = table_infos[table_shared_id];
|
||||
table_info.table_name_for_logs = table_name_for_logs;
|
||||
|
||||
if (!table_info.covered_parts_finder)
|
||||
table_info.covered_parts_finder = std::make_unique<CoveredPartsFinder>(table_name_for_logs);
|
||||
|
||||
auto replica_name_ptr = std::make_shared<String>(replica_name);
|
||||
|
||||
for (const auto & part_name_and_checksum : part_names_and_checksums)
|
||||
{
|
||||
const auto & part_name = part_name_and_checksum.part_name;
|
||||
const auto & checksum = part_name_and_checksum.checksum;
|
||||
auto it = table_info.replicas_by_part_name.find(part_name);
|
||||
if (it == table_info.replicas_by_part_name.end())
|
||||
{
|
||||
it = table_info.replicas_by_part_name.emplace(part_name, PartReplicas{}).first;
|
||||
it->second.checksum = checksum;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & other = it->second;
|
||||
if (other.checksum != checksum)
|
||||
{
|
||||
const String & other_replica_name = **other.replica_names.begin();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
|
||||
table_name_for_logs,
|
||||
replica_name,
|
||||
part_name,
|
||||
other_replica_name);
|
||||
}
|
||||
}
|
||||
|
||||
auto & replica_names = it->second.replica_names;
|
||||
|
||||
/// `replica_names` should be ordered because we need this vector to be in the same order on every replica.
|
||||
replica_names.insert(
|
||||
std::upper_bound(replica_names.begin(), replica_names.end(), replica_name_ptr, LessReplicaName{}), replica_name_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
prepare();
|
||||
|
||||
auto it = table_infos.find(table_shared_id);
|
||||
if (it == table_infos.end())
|
||||
return {};
|
||||
|
||||
const auto & part_names_by_replica_name = it->second.part_names_by_replica_name;
|
||||
auto it2 = part_names_by_replica_name.find(replica_name);
|
||||
if (it2 == part_names_by_replica_name.end())
|
||||
return {};
|
||||
|
||||
return it2->second;
|
||||
}
|
||||
|
||||
void BackupCoordinationReplicatedTables::addMutations(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<MutationInfo> & mutations)
|
||||
{
|
||||
if (prepared)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "addMutations() must not be called after preparing");
|
||||
|
||||
auto & table_info = table_infos[table_shared_id];
|
||||
table_info.table_name_for_logs = table_name_for_logs;
|
||||
for (const auto & [mutation_id, mutation_entry] : mutations)
|
||||
table_info.mutations.emplace(mutation_id, mutation_entry);
|
||||
|
||||
/// std::max() because the calculation must give the same result being repeated on a different replica.
|
||||
table_info.replica_name_to_store_mutations = std::max(table_info.replica_name_to_store_mutations, replica_name);
|
||||
}
|
||||
|
||||
std::vector<MutationInfo>
|
||||
BackupCoordinationReplicatedTables::getMutations(const String & table_shared_id, const String & replica_name) const
|
||||
{
|
||||
prepare();
|
||||
|
||||
auto it = table_infos.find(table_shared_id);
|
||||
if (it == table_infos.end())
|
||||
return {};
|
||||
|
||||
const auto & table_info = it->second;
|
||||
if (table_info.replica_name_to_store_mutations != replica_name)
|
||||
return {};
|
||||
|
||||
std::vector<MutationInfo> res;
|
||||
for (const auto & [mutation_id, mutation_entry] : table_info.mutations)
|
||||
res.emplace_back(MutationInfo{mutation_id, mutation_entry});
|
||||
return res;
|
||||
}
|
||||
|
||||
void BackupCoordinationReplicatedTables::addDataPath(const String & table_shared_id, const String & data_path)
|
||||
{
|
||||
auto & table_info = table_infos[table_shared_id];
|
||||
table_info.data_paths.emplace(data_path);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationReplicatedTables::getDataPaths(const String & table_shared_id) const
|
||||
{
|
||||
auto it = table_infos.find(table_shared_id);
|
||||
if (it == table_infos.end())
|
||||
return {};
|
||||
|
||||
const auto & table_info = it->second;
|
||||
return Strings{table_info.data_paths.begin(), table_info.data_paths.end()};
|
||||
}
|
||||
|
||||
|
||||
void BackupCoordinationReplicatedTables::prepare() const
|
||||
{
|
||||
if (prepared)
|
||||
return;
|
||||
|
||||
size_t counter = 0;
|
||||
for (const auto & table_info : table_infos | boost::adaptors::map_values)
|
||||
{
|
||||
try
|
||||
{
|
||||
/// Remove parts covered by other parts.
|
||||
for (const auto & [part_name, part_replicas] : table_info.replicas_by_part_name)
|
||||
{
|
||||
auto part_info = MergeTreePartInfo::fromPartName(part_name, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING);
|
||||
|
||||
auto & min_data_versions_by_partition = table_info.min_data_versions_by_partition;
|
||||
auto it2 = min_data_versions_by_partition.find(part_info.partition_id);
|
||||
if (it2 == min_data_versions_by_partition.end())
|
||||
min_data_versions_by_partition[part_info.partition_id] = part_info.getDataVersion();
|
||||
else
|
||||
it2->second = std::min(it2->second, part_info.getDataVersion());
|
||||
|
||||
table_info.covered_parts_finder->addPartInfo(std::move(part_info), part_replicas.replica_names[0]);
|
||||
}
|
||||
|
||||
for (const auto & [part_name, part_replicas] : table_info.replicas_by_part_name)
|
||||
{
|
||||
if (table_info.covered_parts_finder->isCoveredByAnotherPart(part_name))
|
||||
continue;
|
||||
size_t chosen_index = (counter++) % part_replicas.replica_names.size();
|
||||
const auto & chosen_replica_name = *part_replicas.replica_names[chosen_index];
|
||||
table_info.part_names_by_replica_name[chosen_replica_name].push_back(part_name);
|
||||
}
|
||||
|
||||
/// Remove finished or unrelated mutations.
|
||||
std::unordered_map<String, String> unfinished_mutations;
|
||||
for (const auto & [mutation_id, mutation_entry_str] : table_info.mutations)
|
||||
{
|
||||
auto mutation_entry = ReplicatedMergeTreeMutationEntry::parse(mutation_entry_str, mutation_id);
|
||||
std::map<String, Int64> new_block_numbers;
|
||||
for (const auto & [partition_id, block_number] : mutation_entry.block_numbers)
|
||||
{
|
||||
auto it = table_info.min_data_versions_by_partition.find(partition_id);
|
||||
if ((it != table_info.min_data_versions_by_partition.end()) && (it->second < block_number))
|
||||
new_block_numbers[partition_id] = block_number;
|
||||
}
|
||||
mutation_entry.block_numbers = std::move(new_block_numbers);
|
||||
if (!mutation_entry.block_numbers.empty())
|
||||
unfinished_mutations[mutation_id] = mutation_entry.toString();
|
||||
}
|
||||
table_info.mutations = unfinished_mutations;
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While checking data of table {}", table_info.table_name_for_logs);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
prepared = true;
|
||||
}
|
||||
|
||||
}
|
103
src/Backups/BackupCoordinationReplicatedTables.h
Normal file
103
src/Backups/BackupCoordinationReplicatedTables.h
Normal file
@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Replicas used this class to coordinate how they're writing replicated tables to a backup.
|
||||
/// "BACKUP ON CLUSTER" can be executed on multiple hosts and parts of replicated tables on those hosts could be slightly different
|
||||
/// at any specific moment. This class is designed so that inside the backup all replicas would contain all the parts
|
||||
/// no matter if the replication queues of those tables are fast or slow.
|
||||
/// This is important to make RESTORE more correct and not dependent on random things like how fast the replicas doing RESTORE
|
||||
/// comparing to each other or how many replicas will be when RESTORE will be executed.
|
||||
///
|
||||
/// Example 1: Let's consider two replicas of a table, and let the first replica contain part all_1_1_0 and the second replica contain
|
||||
/// all_2_2_0. The files in the backup will look like this:
|
||||
/// /shards/1/replicas/1/data/mydb/mytable/all_1_1_0
|
||||
/// /shards/1/replicas/1/data/mydb/mytable/all_2_2_0
|
||||
/// /shards/1/replicas/2/data/mydb/mytable/all_1_1_0
|
||||
/// /shards/1/replicas/2/data/mydb/mytable/all_2_2_0
|
||||
///
|
||||
/// Example 2: Let's consider two replicas again, and let the first replica contain parts all_1_1_0 and all_2_2_0 and
|
||||
/// the second replica contain part all_1_2_1 (i.e. the second replica have those parts merged).
|
||||
/// In this case the files in the backup will look like this:
|
||||
/// /shards/1/replicas/1/data/mydb/mytable/all_1_2_1
|
||||
/// /shards/1/replicas/2/data/mydb/mytable/all_1_2_1
|
||||
|
||||
class BackupCoordinationReplicatedTables
|
||||
{
|
||||
public:
|
||||
BackupCoordinationReplicatedTables();
|
||||
~BackupCoordinationReplicatedTables();
|
||||
|
||||
using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum;
|
||||
|
||||
/// Adds part names which a specified replica of a replicated table is going to put to the backup.
|
||||
/// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function
|
||||
/// getPartNames().
|
||||
/// Checksums are used only to control that parts under the same names on different replicas are the same.
|
||||
void addPartNames(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<PartNameAndChecksum> & part_names_and_checksums);
|
||||
|
||||
/// Returns the names of the parts which a specified replica of a replicated table should put to the backup.
|
||||
/// This is the same list as it was added by call of the function addPartNames() but without duplications and without
|
||||
/// parts covered by another parts.
|
||||
Strings getPartNames(const String & table_shared_id, const String & replica_name) const;
|
||||
|
||||
using MutationInfo = IBackupCoordination::MutationInfo;
|
||||
|
||||
/// Adds information about mutations of a replicated table.
|
||||
void addMutations(
|
||||
const String & table_shared_id,
|
||||
const String & table_name_for_logs,
|
||||
const String & replica_name,
|
||||
const std::vector<MutationInfo> & mutations);
|
||||
|
||||
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
|
||||
std::vector<MutationInfo> getMutations(const String & table_shared_id, const String & replica_name) const;
|
||||
|
||||
/// Adds a data path in backup for a replicated table.
|
||||
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
|
||||
/// getDataPaths().
|
||||
void addDataPath(const String & table_shared_id, const String & data_path);
|
||||
|
||||
/// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()).
|
||||
Strings getDataPaths(const String & table_shared_id) const;
|
||||
|
||||
private:
|
||||
void prepare() const;
|
||||
|
||||
class CoveredPartsFinder;
|
||||
|
||||
struct PartReplicas
|
||||
{
|
||||
std::vector<std::shared_ptr<const String>> replica_names;
|
||||
UInt128 checksum;
|
||||
};
|
||||
|
||||
struct TableInfo
|
||||
{
|
||||
String table_name_for_logs;
|
||||
std::map<String /* part_name */, PartReplicas> replicas_by_part_name; /// Should be ordered because we need this map to be in the same order on every replica.
|
||||
mutable std::unordered_map<String /* replica_name> */, Strings> part_names_by_replica_name;
|
||||
std::unique_ptr<CoveredPartsFinder> covered_parts_finder;
|
||||
mutable std::unordered_map<String, Int64> min_data_versions_by_partition;
|
||||
mutable std::unordered_map<String, String> mutations;
|
||||
String replica_name_to_store_mutations;
|
||||
std::unordered_set<String> data_paths;
|
||||
};
|
||||
|
||||
std::map<String /* table_shared_id */, TableInfo> table_infos; /// Should be ordered because we need this map to be in the same order on every replica.
|
||||
mutable bool prepared = false;
|
||||
};
|
||||
|
||||
}
|
161
src/Backups/BackupCoordinationStatusSync.cpp
Normal file
161
src/Backups/BackupCoordinationStatusSync.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
#include <Backups/BackupCoordinationStatusSync.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <base/chrono_io.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FAILED_TO_SYNC_BACKUP_OR_RESTORE;
|
||||
}
|
||||
|
||||
|
||||
BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, log(log_)
|
||||
{
|
||||
createRootNodes();
|
||||
}
|
||||
|
||||
void BackupCoordinationStatusSync::createRootNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
zookeeper->createIfNotExists(zookeeper_path, "");
|
||||
}
|
||||
|
||||
void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message)
|
||||
{
|
||||
setImpl(current_host, new_status, message, {}, {});
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
{
|
||||
return setImpl(current_host, new_status, message, all_hosts, {});
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
{
|
||||
return setImpl(current_host, new_status, message, all_hosts, timeout_ms);
|
||||
}
|
||||
|
||||
Strings BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional<UInt64> & timeout_ms)
|
||||
{
|
||||
/// Put new status to ZooKeeper.
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
|
||||
|
||||
if (all_hosts.empty() || (new_status == kErrorStatus))
|
||||
return {};
|
||||
|
||||
if ((all_hosts.size() == 1) && (all_hosts.front() == current_host))
|
||||
return {message};
|
||||
|
||||
/// Wait for other hosts.
|
||||
|
||||
Strings ready_hosts_results;
|
||||
ready_hosts_results.resize(all_hosts.size());
|
||||
|
||||
std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
|
||||
for (size_t i = 0; i != all_hosts.size(); ++i)
|
||||
unready_hosts[all_hosts[i]].push_back(i);
|
||||
|
||||
std::optional<String> host_with_error;
|
||||
std::optional<String> error_message;
|
||||
|
||||
/// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`.
|
||||
auto process_zk_nodes = [&](const Strings & zk_nodes)
|
||||
{
|
||||
for (const String & zk_node : zk_nodes)
|
||||
{
|
||||
if (zk_node.starts_with("remove_watch-"))
|
||||
continue;
|
||||
|
||||
size_t separator_pos = zk_node.find('|');
|
||||
if (separator_pos == String::npos)
|
||||
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
|
||||
String host = zk_node.substr(0, separator_pos);
|
||||
String status = zk_node.substr(separator_pos + 1);
|
||||
if (status == kErrorStatus)
|
||||
{
|
||||
host_with_error = host;
|
||||
error_message = zookeeper->get(zookeeper_path + "/" + zk_node);
|
||||
return;
|
||||
}
|
||||
auto it = unready_hosts.find(host);
|
||||
if ((it != unready_hosts.end()) && (status == new_status))
|
||||
{
|
||||
String result = zookeeper->get(zookeeper_path + "/" + zk_node);
|
||||
for (size_t i : it->second)
|
||||
ready_hosts_results[i] = result;
|
||||
unready_hosts.erase(it);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Wait until all hosts are ready or an error happens or time is out.
|
||||
std::atomic<bool> watch_set = false;
|
||||
std::condition_variable watch_triggered_event;
|
||||
|
||||
auto watch_callback = [&](const Coordination::WatchResponse &)
|
||||
{
|
||||
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
|
||||
watch_triggered_event.notify_all();
|
||||
};
|
||||
|
||||
auto watch_triggered = [&] { return !watch_set; };
|
||||
|
||||
bool use_timeout = timeout_ms.has_value();
|
||||
std::chrono::milliseconds timeout{timeout_ms.value_or(0)};
|
||||
std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();
|
||||
std::chrono::steady_clock::duration elapsed;
|
||||
std::mutex dummy_mutex;
|
||||
|
||||
while (!unready_hosts.empty() && !error_message)
|
||||
{
|
||||
watch_set = true;
|
||||
Strings nodes = zookeeper->getChildrenWatch(zookeeper_path, nullptr, watch_callback);
|
||||
process_zk_nodes(nodes);
|
||||
|
||||
if (!unready_hosts.empty() && !error_message)
|
||||
{
|
||||
LOG_TRACE(log, "Waiting for host {}", unready_hosts.begin()->first);
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
if (use_timeout)
|
||||
{
|
||||
elapsed = std::chrono::steady_clock::now() - start_time;
|
||||
if ((elapsed > timeout) || !watch_triggered_event.wait_for(dummy_lock, timeout - elapsed, watch_triggered))
|
||||
break;
|
||||
}
|
||||
else
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
}
|
||||
}
|
||||
|
||||
if (watch_set)
|
||||
{
|
||||
/// Remove watch by triggering it.
|
||||
zookeeper->create(zookeeper_path + "/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
}
|
||||
|
||||
if (error_message)
|
||||
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Error occurred on host {}: {}", *host_with_error, *error_message);
|
||||
|
||||
if (!unready_hosts.empty())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
|
||||
"Waited for host {} too long ({})",
|
||||
unready_hosts.begin()->first,
|
||||
to_string(elapsed));
|
||||
}
|
||||
|
||||
return ready_hosts_results;
|
||||
}
|
||||
|
||||
}
|
36
src/Backups/BackupCoordinationStatusSync.h
Normal file
36
src/Backups/BackupCoordinationStatusSync.h
Normal file
@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/ZooKeeper/Common.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Used to coordinate hosts so all hosts would come to a specific status at around the same time.
|
||||
class BackupCoordinationStatusSync
|
||||
{
|
||||
public:
|
||||
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
|
||||
|
||||
/// Sets the status of the current host and signal other hosts if there were other hosts waiting for that.
|
||||
void set(const String & current_host, const String & new_status, const String & message);
|
||||
|
||||
/// Sets the status of the current host and waits until all hosts come to the same status.
|
||||
/// The function returns the messages all hosts set when they come to the required status.
|
||||
Strings setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts);
|
||||
|
||||
/// Almost the same as setAndWait() but this one stops waiting and throws an exception after a specific amount of time.
|
||||
Strings setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms);
|
||||
|
||||
static constexpr const char * kErrorStatus = "error";
|
||||
|
||||
private:
|
||||
void createRootNodes();
|
||||
Strings setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional<UInt64> & timeout_ms);
|
||||
|
||||
String zookeeper_path;
|
||||
zkutil::GetZooKeeper get_zookeeper;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
#include <Backups/BackupEntriesCollector.h>
|
||||
#include <Backups/BackupEntryFromMemory.h>
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Backups/BackupUtils.h>
|
||||
#include <Backups/DDLAdjustingForBackupVisitor.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
@ -47,7 +46,7 @@ namespace
|
||||
constexpr const char * kWritingBackupStatus = "writing backup";
|
||||
|
||||
/// Error status.
|
||||
constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus;
|
||||
constexpr const char * kErrorStatus = IBackupCoordination::kErrorStatus;
|
||||
|
||||
/// Uppercases the first character of a passed string.
|
||||
String toUpperFirst(const String & str)
|
||||
@ -436,46 +435,7 @@ void BackupEntriesCollector::gatherTablesMetadata()
|
||||
table_infos.clear();
|
||||
for (const auto & [database_name, database_info] : database_infos)
|
||||
{
|
||||
const auto & database = database_info.database;
|
||||
bool is_temporary_database = (database_name == DatabaseCatalog::TEMPORARY_DATABASE);
|
||||
|
||||
auto filter_by_table_name = [database_info = &database_info](const String & table_name)
|
||||
{
|
||||
/// We skip inner tables of materialized views.
|
||||
if (table_name.starts_with(".inner_id."))
|
||||
return false;
|
||||
|
||||
if (database_info->tables.contains(table_name))
|
||||
return true;
|
||||
|
||||
if (database_info->all_tables)
|
||||
return !database_info->except_table_names.contains(table_name);
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
auto db_tables = database->getTablesForBackup(filter_by_table_name, context);
|
||||
|
||||
std::unordered_set<String> found_table_names;
|
||||
for (const auto & db_table : db_tables)
|
||||
{
|
||||
const auto & create_table_query = db_table.first;
|
||||
const auto & create = create_table_query->as<const ASTCreateQuery &>();
|
||||
found_table_names.emplace(create.getTable());
|
||||
|
||||
if (is_temporary_database && !create.temporary)
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false));
|
||||
|
||||
if (!is_temporary_database && (create.getDatabase() != database_name))
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false));
|
||||
}
|
||||
|
||||
/// Check that all tables were found.
|
||||
for (const auto & [table_name, table_info] : database_info.tables)
|
||||
{
|
||||
if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name))
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "{} not found", tableNameWithTypeToString(database_name, table_name, true));
|
||||
}
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> db_tables = findTablesInDatabase(database_name);
|
||||
|
||||
for (const auto & db_table : db_tables)
|
||||
{
|
||||
@ -501,7 +461,7 @@ void BackupEntriesCollector::gatherTablesMetadata()
|
||||
|
||||
/// Add information to `table_infos`.
|
||||
auto & res_table_info = table_infos[QualifiedTableName{database_name, table_name}];
|
||||
res_table_info.database = database;
|
||||
res_table_info.database = database_info.database;
|
||||
res_table_info.storage = storage;
|
||||
res_table_info.create_table_query = create_table_query;
|
||||
res_table_info.metadata_path_in_backup = metadata_path_in_backup;
|
||||
@ -528,6 +488,67 @@ void BackupEntriesCollector::gatherTablesMetadata()
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInDatabase(const String & database_name) const
|
||||
{
|
||||
const auto & database_info = database_infos.at(database_name);
|
||||
const auto & database = database_info.database;
|
||||
|
||||
auto filter_by_table_name = [database_info = &database_info](const String & table_name)
|
||||
{
|
||||
/// We skip inner tables of materialized views.
|
||||
if (table_name.starts_with(".inner_id."))
|
||||
return false;
|
||||
|
||||
if (database_info->tables.contains(table_name))
|
||||
return true;
|
||||
|
||||
if (database_info->all_tables)
|
||||
return !database_info->except_table_names.contains(table_name);
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> db_tables;
|
||||
|
||||
try
|
||||
{
|
||||
db_tables = database->getTablesForBackup(filter_by_table_name, context);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While collecting tables for backup in database {}", backQuoteIfNeed(database_name));
|
||||
throw;
|
||||
}
|
||||
|
||||
std::unordered_set<String> found_table_names;
|
||||
for (const auto & db_table : db_tables)
|
||||
{
|
||||
const auto & create_table_query = db_table.first;
|
||||
const auto & create = create_table_query->as<const ASTCreateQuery &>();
|
||||
found_table_names.emplace(create.getTable());
|
||||
|
||||
if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
|
||||
{
|
||||
if (!create.temporary)
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (create.getDatabase() != database_name)
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false));
|
||||
}
|
||||
}
|
||||
|
||||
/// Check that all tables were found.
|
||||
for (const auto & [table_name, table_info] : database_info.tables)
|
||||
{
|
||||
if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name))
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "{} was not found", tableNameWithTypeToString(database_name, table_name, true));
|
||||
}
|
||||
|
||||
return db_tables;
|
||||
}
|
||||
|
||||
void BackupEntriesCollector::lockTablesForReading()
|
||||
{
|
||||
for (auto & [table_name, table_info] : table_infos)
|
||||
@ -544,7 +565,7 @@ void BackupEntriesCollector::lockTablesForReading()
|
||||
{
|
||||
if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
|
||||
throw;
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} is dropped", tableNameWithTypeToString(table_name.database, table_name.table, true));
|
||||
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} was dropped during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -648,7 +669,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs()
|
||||
if (!database_info.create_database_query)
|
||||
continue; /// We store CREATE DATABASE queries only if there was BACKUP DATABASE specified.
|
||||
|
||||
LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name));
|
||||
LOG_TRACE(log, "Adding the definition of database {} to backup", backQuoteIfNeed(database_name));
|
||||
|
||||
ASTPtr new_create_query = database_info.create_database_query;
|
||||
adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), nullptr);
|
||||
@ -664,7 +685,7 @@ void BackupEntriesCollector::makeBackupEntriesForTablesDefs()
|
||||
{
|
||||
for (auto & [table_name, table_info] : table_infos)
|
||||
{
|
||||
LOG_TRACE(log, "Adding definition of {}", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
LOG_TRACE(log, "Adding the definition of {} to backup", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
|
||||
ASTPtr new_create_query = table_info.create_table_query;
|
||||
adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), &table_info.replicated_table_shared_id);
|
||||
@ -680,24 +701,40 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData()
|
||||
if (backup_settings.structure_only)
|
||||
return;
|
||||
|
||||
for (const auto & [table_name, table_info] : table_infos)
|
||||
for (const auto & table_name : table_infos | boost::adaptors::map_keys)
|
||||
makeBackupEntriesForTableData(table_name);
|
||||
}
|
||||
|
||||
void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableName & table_name)
|
||||
{
|
||||
if (backup_settings.structure_only)
|
||||
return;
|
||||
|
||||
const auto & table_info = table_infos.at(table_name);
|
||||
const auto & storage = table_info.storage;
|
||||
const auto & data_path_in_backup = table_info.data_path_in_backup;
|
||||
|
||||
if (!storage)
|
||||
{
|
||||
const auto & storage = table_info.storage;
|
||||
const auto & data_path_in_backup = table_info.data_path_in_backup;
|
||||
if (storage)
|
||||
{
|
||||
LOG_TRACE(log, "Adding data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
storage->backupData(*this, data_path_in_backup, table_info.partitions);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Storage == null means this storage exists on other replicas but it has not been created on this replica yet.
|
||||
/// If this table is replicated in this case we call IBackupCoordination::addReplicatedDataPath() which will cause
|
||||
/// other replicas to fill the storage's data in the backup.
|
||||
/// If this table is not replicated we'll do nothing leaving the storage's data empty in the backup.
|
||||
if (table_info.replicated_table_shared_id)
|
||||
backup_coordination->addReplicatedDataPath(*table_info.replicated_table_shared_id, data_path_in_backup);
|
||||
}
|
||||
/// If storage == null that means this storage exists on other replicas but it has not been created on this replica yet.
|
||||
/// If this table is replicated in this case we call IBackupCoordination::addReplicatedDataPath() which will cause
|
||||
/// other replicas to fill the storage's data in the backup.
|
||||
/// If this table is not replicated we'll do nothing leaving the storage's data empty in the backup.
|
||||
if (table_info.replicated_table_shared_id)
|
||||
backup_coordination->addReplicatedDataPath(*table_info.replicated_table_shared_id, data_path_in_backup);
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Collecting data of {} for backup", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
|
||||
try
|
||||
{
|
||||
storage->backupData(*this, data_path_in_backup, table_info.partitions);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While collecting data of {} for backup", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
@ -716,21 +753,21 @@ void BackupEntriesCollector::addBackupEntry(const std::pair<String, BackupEntryP
|
||||
void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_)
|
||||
{
|
||||
if (current_status == kWritingBackupStatus)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
|
||||
insertAtEnd(backup_entries, backup_entries_);
|
||||
}
|
||||
|
||||
void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_)
|
||||
{
|
||||
if (current_status == kWritingBackupStatus)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of backup entries is not allowed");
|
||||
insertAtEnd(backup_entries, std::move(backup_entries_));
|
||||
}
|
||||
|
||||
void BackupEntriesCollector::addPostTask(std::function<void()> task)
|
||||
{
|
||||
if (current_status == kWritingBackupStatus)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of post tasks is not allowed");
|
||||
post_tasks.push(std::move(task));
|
||||
}
|
||||
|
||||
|
@ -75,12 +75,15 @@ private:
|
||||
const std::set<DatabaseAndTableName> & except_table_names);
|
||||
|
||||
void gatherTablesMetadata();
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> findTablesInDatabase(const String & database_name) const;
|
||||
void lockTablesForReading();
|
||||
bool compareWithPrevious(std::optional<Exception> & inconsistency_error);
|
||||
|
||||
void makeBackupEntriesForDatabasesDefs();
|
||||
void makeBackupEntriesForTablesDefs();
|
||||
void makeBackupEntriesForTablesData();
|
||||
void makeBackupEntriesForTableData(const QualifiedTableName & table_name);
|
||||
|
||||
void runPostTasks();
|
||||
|
||||
Strings setStatus(const String & new_status, const String & message = "");
|
||||
|
@ -1,9 +1,7 @@
|
||||
#include <Backups/BackupIO_Disk.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -49,17 +47,10 @@ std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_nam
|
||||
|
||||
void BackupWriterDisk::removeFilesAfterFailure(const Strings & file_names)
|
||||
{
|
||||
try
|
||||
{
|
||||
for (const auto & file_name : file_names)
|
||||
disk->removeFileIfExists(path / file_name);
|
||||
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path))
|
||||
disk->removeDirectory(path);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("BackupWriterDisk"), "RemoveFilesAfterFailure: {}", getCurrentExceptionMessage(false));
|
||||
}
|
||||
for (const auto & file_name : file_names)
|
||||
disk->removeFileIfExists(path / file_name);
|
||||
if (disk->isDirectory(path) && disk->isDirectoryEmpty(path))
|
||||
disk->removeDirectory(path);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <Backups/BackupIO_File.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Disks/IO/createReadBufferFromFileBase.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@ -50,17 +48,10 @@ std::unique_ptr<WriteBuffer> BackupWriterFile::writeFile(const String & file_nam
|
||||
|
||||
void BackupWriterFile::removeFilesAfterFailure(const Strings & file_names)
|
||||
{
|
||||
try
|
||||
{
|
||||
for (const auto & file_name : file_names)
|
||||
fs::remove(path / file_name);
|
||||
if (fs::is_directory(path) && fs::is_empty(path))
|
||||
fs::remove(path);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("BackupWriterFile"), "RemoveFilesAfterFailure: {}", getCurrentExceptionMessage(false));
|
||||
}
|
||||
for (const auto & file_name : file_names)
|
||||
fs::remove(path / file_name);
|
||||
if (fs::is_directory(path) && fs::is_empty(path))
|
||||
fs::remove(path);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <Backups/BackupIO.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Backups/BackupCoordinationLocal.h>
|
||||
#include <Backups/BackupCoordinationDistributed.h>
|
||||
#include <Backups/BackupCoordinationRemote.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/quoteString.h>
|
||||
@ -167,7 +167,14 @@ BackupImpl::BackupImpl(
|
||||
|
||||
BackupImpl::~BackupImpl()
|
||||
{
|
||||
close();
|
||||
try
|
||||
{
|
||||
close();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -231,10 +238,11 @@ void BackupImpl::close()
|
||||
archive_writer = {"", nullptr};
|
||||
|
||||
if (!is_internal_backup && writer && !writing_finalized)
|
||||
{
|
||||
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name);
|
||||
removeAllFilesAfterFailure();
|
||||
}
|
||||
|
||||
writer.reset();
|
||||
reader.reset();
|
||||
coordination.reset();
|
||||
}
|
||||
|
||||
time_t BackupImpl::getTimestamp() const
|
||||
@ -733,24 +741,33 @@ std::shared_ptr<IArchiveWriter> BackupImpl::getArchiveWriter(const String & suff
|
||||
|
||||
void BackupImpl::removeAllFilesAfterFailure()
|
||||
{
|
||||
Strings files_to_remove;
|
||||
if (use_archives)
|
||||
try
|
||||
{
|
||||
files_to_remove.push_back(archive_params.archive_name);
|
||||
for (const auto & suffix : coordination->getAllArchiveSuffixes())
|
||||
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name);
|
||||
|
||||
Strings files_to_remove;
|
||||
if (use_archives)
|
||||
{
|
||||
String archive_name_with_suffix = getArchiveNameWithSuffix(suffix);
|
||||
files_to_remove.push_back(std::move(archive_name_with_suffix));
|
||||
files_to_remove.push_back(archive_params.archive_name);
|
||||
for (const auto & suffix : coordination->getAllArchiveSuffixes())
|
||||
{
|
||||
String archive_name_with_suffix = getArchiveNameWithSuffix(suffix);
|
||||
files_to_remove.push_back(std::move(archive_name_with_suffix));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
files_to_remove.push_back(".backup");
|
||||
for (const auto & file_info : coordination->getAllFileInfos())
|
||||
files_to_remove.push_back(file_info.data_file_name);
|
||||
}
|
||||
|
||||
writer->removeFilesAfterFailure(files_to_remove);
|
||||
}
|
||||
else
|
||||
catch (...)
|
||||
{
|
||||
files_to_remove.push_back(".backup");
|
||||
for (const auto & file_info : coordination->getAllFileInfos())
|
||||
files_to_remove.push_back(file_info.data_file_name);
|
||||
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
writer->removeFilesAfterFailure(files_to_remove);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -5,9 +5,9 @@
|
||||
#include <Backups/BackupUtils.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Backups/BackupEntriesCollector.h>
|
||||
#include <Backups/BackupCoordinationDistributed.h>
|
||||
#include <Backups/BackupCoordinationRemote.h>
|
||||
#include <Backups/BackupCoordinationLocal.h>
|
||||
#include <Backups/RestoreCoordinationDistributed.h>
|
||||
#include <Backups/RestoreCoordinationRemote.h>
|
||||
#include <Backups/RestoreCoordinationLocal.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
@ -100,10 +100,10 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
|
||||
|
||||
/// Make a backup coordination.
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination;
|
||||
SCOPE_EXIT({
|
||||
SCOPE_EXIT_SAFE(
|
||||
if (backup_coordination && !backup_settings.internal)
|
||||
backup_coordination->drop();
|
||||
});
|
||||
);
|
||||
|
||||
ClusterPtr cluster;
|
||||
if (on_cluster)
|
||||
@ -120,7 +120,7 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
|
||||
|
||||
if (!backup_settings.coordination_zk_path.empty())
|
||||
{
|
||||
backup_coordination = std::make_shared<BackupCoordinationDistributed>(
|
||||
backup_coordination = std::make_shared<BackupCoordinationRemote>(
|
||||
backup_settings.coordination_zk_path,
|
||||
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
|
||||
}
|
||||
@ -278,10 +278,10 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
|
||||
|
||||
/// Make a restore coordination.
|
||||
std::shared_ptr<IRestoreCoordination> restore_coordination;
|
||||
SCOPE_EXIT({
|
||||
SCOPE_EXIT_SAFE(
|
||||
if (restore_coordination && !restore_settings.internal)
|
||||
restore_coordination->drop();
|
||||
});
|
||||
);
|
||||
|
||||
if (on_cluster && restore_settings.coordination_zk_path.empty())
|
||||
{
|
||||
@ -291,7 +291,7 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte
|
||||
|
||||
if (!restore_settings.coordination_zk_path.empty())
|
||||
{
|
||||
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(
|
||||
restore_coordination = std::make_shared<RestoreCoordinationRemote>(
|
||||
restore_settings.coordination_zk_path,
|
||||
[global_context = context_in_use->getGlobalContext()] { return global_context->getZooKeeper(); });
|
||||
}
|
||||
|
@ -6,8 +6,12 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
enum class AccessEntityType;
|
||||
|
||||
/// Keeps information about files contained in a backup.
|
||||
/// Replicas use this class to coordinate what they're writing to a backup while executing BACKUP ON CLUSTER.
|
||||
/// There are two implementation of this interface: BackupCoordinationLocal and BackupCoordinationRemote.
|
||||
/// BackupCoordinationLocal is used while executing BACKUP without ON CLUSTER and performs coordination in memory.
|
||||
/// BackupCoordinationRemote is used while executing BACKUP with ON CLUSTER and performs coordination via ZooKeeper.
|
||||
class IBackupCoordination
|
||||
{
|
||||
public:
|
||||
@ -18,6 +22,8 @@ public:
|
||||
virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0;
|
||||
virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0;
|
||||
|
||||
static constexpr const char * kErrorStatus = "error";
|
||||
|
||||
struct PartNameAndChecksum
|
||||
{
|
||||
String part_name;
|
||||
@ -36,6 +42,18 @@ public:
|
||||
/// parts covered by another parts.
|
||||
virtual Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const = 0;
|
||||
|
||||
struct MutationInfo
|
||||
{
|
||||
String id;
|
||||
String entry;
|
||||
};
|
||||
|
||||
/// Adds information about mutations of a replicated table.
|
||||
virtual void addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector<MutationInfo> & mutations) = 0;
|
||||
|
||||
/// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames().
|
||||
virtual std::vector<MutationInfo> getReplicatedMutations(const String & table_shared_id, const String & replica_name) const = 0;
|
||||
|
||||
/// Adds a data path in backup for a replicated table.
|
||||
/// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function
|
||||
/// getReplicatedDataPaths().
|
||||
@ -45,12 +63,8 @@ public:
|
||||
virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0;
|
||||
|
||||
/// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage.
|
||||
virtual void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) = 0;
|
||||
virtual Strings getReplicatedAccessPaths(const String & access_zk_path) const = 0;
|
||||
|
||||
/// Sets the host id of a host storing access entities of a ReplicatedAccessStorage to backup.
|
||||
virtual void setReplicatedAccessHost(const String & access_zk_path, const String & host) = 0;
|
||||
virtual String getReplicatedAccessHost(const String & access_zk_path) const = 0;
|
||||
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) = 0;
|
||||
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const = 0;
|
||||
|
||||
struct FileInfo
|
||||
{
|
||||
|
@ -1,37 +0,0 @@
|
||||
#include <Backups/IBackupEntriesBatch.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IBackupEntriesBatch::BackupEntryFromBatch : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
BackupEntryFromBatch(const std::shared_ptr<IBackupEntriesBatch> & generator_, size_t index_) : batch(generator_), index(index_)
|
||||
{
|
||||
assert(batch);
|
||||
}
|
||||
|
||||
UInt64 getSize() const override { return batch->getSize(index); }
|
||||
std::optional<UInt128> getChecksum() const override { return batch->getChecksum(index); }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return batch->getReadBuffer(index); }
|
||||
|
||||
private:
|
||||
const std::shared_ptr<IBackupEntriesBatch> batch;
|
||||
const size_t index;
|
||||
};
|
||||
|
||||
|
||||
BackupEntries IBackupEntriesBatch::getBackupEntries()
|
||||
{
|
||||
BackupEntries res;
|
||||
res.reserve(entry_names.size());
|
||||
for (size_t i = 0; i != entry_names.size(); ++i)
|
||||
{
|
||||
res.emplace_back(entry_names[i], std::make_unique<BackupEntryFromBatch>(shared_from_this(), i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Helper class designed to generate multiple backup entries from one source.
|
||||
class IBackupEntriesBatch : public std::enable_shared_from_this<IBackupEntriesBatch>
|
||||
{
|
||||
public:
|
||||
BackupEntries getBackupEntries();
|
||||
|
||||
virtual ~IBackupEntriesBatch() = default;
|
||||
|
||||
protected:
|
||||
IBackupEntriesBatch(const Strings & entry_names_) : entry_names(entry_names_) {}
|
||||
|
||||
virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer(size_t index) = 0;
|
||||
virtual UInt64 getSize(size_t index) = 0;
|
||||
virtual std::optional<UInt128> getChecksum(size_t) { return {}; }
|
||||
|
||||
private:
|
||||
class BackupEntryFromBatch;
|
||||
const Strings entry_names;
|
||||
};
|
||||
|
||||
}
|
77
src/Backups/IBackupEntriesLazyBatch.cpp
Normal file
77
src/Backups/IBackupEntriesLazyBatch.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <Backups/IBackupEntriesLazyBatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
class IBackupEntriesLazyBatch::BackupEntryFromBatch : public IBackupEntry
|
||||
{
|
||||
public:
|
||||
BackupEntryFromBatch(const std::shared_ptr<IBackupEntriesLazyBatch> & batch_, size_t index_) : batch(batch_), index(index_) { }
|
||||
|
||||
UInt64 getSize() const override { return getInternalBackupEntry()->getSize(); }
|
||||
std::optional<UInt128> getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
|
||||
std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return getInternalBackupEntry()->getReadBuffer(); }
|
||||
|
||||
private:
|
||||
BackupEntryPtr getInternalBackupEntry() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (!entry)
|
||||
{
|
||||
batch->generateIfNecessary();
|
||||
entry = batch->entries[index].second;
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
const std::shared_ptr<IBackupEntriesLazyBatch> batch;
|
||||
const size_t index;
|
||||
mutable std::mutex mutex;
|
||||
mutable BackupEntryPtr entry;
|
||||
};
|
||||
|
||||
|
||||
BackupEntries IBackupEntriesLazyBatch::getBackupEntries()
|
||||
{
|
||||
BackupEntries res;
|
||||
size_t size = getSize();
|
||||
res.reserve(size);
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
{
|
||||
res.emplace_back(getName(i), std::make_unique<BackupEntryFromBatch>(shared_from_this(), i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void IBackupEntriesLazyBatch::generateIfNecessary()
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (generated)
|
||||
return;
|
||||
|
||||
entries = generate();
|
||||
|
||||
if (entries.size() != getSize())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup entries were generated incorrectly");
|
||||
|
||||
for (size_t i = 0; i != entries.size(); ++i)
|
||||
{
|
||||
if (entries[i].first != getName(i))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup entries were generated incorrectly");
|
||||
}
|
||||
|
||||
generated = true;
|
||||
}
|
||||
|
||||
IBackupEntriesLazyBatch::~IBackupEntriesLazyBatch() = default;
|
||||
|
||||
}
|
30
src/Backups/IBackupEntriesLazyBatch.h
Normal file
30
src/Backups/IBackupEntriesLazyBatch.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <mutex>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Helper class designed to generate multiple backup entries from one source.
|
||||
class IBackupEntriesLazyBatch : public std::enable_shared_from_this<IBackupEntriesLazyBatch>
|
||||
{
|
||||
public:
|
||||
BackupEntries getBackupEntries();
|
||||
virtual ~IBackupEntriesLazyBatch();
|
||||
|
||||
protected:
|
||||
virtual size_t getSize() const = 0;
|
||||
virtual const String & getName(size_t i) const = 0;
|
||||
virtual BackupEntries generate() = 0;
|
||||
|
||||
private:
|
||||
void generateIfNecessary();
|
||||
|
||||
class BackupEntryFromBatch;
|
||||
std::mutex mutex;
|
||||
BackupEntries entries;
|
||||
bool generated = false;
|
||||
};
|
||||
|
||||
}
|
@ -7,7 +7,10 @@ namespace DB
|
||||
{
|
||||
using DatabaseAndTableName = std::pair<String, String>;
|
||||
|
||||
/// Keeps information about files contained in a backup.
|
||||
/// Replicas use this class to coordinate what they're reading from a backup while executing RESTORE ON CLUSTER.
|
||||
/// There are two implementation of this interface: RestoreCoordinationLocal and RestoreCoordinationRemote.
|
||||
/// RestoreCoordinationLocal is used while executing RESTORE without ON CLUSTER and performs coordination in memory.
|
||||
/// RestoreCoordinationRemote is used while executing RESTORE with ON CLUSTER and performs coordination via ZooKeeper.
|
||||
class IRestoreCoordination
|
||||
{
|
||||
public:
|
||||
@ -18,6 +21,8 @@ public:
|
||||
virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0;
|
||||
virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0;
|
||||
|
||||
static constexpr const char * kErrorStatus = "error";
|
||||
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0;
|
||||
|
||||
|
@ -11,6 +11,7 @@ namespace Poco { class Logger; }
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Implementation of the IRestoreCoordination interface performing coordination in memory.
|
||||
class RestoreCoordinationLocal : public IRestoreCoordination
|
||||
{
|
||||
public:
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include <Backups/RestoreCoordinationDistributed.h>
|
||||
#include <Backups/RestoreCoordinationRemote.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
|
||||
RestoreCoordinationRemote::RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("RestoreCoordination"))
|
||||
@ -14,9 +14,9 @@ RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zo
|
||||
createRootNodes();
|
||||
}
|
||||
|
||||
RestoreCoordinationDistributed::~RestoreCoordinationDistributed() = default;
|
||||
RestoreCoordinationRemote::~RestoreCoordinationRemote() = default;
|
||||
|
||||
void RestoreCoordinationDistributed::createRootNodes()
|
||||
void RestoreCoordinationRemote::createRootNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
@ -26,22 +26,22 @@ void RestoreCoordinationDistributed::createRootNodes()
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", "");
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message)
|
||||
void RestoreCoordinationRemote::setStatus(const String & current_host, const String & new_status, const String & message)
|
||||
{
|
||||
status_sync.set(current_host, new_status, message);
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
Strings RestoreCoordinationRemote::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts)
|
||||
{
|
||||
return status_sync.setAndWait(current_host, new_status, message, all_hosts);
|
||||
}
|
||||
|
||||
Strings RestoreCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
Strings RestoreCoordinationRemote::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms)
|
||||
{
|
||||
return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
|
||||
bool RestoreCoordinationRemote::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
@ -56,7 +56,7 @@ bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(co
|
||||
return (code == Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
|
||||
bool RestoreCoordinationRemote::acquireInsertingDataIntoReplicatedTable(const String & table_zk_path)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
@ -68,7 +68,7 @@ bool RestoreCoordinationDistributed::acquireInsertingDataIntoReplicatedTable(con
|
||||
return (code == Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
|
||||
bool RestoreCoordinationRemote::acquireReplicatedAccessStorage(const String & access_storage_zk_path)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
@ -80,13 +80,13 @@ bool RestoreCoordinationDistributed::acquireReplicatedAccessStorage(const String
|
||||
return (code == Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::removeAllNodes()
|
||||
void RestoreCoordinationRemote::removeAllNodes()
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->removeRecursive(zookeeper_path);
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::drop()
|
||||
void RestoreCoordinationRemote::drop()
|
||||
{
|
||||
removeAllNodes();
|
||||
}
|
@ -1,18 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Backups/BackupCoordinationStatusSync.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Stores restore temporary information in Zookeeper, used to perform RESTORE ON CLUSTER.
|
||||
class RestoreCoordinationDistributed : public IRestoreCoordination
|
||||
/// Implementation of the IRestoreCoordination interface performing coordination via ZooKeeper. It's necessary for "RESTORE ON CLUSTER".
|
||||
class RestoreCoordinationRemote : public IRestoreCoordination
|
||||
{
|
||||
public:
|
||||
RestoreCoordinationDistributed(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
|
||||
~RestoreCoordinationDistributed() override;
|
||||
RestoreCoordinationRemote(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
|
||||
~RestoreCoordinationRemote() override;
|
||||
|
||||
/// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
|
||||
void setStatus(const String & current_host, const String & new_status, const String & message) override;
|
@ -1,6 +1,5 @@
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Backups/BackupCoordinationHelpers.h>
|
||||
#include <Backups/BackupSettings.h>
|
||||
#include <Backups/IBackup.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
@ -54,7 +53,7 @@ namespace
|
||||
constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables";
|
||||
|
||||
/// Error status.
|
||||
constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus;
|
||||
constexpr const char * kErrorStatus = IRestoreCoordination::kErrorStatus;
|
||||
|
||||
/// Uppercases the first character of a passed string.
|
||||
String toUpperFirst(const String & str)
|
||||
@ -381,11 +380,23 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name
|
||||
insertAtEnd(*res_table_info.partitions, *partitions);
|
||||
}
|
||||
|
||||
/// Special handling for ACL-related system tables.
|
||||
if (!restore_settings.structure_only && isSystemAccessTableName(table_name))
|
||||
{
|
||||
if (!access_restorer)
|
||||
access_restorer = std::make_unique<AccessRestorerFromBackup>(backup, restore_settings);
|
||||
access_restorer->addDataPath(data_path_in_backup, table_name);
|
||||
|
||||
try
|
||||
{
|
||||
/// addDataPath() will parse access*.txt files and extract access entities from them.
|
||||
/// We need to do that early because we need those access entities to check access.
|
||||
access_restorer->addDataPath(data_path_in_backup);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While parsing data of {} from backup", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -563,33 +574,57 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const
|
||||
required_access = AccessRights{required_access}.getElements();
|
||||
|
||||
context->checkAccess(required_access);
|
||||
|
||||
}
|
||||
|
||||
void RestorerFromBackup::createDatabases()
|
||||
{
|
||||
for (const auto & [database_name, database_info] : database_infos)
|
||||
for (const auto & database_name : database_infos | boost::adaptors::map_keys)
|
||||
{
|
||||
bool need_create_database = (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist);
|
||||
if (database_info.is_predefined_database)
|
||||
need_create_database = false; /// Predefined databases always exist.
|
||||
createDatabase(database_name);
|
||||
checkDatabase(database_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (need_create_database)
|
||||
{
|
||||
/// Execute CREATE DATABASE query.
|
||||
auto create_database_query = database_info.create_database_query;
|
||||
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
|
||||
{
|
||||
create_database_query = create_database_query->clone();
|
||||
create_database_query->as<ASTCreateQuery &>().if_not_exists = true;
|
||||
}
|
||||
LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
|
||||
InterpreterCreateQuery interpreter{create_database_query, context};
|
||||
interpreter.setInternal(true);
|
||||
interpreter.execute();
|
||||
}
|
||||
void RestorerFromBackup::createDatabase(const String & database_name) const
|
||||
{
|
||||
if (restore_settings.create_database == RestoreDatabaseCreationMode::kMustExist)
|
||||
return;
|
||||
|
||||
/// Predefined databases always exist.
|
||||
const auto & database_info = database_infos.at(database_name);
|
||||
if (database_info.is_predefined_database)
|
||||
return;
|
||||
|
||||
auto create_database_query = database_info.create_database_query;
|
||||
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
|
||||
{
|
||||
create_database_query = create_database_query->clone();
|
||||
create_database_query->as<ASTCreateQuery &>().if_not_exists = true;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
|
||||
|
||||
try
|
||||
{
|
||||
/// Execute CREATE DATABASE query.
|
||||
InterpreterCreateQuery interpreter{create_database_query, context};
|
||||
interpreter.setInternal(true);
|
||||
interpreter.execute();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While creating database {}", backQuoteIfNeed(database_name));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void RestorerFromBackup::checkDatabase(const String & database_name)
|
||||
{
|
||||
auto & database_info = database_infos.at(database_name);
|
||||
try
|
||||
{
|
||||
DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name);
|
||||
database_info.database = database;
|
||||
|
||||
if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database)
|
||||
{
|
||||
@ -601,14 +636,18 @@ void RestorerFromBackup::createDatabases()
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_DATABASE,
|
||||
"The database {} has a different definition: {} "
|
||||
"The database has a different definition: {} "
|
||||
"comparing to its definition in the backup: {}",
|
||||
backQuoteIfNeed(database_name),
|
||||
serializeAST(*create_database_query),
|
||||
serializeAST(*expected_create_query));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While checking database {}", backQuoteIfNeed(database_name));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void RestorerFromBackup::createTables()
|
||||
@ -622,82 +661,123 @@ void RestorerFromBackup::createTables()
|
||||
|
||||
for (const auto & table_name : tables_to_create)
|
||||
{
|
||||
auto & table_info = table_infos.at(table_name);
|
||||
createTable(table_name);
|
||||
checkTable(table_name);
|
||||
insertDataToTable(table_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database);
|
||||
void RestorerFromBackup::createTable(const QualifiedTableName & table_name)
|
||||
{
|
||||
if (restore_settings.create_table == RestoreTableCreationMode::kMustExist)
|
||||
return;
|
||||
|
||||
bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist);
|
||||
if (table_info.is_predefined_table)
|
||||
need_create_table = false; /// Predefined tables always exist.
|
||||
/// Predefined tables always exist.
|
||||
auto & table_info = table_infos.at(table_name);
|
||||
if (table_info.is_predefined_table)
|
||||
return;
|
||||
|
||||
if (need_create_table)
|
||||
auto create_table_query = table_info.create_table_query;
|
||||
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
|
||||
{
|
||||
create_table_query = create_table_query->clone();
|
||||
create_table_query->as<ASTCreateQuery &>().if_not_exists = true;
|
||||
}
|
||||
|
||||
LOG_TRACE(
|
||||
log, "Creating {}: {}", tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query));
|
||||
|
||||
try
|
||||
{
|
||||
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database);
|
||||
table_info.database = database;
|
||||
|
||||
/// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some
|
||||
/// database-specific things).
|
||||
database->createTableRestoredFromBackup(
|
||||
create_table_query,
|
||||
context,
|
||||
restore_coordination,
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(create_table_timeout).count());
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While creating {}", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void RestorerFromBackup::checkTable(const QualifiedTableName & table_name)
|
||||
{
|
||||
auto & table_info = table_infos.at(table_name);
|
||||
auto database = table_info.database;
|
||||
|
||||
try
|
||||
{
|
||||
if (!database)
|
||||
{
|
||||
database = DatabaseCatalog::instance().getDatabase(table_name.database);
|
||||
table_info.database = database;
|
||||
}
|
||||
|
||||
auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE)
|
||||
? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
|
||||
: context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
|
||||
|
||||
StoragePtr storage = database->getTable(resolved_id.table_name, context);
|
||||
table_info.storage = storage;
|
||||
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
|
||||
|
||||
if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table)
|
||||
{
|
||||
ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context);
|
||||
adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr);
|
||||
ASTPtr expected_create_query = table_info.create_table_query;
|
||||
if (serializeAST(*create_table_query) != serializeAST(*expected_create_query))
|
||||
{
|
||||
auto create_table_query = table_info.create_table_query;
|
||||
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
|
||||
{
|
||||
create_table_query = create_table_query->clone();
|
||||
create_table_query->as<ASTCreateQuery &>().if_not_exists = true;
|
||||
}
|
||||
|
||||
LOG_TRACE(
|
||||
log,
|
||||
"Creating {}: {}",
|
||||
tableNameWithTypeToString(table_name.database, table_name.table, false),
|
||||
serializeAST(*create_table_query));
|
||||
|
||||
/// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some
|
||||
/// database-specific things).
|
||||
database->createTableRestoredFromBackup(
|
||||
create_table_query,
|
||||
context,
|
||||
restore_coordination,
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(create_table_timeout).count());
|
||||
}
|
||||
|
||||
table_info.created = true;
|
||||
|
||||
auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE)
|
||||
? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
|
||||
: context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
|
||||
|
||||
auto storage = database->getTable(resolved_id.table_name, context);
|
||||
table_info.storage = storage;
|
||||
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
|
||||
|
||||
if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table)
|
||||
{
|
||||
ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context);
|
||||
adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr);
|
||||
ASTPtr expected_create_query = table_info.create_table_query;
|
||||
if (serializeAST(*create_table_query) != serializeAST(*expected_create_query))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"{} has a different definition: {} "
|
||||
"comparing to its definition in the backup: {}",
|
||||
tableNameWithTypeToString(table_name.database, table_name.table, true),
|
||||
serializeAST(*create_table_query),
|
||||
serializeAST(*expected_create_query));
|
||||
}
|
||||
}
|
||||
|
||||
if (!restore_settings.structure_only)
|
||||
{
|
||||
const auto & data_path_in_backup = table_info.data_path_in_backup;
|
||||
const auto & partitions = table_info.partitions;
|
||||
if (partitions && !storage->supportsBackupPartition())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Table engine {} doesn't support partitions, cannot restore {}",
|
||||
storage->getName(),
|
||||
tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
}
|
||||
|
||||
storage->restoreDataFromBackup(*this, data_path_in_backup, partitions);
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"The table has a different definition: {} "
|
||||
"comparing to its definition in the backup: {}",
|
||||
serializeAST(*create_table_query),
|
||||
serializeAST(*expected_create_query));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While checking {}", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void RestorerFromBackup::insertDataToTable(const QualifiedTableName & table_name)
|
||||
{
|
||||
if (restore_settings.structure_only)
|
||||
return;
|
||||
|
||||
auto & table_info = table_infos.at(table_name);
|
||||
auto storage = table_info.storage;
|
||||
|
||||
try
|
||||
{
|
||||
const auto & data_path_in_backup = table_info.data_path_in_backup;
|
||||
const auto & partitions = table_info.partitions;
|
||||
if (partitions && !storage->supportsBackupPartition())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Table engine {} doesn't support partitions",
|
||||
storage->getName());
|
||||
}
|
||||
storage->restoreDataFromBackup(*this, data_path_in_backup, partitions);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("While restoring data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of tables without dependencies or those which dependencies have been created before.
|
||||
@ -708,7 +788,7 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
|
||||
|
||||
for (const auto & [key, table_info] : table_infos)
|
||||
{
|
||||
if (table_info.created)
|
||||
if (table_info.storage)
|
||||
continue;
|
||||
|
||||
/// Found a table which is not created yet.
|
||||
@ -719,7 +799,7 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
|
||||
for (const auto & dependency : table_info.dependencies)
|
||||
{
|
||||
auto it = table_infos.find(dependency);
|
||||
if ((it != table_infos.end()) && !it->second.created)
|
||||
if ((it != table_infos.end()) && !it->second.storage)
|
||||
{
|
||||
all_dependencies_met = false;
|
||||
break;
|
||||
@ -740,7 +820,7 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
|
||||
std::vector<QualifiedTableName> tables_with_cyclic_dependencies;
|
||||
for (const auto & [key, table_info] : table_infos)
|
||||
{
|
||||
if (!table_info.created)
|
||||
if (!table_info.storage)
|
||||
tables_with_cyclic_dependencies.push_back(key);
|
||||
}
|
||||
|
||||
@ -759,14 +839,14 @@ std::vector<QualifiedTableName> RestorerFromBackup::findTablesWithoutDependencie
|
||||
void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task)
|
||||
{
|
||||
if (current_status == kInsertingDataToTablesStatus)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
|
||||
data_restore_tasks.push_back(std::move(new_task));
|
||||
}
|
||||
|
||||
void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
|
||||
{
|
||||
if (current_status == kInsertingDataToTablesStatus)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding of data-restoring tasks is not allowed");
|
||||
insertAtEnd(data_restore_tasks, std::move(new_tasks));
|
||||
}
|
||||
|
||||
|
@ -15,10 +15,13 @@ class IBackup;
|
||||
using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
class IRestoreCoordination;
|
||||
struct StorageID;
|
||||
class IDatabase;
|
||||
using DatabasePtr = std::shared_ptr<IDatabase>;
|
||||
class AccessRestorerFromBackup;
|
||||
struct IAccessEntity;
|
||||
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
|
||||
|
||||
|
||||
/// Restores the definition of databases and tables and prepares tasks to restore the data of the tables.
|
||||
class RestorerFromBackup : private boost::noncopyable
|
||||
{
|
||||
@ -87,7 +90,13 @@ private:
|
||||
void checkAccessForObjectsFoundInBackup() const;
|
||||
|
||||
void createDatabases();
|
||||
void createDatabase(const String & database_name) const;
|
||||
void checkDatabase(const String & database_name);
|
||||
|
||||
void createTables();
|
||||
void createTable(const QualifiedTableName & table_name);
|
||||
void checkTable(const QualifiedTableName & table_name);
|
||||
void insertDataToTable(const QualifiedTableName & table_name);
|
||||
|
||||
DataRestoreTasks getDataRestoreTasks();
|
||||
|
||||
@ -97,6 +106,7 @@ private:
|
||||
{
|
||||
ASTPtr create_database_query;
|
||||
bool is_predefined_database = false;
|
||||
DatabasePtr database;
|
||||
};
|
||||
|
||||
struct TableInfo
|
||||
@ -107,7 +117,7 @@ private:
|
||||
bool has_data = false;
|
||||
std::filesystem::path data_path_in_backup;
|
||||
std::optional<ASTs> partitions;
|
||||
bool created = false;
|
||||
DatabasePtr database;
|
||||
StoragePtr storage;
|
||||
TableLockHolder table_lock;
|
||||
};
|
||||
|
@ -524,17 +524,35 @@ try
|
||||
const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
|
||||
out_file = out_file_node.value.safeGet<std::string>();
|
||||
|
||||
std::string compression_method;
|
||||
std::string compression_method_string;
|
||||
|
||||
if (query_with_output->compression)
|
||||
{
|
||||
const auto & compression_method_node = query_with_output->compression->as<ASTLiteral &>();
|
||||
compression_method = compression_method_node.value.safeGet<std::string>();
|
||||
compression_method_string = compression_method_node.value.safeGet<std::string>();
|
||||
}
|
||||
|
||||
CompressionMethod compression_method = chooseCompressionMethod(out_file, compression_method_string);
|
||||
UInt64 compression_level = 3;
|
||||
|
||||
if (query_with_output->compression_level)
|
||||
{
|
||||
const auto & compression_level_node = query_with_output->compression_level->as<ASTLiteral &>();
|
||||
bool res = compression_level_node.value.tryGet<UInt64>(compression_level);
|
||||
auto range = getCompressionLevelRange(compression_method);
|
||||
|
||||
if (!res || compression_level < range.first || compression_level > range.second)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid compression level, must be positive integer in range {}-{}",
|
||||
range.first,
|
||||
range.second);
|
||||
}
|
||||
|
||||
out_file_buf = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
|
||||
chooseCompressionMethod(out_file, compression_method),
|
||||
/* compression level = */ 3
|
||||
compression_method,
|
||||
compression_level
|
||||
);
|
||||
|
||||
// We are writing to file, so default format is the same as in non-interactive mode.
|
||||
|
@ -104,7 +104,7 @@ public:
|
||||
Field operator[](size_t n) const override { return DecimalField(data[n], scale); }
|
||||
void get(size_t n, Field & res) const override { res = (*this)[n]; }
|
||||
bool getBool(size_t n) const override { return bool(data[n].value); }
|
||||
Int64 getInt(size_t n) const override { return Int64(data[n].value) * scale; }
|
||||
Int64 getInt(size_t n) const override { return Int64(data[n].value); }
|
||||
UInt64 get64(size_t n) const override;
|
||||
bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
|
||||
|
||||
|
@ -715,29 +715,37 @@ ColumnPtr ColumnNullable::replicate(const Offsets & offsets) const
|
||||
|
||||
|
||||
template <bool negative>
|
||||
void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map)
|
||||
void ColumnNullable::applyNullMapImpl(const NullMap & map)
|
||||
{
|
||||
NullMap & arr1 = getNullMapData();
|
||||
const NullMap & arr2 = map.getData();
|
||||
NullMap & arr = getNullMapData();
|
||||
|
||||
if (arr1.size() != arr2.size())
|
||||
if (arr.size() != map.size())
|
||||
throw Exception{"Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR};
|
||||
|
||||
for (size_t i = 0, size = arr1.size(); i < size; ++i)
|
||||
arr1[i] |= negative ^ arr2[i];
|
||||
for (size_t i = 0, size = arr.size(); i < size; ++i)
|
||||
arr[i] |= negative ^ map[i];
|
||||
}
|
||||
|
||||
|
||||
void ColumnNullable::applyNullMap(const ColumnUInt8 & map)
|
||||
void ColumnNullable::applyNullMap(const NullMap & map)
|
||||
{
|
||||
applyNullMapImpl<false>(map);
|
||||
}
|
||||
|
||||
void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map)
|
||||
void ColumnNullable::applyNullMap(const ColumnUInt8 & map)
|
||||
{
|
||||
applyNullMapImpl<false>(map.getData());
|
||||
}
|
||||
|
||||
void ColumnNullable::applyNegatedNullMap(const NullMap & map)
|
||||
{
|
||||
applyNullMapImpl<true>(map);
|
||||
}
|
||||
|
||||
void ColumnNullable::applyNegatedNullMap(const ColumnUInt8 & map)
|
||||
{
|
||||
applyNullMapImpl<true>(map.getData());
|
||||
}
|
||||
|
||||
|
||||
void ColumnNullable::applyNullMap(const ColumnNullable & other)
|
||||
{
|
||||
|
@ -199,7 +199,9 @@ public:
|
||||
/// columns.
|
||||
void applyNullMap(const ColumnNullable & other);
|
||||
void applyNullMap(const ColumnUInt8 & map);
|
||||
void applyNullMap(const NullMap & map);
|
||||
void applyNegatedNullMap(const ColumnUInt8 & map);
|
||||
void applyNegatedNullMap(const NullMap & map);
|
||||
|
||||
/// Check that size of null map equals to size of nested column.
|
||||
void checkConsistency() const;
|
||||
@ -209,7 +211,7 @@ private:
|
||||
WrappedPtr null_map;
|
||||
|
||||
template <bool negative>
|
||||
void applyNullMapImpl(const ColumnUInt8 & map);
|
||||
void applyNullMapImpl(const NullMap & map);
|
||||
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int DUPLICATE_COLUMN;
|
||||
extern const int NUMBER_OF_DIMENSIONS_MISMATHED;
|
||||
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
@ -298,7 +298,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
|
||||
value_dim = column_dim;
|
||||
|
||||
if (value_dim != column_dim)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATHED,
|
||||
throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED,
|
||||
"Dimension of types mismatched between inserted value and column. "
|
||||
"Dimension of value: {}. Dimension of column: {}",
|
||||
value_dim, column_dim);
|
||||
|
@ -4,6 +4,18 @@
|
||||
#include <Common/CurrentMemoryTracker.h>
|
||||
|
||||
|
||||
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
|
||||
thread_local bool memory_tracker_always_throw_logical_error_on_allocation = false;
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -23,64 +35,66 @@ MemoryTracker * getMemoryTracker()
|
||||
|
||||
}
|
||||
|
||||
namespace CurrentMemoryTracker
|
||||
{
|
||||
|
||||
using DB::current_thread;
|
||||
|
||||
namespace
|
||||
void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded)
|
||||
{
|
||||
void allocImpl(Int64 size, bool throw_if_memory_exceeded)
|
||||
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
|
||||
if (unlikely(memory_tracker_always_throw_logical_error_on_allocation))
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
memory_tracker_always_throw_logical_error_on_allocation = false;
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
/// more. It could be useful to enlarge Exception message in rethrow logic.
|
||||
Int64 tmp = current_thread->untracked_memory;
|
||||
current_thread->untracked_memory = 0;
|
||||
memory_tracker->allocImpl(tmp, throw_if_memory_exceeded);
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
if (current_thread)
|
||||
{
|
||||
current_thread->untracked_memory += size;
|
||||
|
||||
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
|
||||
{
|
||||
memory_tracker->allocImpl(size, throw_if_memory_exceeded);
|
||||
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
|
||||
/// more. It could be useful to enlarge Exception message in rethrow logic.
|
||||
Int64 tmp = current_thread->untracked_memory;
|
||||
current_thread->untracked_memory = 0;
|
||||
memory_tracker->allocImpl(tmp, throw_if_memory_exceeded);
|
||||
}
|
||||
}
|
||||
/// total_memory_tracker only, ignore untracked_memory
|
||||
else
|
||||
{
|
||||
memory_tracker->allocImpl(size, throw_if_memory_exceeded);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void check()
|
||||
void CurrentMemoryTracker::check()
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
memory_tracker->allocImpl(0, true);
|
||||
}
|
||||
|
||||
void alloc(Int64 size)
|
||||
void CurrentMemoryTracker::alloc(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = true;
|
||||
allocImpl(size, throw_if_memory_exceeded);
|
||||
}
|
||||
|
||||
void allocNoThrow(Int64 size)
|
||||
void CurrentMemoryTracker::allocNoThrow(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = false;
|
||||
allocImpl(size, throw_if_memory_exceeded);
|
||||
}
|
||||
|
||||
void realloc(Int64 old_size, Int64 new_size)
|
||||
void CurrentMemoryTracker::realloc(Int64 old_size, Int64 new_size)
|
||||
{
|
||||
Int64 addition = new_size - old_size;
|
||||
addition > 0 ? alloc(addition) : free(-addition);
|
||||
}
|
||||
|
||||
void free(Int64 size)
|
||||
void CurrentMemoryTracker::free(Int64 size)
|
||||
{
|
||||
if (auto * memory_tracker = getMemoryTracker())
|
||||
{
|
||||
@ -101,4 +115,3 @@ void free(Int64 size)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,11 +3,17 @@
|
||||
#include <base/types.h>
|
||||
|
||||
/// Convenience methods, that use current thread's memory_tracker if it is available.
|
||||
namespace CurrentMemoryTracker
|
||||
struct CurrentMemoryTracker
|
||||
{
|
||||
void alloc(Int64 size);
|
||||
void allocNoThrow(Int64 size);
|
||||
void realloc(Int64 old_size, Int64 new_size);
|
||||
void free(Int64 size);
|
||||
void check();
|
||||
}
|
||||
/// Call the following functions before calling of corresponding operations with memory allocators.
|
||||
static void alloc(Int64 size);
|
||||
static void allocNoThrow(Int64 size);
|
||||
static void realloc(Int64 old_size, Int64 new_size);
|
||||
|
||||
/// This function should be called after memory deallocation.
|
||||
static void free(Int64 size);
|
||||
static void check();
|
||||
|
||||
private:
|
||||
static void allocImpl(Int64 size, bool throw_if_memory_exceeded);
|
||||
};
|
||||
|
@ -613,7 +613,7 @@
|
||||
M(642, CANNOT_PACK_ARCHIVE) \
|
||||
M(643, CANNOT_UNPACK_ARCHIVE) \
|
||||
M(644, REMOTE_FS_OBJECT_CACHE_ERROR) \
|
||||
M(645, NUMBER_OF_DIMENSIONS_MISMATHED) \
|
||||
M(645, NUMBER_OF_DIMENSIONS_MISMATCHED) \
|
||||
M(646, CANNOT_BACKUP_DATABASE) \
|
||||
M(647, CANNOT_BACKUP_TABLE) \
|
||||
M(648, WRONG_DDL_RENAMING_SETTINGS) \
|
||||
|
@ -7,6 +7,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SYNTAX_ERROR;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
Int32 IntervalKind::toAvgSeconds() const
|
||||
@ -15,7 +16,7 @@ Int32 IntervalKind::toAvgSeconds() const
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
case IntervalKind::Microsecond:
|
||||
case IntervalKind::Millisecond: return 0; /// fractional parts of seconds have 0 seconds
|
||||
case IntervalKind::Millisecond: return 0;
|
||||
case IntervalKind::Second: return 1;
|
||||
case IntervalKind::Minute: return 60;
|
||||
case IntervalKind::Hour: return 3600;
|
||||
@ -28,6 +29,51 @@ Int32 IntervalKind::toAvgSeconds() const
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
Float64 IntervalKind::toSeconds() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
return 0.000000001;
|
||||
case IntervalKind::Microsecond:
|
||||
return 0.000001;
|
||||
case IntervalKind::Millisecond:
|
||||
return 0.001;
|
||||
case IntervalKind::Second:
|
||||
return 1;
|
||||
case IntervalKind::Minute:
|
||||
return 60;
|
||||
case IntervalKind::Hour:
|
||||
return 3600;
|
||||
case IntervalKind::Day:
|
||||
return 86400;
|
||||
case IntervalKind::Week:
|
||||
return 604800;
|
||||
default:
|
||||
throw Exception("Not possible to get precise number of seconds in non-precise interval", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
bool IntervalKind::isFixedLength() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
case IntervalKind::Microsecond:
|
||||
case IntervalKind::Millisecond:
|
||||
case IntervalKind::Second:
|
||||
case IntervalKind::Minute:
|
||||
case IntervalKind::Hour:
|
||||
case IntervalKind::Day:
|
||||
case IntervalKind::Week: return true;
|
||||
case IntervalKind::Month:
|
||||
case IntervalKind::Quarter:
|
||||
case IntervalKind::Year: return false;
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds)
|
||||
{
|
||||
if (num_seconds)
|
||||
|
@ -33,10 +33,17 @@ struct IntervalKind
|
||||
/// For `Month`, `Quarter` and `Year` the function returns an average number of seconds.
|
||||
Int32 toAvgSeconds() const;
|
||||
|
||||
/// Returns exact number of seconds in one interval.
|
||||
/// For `Month`, `Quarter` and `Year` the function raises an error.
|
||||
Float64 toSeconds() const;
|
||||
|
||||
/// Chooses an interval kind based on number of seconds.
|
||||
/// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`.
|
||||
static IntervalKind fromAvgSeconds(Int64 num_seconds);
|
||||
|
||||
/// Returns whether IntervalKind has a fixed number of seconds (e.g. Day) or non-fixed(e.g. Month)
|
||||
bool isFixedLength() const;
|
||||
|
||||
/// Returns an uppercased version of what `toString()` returns.
|
||||
const char * toKeyword() const;
|
||||
|
||||
|
@ -19,10 +19,6 @@
|
||||
#include <string>
|
||||
|
||||
|
||||
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
|
||||
thread_local bool memory_tracker_always_throw_logical_error_on_allocation = false;
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -95,7 +91,7 @@ MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) :
|
||||
|
||||
MemoryTracker::~MemoryTracker()
|
||||
{
|
||||
if ((level == VariableContext::Process || level == VariableContext::User) && peak)
|
||||
if ((level == VariableContext::Process || level == VariableContext::User) && peak && log_peak_memory_usage_in_destructor)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -109,8 +105,9 @@ MemoryTracker::~MemoryTracker()
|
||||
}
|
||||
|
||||
|
||||
void MemoryTracker::logPeakMemoryUsage() const
|
||||
void MemoryTracker::logPeakMemoryUsage()
|
||||
{
|
||||
log_peak_memory_usage_in_destructor = false;
|
||||
const auto * description = description_ptr.load(std::memory_order_relaxed);
|
||||
LOG_DEBUG(&Poco::Logger::get("MemoryTracker"),
|
||||
"Peak memory usage{}: {}.", (description ? " " + std::string(description) : ""), ReadableSize(peak));
|
||||
@ -169,14 +166,6 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
|
||||
if (unlikely(memory_tracker_always_throw_logical_error_on_allocation))
|
||||
{
|
||||
memory_tracker_always_throw_logical_error_on_allocation = false;
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
|
||||
}
|
||||
#endif
|
||||
|
||||
std::bernoulli_distribution fault(fault_probability);
|
||||
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true) && throw_if_memory_exceeded)
|
||||
{
|
||||
@ -270,16 +259,12 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
level == VariableContext::Process ? this : query_tracker);
|
||||
}
|
||||
|
||||
void MemoryTracker::alloc(Int64 size)
|
||||
void MemoryTracker::adjustWithUntrackedMemory(Int64 untracked_memory)
|
||||
{
|
||||
bool throw_if_memory_exceeded = true;
|
||||
allocImpl(size, throw_if_memory_exceeded);
|
||||
}
|
||||
|
||||
void MemoryTracker::allocNoThrow(Int64 size)
|
||||
{
|
||||
bool throw_if_memory_exceeded = false;
|
||||
allocImpl(size, throw_if_memory_exceeded);
|
||||
if (untracked_memory > 0)
|
||||
allocImpl(untracked_memory, /*throw_if_memory_exceeded*/ false);
|
||||
else
|
||||
free(-untracked_memory);
|
||||
}
|
||||
|
||||
bool MemoryTracker::updatePeak(Int64 will_be, bool log_memory_usage)
|
||||
|
@ -78,11 +78,17 @@ private:
|
||||
|
||||
std::atomic<OvercommitTracker *> overcommit_tracker = nullptr;
|
||||
|
||||
bool log_peak_memory_usage_in_destructor = true;
|
||||
|
||||
bool updatePeak(Int64 will_be, bool log_memory_usage);
|
||||
void logMemoryUsage(Int64 current) const;
|
||||
|
||||
void setOrRaiseProfilerLimit(Int64 value);
|
||||
|
||||
/// allocImpl(...) and free(...) should not be used directly
|
||||
friend struct CurrentMemoryTracker;
|
||||
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
|
||||
void free(Int64 size);
|
||||
public:
|
||||
|
||||
static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage";
|
||||
@ -94,26 +100,7 @@ public:
|
||||
|
||||
VariableContext level;
|
||||
|
||||
/** Call the following functions before calling of corresponding operations with memory allocators.
|
||||
*/
|
||||
void alloc(Int64 size);
|
||||
|
||||
void allocNoThrow(Int64 size);
|
||||
|
||||
void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr);
|
||||
|
||||
void realloc(Int64 old_size, Int64 new_size)
|
||||
{
|
||||
Int64 addition = new_size - old_size;
|
||||
if (addition > 0)
|
||||
alloc(addition);
|
||||
else
|
||||
free(-addition);
|
||||
}
|
||||
|
||||
/** This function should be called after memory deallocation.
|
||||
*/
|
||||
void free(Int64 size);
|
||||
void adjustWithUntrackedMemory(Int64 untracked_memory);
|
||||
|
||||
Int64 get() const
|
||||
{
|
||||
@ -216,7 +203,7 @@ public:
|
||||
void set(Int64 to);
|
||||
|
||||
/// Prints info about peak memory consumption into log.
|
||||
void logPeakMemoryUsage() const;
|
||||
void logPeakMemoryUsage();
|
||||
};
|
||||
|
||||
extern MemoryTracker total_memory_tracker;
|
||||
|
@ -148,10 +148,7 @@ ThreadStatus::ThreadStatus()
|
||||
|
||||
ThreadStatus::~ThreadStatus()
|
||||
{
|
||||
if (untracked_memory > 0)
|
||||
memory_tracker.allocNoThrow(untracked_memory);
|
||||
else
|
||||
memory_tracker.free(-untracked_memory);
|
||||
memory_tracker.adjustWithUntrackedMemory(untracked_memory);
|
||||
|
||||
if (thread_group)
|
||||
{
|
||||
|
@ -24,9 +24,7 @@ public:
|
||||
static void check(Coordination::Error code, const Coordination::Requests & requests, const Coordination::Responses & responses);
|
||||
|
||||
KeeperMultiException(Coordination::Error code, const Coordination::Requests & requests, const Coordination::Responses & responses);
|
||||
|
||||
private:
|
||||
static size_t getFailedOpIndex(Coordination::Error code, const Coordination::Responses & responses);
|
||||
};
|
||||
|
||||
size_t getFailedOpIndex(Coordination::Error code, const Coordination::Responses & responses);
|
||||
}
|
||||
|
@ -885,7 +885,7 @@ void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path)
|
||||
if (!tryGet(path, content, nullptr, eph_node_disappeared))
|
||||
return;
|
||||
|
||||
int32_t timeout_ms = 2 * session_timeout_ms;
|
||||
int32_t timeout_ms = 3 * session_timeout_ms;
|
||||
if (!eph_node_disappeared->tryWait(timeout_ms))
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
|
||||
"Ephemeral node {} still exists after {}s, probably it's owned by someone else. "
|
||||
@ -1227,7 +1227,7 @@ void ZooKeeper::setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_)
|
||||
}
|
||||
|
||||
|
||||
size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
|
||||
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
|
||||
{
|
||||
if (responses.empty())
|
||||
throw DB::Exception("Responses for multi transaction is empty", DB::ErrorCodes::LOGICAL_ERROR);
|
||||
|
@ -134,6 +134,7 @@ struct ZooKeeperWatchResponse final : WatchResponse, ZooKeeperResponse
|
||||
|
||||
OpNum getOpNum() const override
|
||||
{
|
||||
chassert(false);
|
||||
throw Exception("OpNum for watch response doesn't exist", Error::ZRUNTIMEINCONSISTENCY);
|
||||
}
|
||||
|
||||
|
@ -215,7 +215,8 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe
|
||||
/// Session was disconnected, just skip this response
|
||||
if (session_response_callback == session_to_response_callback.end())
|
||||
{
|
||||
LOG_TEST(log, "Cannot write response xid={}, op={}, session {} disconnected", response->xid, response->getOpNum(), session_id);
|
||||
LOG_TEST(log, "Cannot write response xid={}, op={}, session {} disconnected",
|
||||
response->xid, response->xid == Coordination::WATCH_XID ? "Watch" : toString(response->getOpNum()), session_id);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -30,12 +30,14 @@ IMPLEMENT_SETTING_ENUM(JoinStrictness, ErrorCodes::UNKNOWN_JOIN,
|
||||
{"ANY", JoinStrictness::ANY}})
|
||||
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
|
||||
IMPLEMENT_SETTING_MULTI_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
|
||||
{{"auto", JoinAlgorithm::AUTO},
|
||||
{"hash", JoinAlgorithm::HASH},
|
||||
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
|
||||
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
|
||||
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})
|
||||
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH},
|
||||
{"direct", JoinAlgorithm::DIRECT},
|
||||
{"full_sorting_merge", JoinAlgorithm::FULL_SORTING_MERGE}})
|
||||
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,
|
||||
|
@ -43,9 +43,11 @@ enum class JoinAlgorithm
|
||||
PARTIAL_MERGE,
|
||||
PREFER_PARTIAL_MERGE,
|
||||
PARALLEL_HASH,
|
||||
DIRECT,
|
||||
FULL_SORTING_MERGE,
|
||||
};
|
||||
|
||||
DECLARE_SETTING_ENUM(JoinAlgorithm)
|
||||
DECLARE_SETTING_MULTI_ENUM(JoinAlgorithm)
|
||||
|
||||
|
||||
/// Which rows should be included in TOTALS.
|
||||
|
@ -192,7 +192,8 @@ namespace
|
||||
}
|
||||
|
||||
template <>
|
||||
SettingFieldSeconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(float64AsSecondsToTimespan(fieldToNumber<Float64>(f)))
|
||||
SettingFieldSeconds::SettingFieldTimespan(const Field & f)
|
||||
: SettingFieldTimespan(Poco::Timespan{float64AsSecondsToTimespan(fieldToNumber<Float64>(f))})
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -142,9 +142,12 @@ struct SortCursorImpl
|
||||
bool isLast() const { return pos + 1 >= rows; }
|
||||
bool isLast(size_t size) const { return pos + size >= rows; }
|
||||
bool isValid() const { return pos < rows; }
|
||||
|
||||
void next() { ++pos; }
|
||||
void next(size_t size) { pos += size; }
|
||||
|
||||
size_t getSize() const { return rows; }
|
||||
size_t rowsLeft() const { return rows - pos; }
|
||||
|
||||
/// Prevent using pos instead of getRow()
|
||||
private:
|
||||
|
@ -395,7 +395,12 @@ private:
|
||||
#if defined(SANITIZER)
|
||||
extern "C" void __sanitizer_set_death_callback(void (*)());
|
||||
|
||||
static void sanitizerDeathCallback()
|
||||
/// Sanitizers may not expect some function calls from death callback.
|
||||
/// Let's try to disable instrumentation to avoid possible issues.
|
||||
/// However, this callback may call other functions that are still instrumented.
|
||||
/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15)
|
||||
/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549.
|
||||
static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
/// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.
|
||||
|
@ -4,8 +4,7 @@ add_headers_and_sources(clickhouse_dictionaries .)
|
||||
|
||||
add_headers_and_sources(clickhouse_dictionaries "${CMAKE_CURRENT_BINARY_DIR}/generated/")
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
|
||||
set_source_files_properties(
|
||||
FlatDictionary.cpp
|
||||
@ -15,7 +14,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW
|
||||
RangeHashedDictionary.cpp
|
||||
DirectDictionary.cpp
|
||||
PROPERTIES COMPILE_FLAGS -g0)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
list(REMOVE_ITEM clickhouse_dictionaries_sources DictionaryFactory.cpp DictionarySourceFactory.cpp DictionaryStructure.cpp getDictionaryConfigurationFromAST.cpp)
|
||||
list(REMOVE_ITEM clickhouse_dictionaries_headers DictionaryFactory.h DictionarySourceFactory.h DictionaryStructure.h getDictionaryConfigurationFromAST.h)
|
||||
|
@ -35,25 +35,8 @@ if (TARGET OpenSSL::Crypto)
|
||||
target_link_libraries(clickhouse_functions PUBLIC OpenSSL::Crypto)
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
set (STRIP_DSF_DEFAULT ON)
|
||||
else()
|
||||
set (STRIP_DSF_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
|
||||
# Provides faster linking and lower binary size.
|
||||
# Tradeoff is the inability to debug some source files with e.g. gdb
|
||||
# (empty stack frames and no local variables)."
|
||||
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS "Do not generate debugger info for ClickHouse functions" ${STRIP_DSF_DEFAULT})
|
||||
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
message(INFO "Not generating debugger info for ClickHouse functions")
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options(clickhouse_functions PRIVATE "-g0")
|
||||
else()
|
||||
message(STATUS "Generating debugger info for ClickHouse functions")
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::icu)
|
||||
|
@ -46,7 +46,7 @@ public:
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
@ -65,26 +65,30 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const ColumnPtr & column_haystack = arguments[0].column;
|
||||
const ColumnPtr & num_ptr = arguments[1].column;
|
||||
const ColumnPtr & arr_ptr = arguments[2].column;
|
||||
const ColumnPtr & haystack_ptr = arguments[0].column;
|
||||
const ColumnPtr & edit_distance_ptr = arguments[1].column;
|
||||
const ColumnPtr & needles_ptr = arguments[2].column;
|
||||
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
assert(col_haystack_vector); // getReturnTypeImpl() checks the data type
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*haystack_ptr);
|
||||
const ColumnConst * col_haystack_const = checkAndGetColumnConst<ColumnString>(&*haystack_ptr);
|
||||
assert(static_cast<bool>(col_haystack_vector) ^ static_cast<bool>(col_haystack_const));
|
||||
|
||||
UInt32 edit_distance = 0;
|
||||
if (const auto * col_const_uint8 = checkAndGetColumnConst<ColumnUInt8>(num_ptr.get()))
|
||||
if (const auto * col_const_uint8 = checkAndGetColumnConst<ColumnUInt8>(edit_distance_ptr.get()))
|
||||
edit_distance = col_const_uint8->getValue<UInt8>();
|
||||
else if (const auto * col_const_uint16 = checkAndGetColumnConst<ColumnUInt16>(num_ptr.get()))
|
||||
else if (const auto * col_const_uint16 = checkAndGetColumnConst<ColumnUInt16>(edit_distance_ptr.get()))
|
||||
edit_distance = col_const_uint16->getValue<UInt16>();
|
||||
else if (const auto * col_const_uint32 = checkAndGetColumnConst<ColumnUInt32>(num_ptr.get()))
|
||||
else if (const auto * col_const_uint32 = checkAndGetColumnConst<ColumnUInt32>(edit_distance_ptr.get()))
|
||||
edit_distance = col_const_uint32->getValue<UInt32>();
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The number is not const or does not fit in UInt32", arguments[1].column->getName());
|
||||
|
||||
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
|
||||
if (!col_const_arr)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The array is not const", arguments[2].column->getName());
|
||||
const ColumnArray * col_needles_vector = checkAndGetColumn<ColumnArray>(needles_ptr.get());
|
||||
const ColumnConst * col_needles_const = checkAndGetColumnConst<ColumnArray>(needles_ptr.get());
|
||||
assert(static_cast<bool>(col_needles_vector) ^ static_cast<bool>(col_needles_const));
|
||||
|
||||
if (col_haystack_const && col_needles_vector)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support search with non-constant needles in constant haystack", name);
|
||||
|
||||
using ResultType = typename Impl::ResultType;
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
@ -92,12 +96,30 @@ public:
|
||||
|
||||
auto & vec_res = col_res->getData();
|
||||
auto & offsets_res = col_offsets->getData();
|
||||
// the implementations are responsible for resizing the output column
|
||||
/// the implementations are responsible for resizing the output column
|
||||
|
||||
Array needles_arr = col_const_arr->getValue<Array>();
|
||||
Impl::vectorConstant(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res, edit_distance,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
if (col_needles_const)
|
||||
{
|
||||
Impl::vectorConstant(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(),
|
||||
col_needles_const->getValue<Array>(),
|
||||
vec_res, offsets_res,
|
||||
edit_distance,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
Impl::vectorVector(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(),
|
||||
col_needles_vector->getData(), col_needles_vector->getOffsets(),
|
||||
vec_res, offsets_res,
|
||||
edit_distance,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
// the combination of const haystack + const needle is not implemented because
|
||||
// useDefaultImplementationForConstants() == true makes upper layers convert both to
|
||||
// non-const columns
|
||||
|
||||
if constexpr (Impl::is_column_array)
|
||||
return ColumnArray::create(std::move(col_res), std::move(col_offsets));
|
||||
|
@ -61,7 +61,6 @@ public:
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
@ -77,15 +76,19 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const ColumnPtr & column_haystack = arguments[0].column;
|
||||
const ColumnPtr & arr_ptr = arguments[1].column;
|
||||
const ColumnPtr & haystack_ptr = arguments[0].column;
|
||||
const ColumnPtr & needles_ptr = arguments[1].column;
|
||||
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
|
||||
assert(col_haystack_vector); // getReturnTypeImpl() checks the data type
|
||||
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*haystack_ptr);
|
||||
const ColumnConst * col_haystack_const = checkAndGetColumnConst<ColumnString>(&*haystack_ptr);
|
||||
assert(static_cast<bool>(col_haystack_vector) ^ static_cast<bool>(col_haystack_const));
|
||||
|
||||
const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arr_ptr.get());
|
||||
if (!col_const_arr)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}. The array is not const", arguments[1].column->getName());
|
||||
const ColumnArray * col_needles_vector = checkAndGetColumn<ColumnArray>(needles_ptr.get());
|
||||
const ColumnConst * col_needles_const = checkAndGetColumnConst<ColumnArray>(needles_ptr.get());
|
||||
assert(static_cast<bool>(col_needles_vector) ^ static_cast<bool>(col_needles_const));
|
||||
|
||||
if (col_haystack_const && col_needles_vector)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support search with non-constant needles in constant haystack", name);
|
||||
|
||||
using ResultType = typename Impl::ResultType;
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
@ -93,12 +96,28 @@ public:
|
||||
|
||||
auto & vec_res = col_res->getData();
|
||||
auto & offsets_res = col_offsets->getData();
|
||||
// the implementations are responsible for resizing the output column
|
||||
/// the implementations are responsible for resizing the output column
|
||||
|
||||
Array needles_arr = col_const_arr->getValue<Array>();
|
||||
Impl::vectorConstant(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needles_arr, vec_res, offsets_res,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
if (col_needles_const)
|
||||
{
|
||||
Impl::vectorConstant(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(),
|
||||
col_needles_const->getValue<Array>(),
|
||||
vec_res, offsets_res,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
Impl::vectorVector(
|
||||
col_haystack_vector->getChars(), col_haystack_vector->getOffsets(),
|
||||
col_needles_vector->getData(), col_needles_vector->getOffsets(),
|
||||
vec_res, offsets_res,
|
||||
allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
// the combination of const haystack + const needle is not implemented because
|
||||
// useDefaultImplementationForConstants() == true makes upper layers convert both to
|
||||
// non-const columns
|
||||
|
||||
if constexpr (Impl::is_column_array)
|
||||
return ColumnArray::create(std::move(col_res), std::move(col_offsets));
|
||||
|
@ -12,7 +12,7 @@ if (HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
|
||||
target_compile_definitions(clickhouse_functions_gatherutils PUBLIC HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
|
||||
endif()
|
||||
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
|
||||
endif()
|
||||
|
||||
|
@ -8,6 +8,6 @@ target_link_libraries(clickhouse_functions_jsonpath PRIVATE dbms)
|
||||
target_link_libraries(clickhouse_functions_jsonpath PRIVATE clickhouse_parsers)
|
||||
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_jsonpath)
|
||||
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options(clickhouse_functions_jsonpath PRIVATE "-g0")
|
||||
endif()
|
||||
|
@ -58,15 +58,15 @@ struct MultiMatchAllIndicesImpl
|
||||
}
|
||||
|
||||
static void vectorConstant(
|
||||
[[maybe_unused]] const ColumnString::Chars & haystack_data,
|
||||
[[maybe_unused]] const ColumnString::Offsets & haystack_offsets,
|
||||
[[maybe_unused]] const Array & needles_arr,
|
||||
[[maybe_unused]] PaddedPODArray<ResultType> & res,
|
||||
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
||||
[[maybe_unused]] std::optional<UInt32> edit_distance,
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const Array & needles_arr,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & offsets,
|
||||
std::optional<UInt32> edit_distance,
|
||||
bool allow_hyperscan,
|
||||
[[maybe_unused]] size_t max_hyperscan_regexp_length,
|
||||
[[maybe_unused]] size_t max_hyperscan_regexp_total_length)
|
||||
size_t max_hyperscan_regexp_length,
|
||||
size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (!allow_hyperscan)
|
||||
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
|
||||
@ -79,6 +79,7 @@ struct MultiMatchAllIndicesImpl
|
||||
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
offsets.resize(haystack_offsets.size());
|
||||
|
||||
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
|
||||
@ -102,10 +103,10 @@ struct MultiMatchAllIndicesImpl
|
||||
for (size_t i = 0; i < haystack_offsets_size; ++i)
|
||||
{
|
||||
UInt64 length = haystack_offsets[i] - offset - 1;
|
||||
/// Hyperscan restriction.
|
||||
/// vectorscan restriction.
|
||||
if (length > std::numeric_limits<UInt32>::max())
|
||||
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
||||
/// Scan, check, update the offsets array and the offset of haystack.
|
||||
/// scan, check, update the offsets array and the offset of haystack.
|
||||
err = hs_scan(
|
||||
hyperscan_regex->getDB(),
|
||||
reinterpret_cast<const char *>(haystack_data.data()) + offset,
|
||||
@ -120,6 +121,121 @@ struct MultiMatchAllIndicesImpl
|
||||
offset = haystack_offsets[i];
|
||||
}
|
||||
#else
|
||||
(void)haystack_data;
|
||||
(void)haystack_offsets;
|
||||
(void)needles_arr;
|
||||
(void)res;
|
||||
(void)offsets;
|
||||
(void)edit_distance;
|
||||
(void)max_hyperscan_regexp_length;
|
||||
(void)max_hyperscan_regexp_total_length;
|
||||
throw Exception(
|
||||
"multi-search all indices is not implemented when vectorscan is off",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif // USE_VECTORSCAN
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & offsets,
|
||||
bool allow_hyperscan,
|
||||
size_t max_hyperscan_regexp_length,
|
||||
size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & offsets,
|
||||
std::optional<UInt32> edit_distance,
|
||||
bool allow_hyperscan,
|
||||
size_t max_hyperscan_regexp_length,
|
||||
size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (!allow_hyperscan)
|
||||
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
|
||||
#if USE_VECTORSCAN
|
||||
offsets.resize(haystack_offsets.size());
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
for (size_t i = 0; i < haystack_offsets.size(); ++i)
|
||||
{
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, WithEditDistance>(needles, edit_distance);
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
||||
|
||||
auto on_match = [](unsigned int id,
|
||||
unsigned long long /* from */, // NOLINT
|
||||
unsigned long long /* to */, // NOLINT
|
||||
unsigned int /* flags */,
|
||||
void * context) -> int
|
||||
{
|
||||
static_cast<PaddedPODArray<ResultType>*>(context)->push_back(id);
|
||||
return 0;
|
||||
};
|
||||
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
/// vectorscan restriction.
|
||||
if (cur_haystack_length > std::numeric_limits<UInt32>::max())
|
||||
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
||||
|
||||
/// scan, check, update the offsets array and the offset of haystack.
|
||||
err = hs_scan(
|
||||
hyperscan_regex->getDB(),
|
||||
reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
|
||||
cur_haystack_length,
|
||||
0,
|
||||
smart_scratch.get(),
|
||||
on_match,
|
||||
&res);
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
|
||||
|
||||
offsets[i] = res.size();
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
#else
|
||||
(void)haystack_data;
|
||||
(void)haystack_offsets;
|
||||
(void)needles_data;
|
||||
(void)needles_offsets;
|
||||
(void)res;
|
||||
(void)offsets;
|
||||
(void)edit_distance;
|
||||
(void)max_hyperscan_regexp_length;
|
||||
(void)max_hyperscan_regexp_total_length;
|
||||
throw Exception(
|
||||
"multi-search all indices is not implemented when vectorscan is off",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
@ -14,6 +14,7 @@
|
||||
# include <hs.h>
|
||||
#else
|
||||
# include "MatchImpl.h"
|
||||
#include <Common/Volnitsky.h>
|
||||
#endif
|
||||
|
||||
|
||||
@ -29,7 +30,7 @@ namespace ErrorCodes
|
||||
extern const int TOO_MANY_BYTES;
|
||||
}
|
||||
|
||||
// For more readable instantiations of MultiMatchAnyImpl<>
|
||||
/// For more readable instantiations of MultiMatchAnyImpl<>
|
||||
struct MultiMatchTraits
|
||||
{
|
||||
enum class Find
|
||||
@ -75,7 +76,7 @@ struct MultiMatchAnyImpl
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const Array & needles_arr,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
[[maybe_unused]] std::optional<UInt32> edit_distance,
|
||||
bool allow_hyperscan,
|
||||
size_t max_hyperscan_regexp_length,
|
||||
@ -93,7 +94,7 @@ struct MultiMatchAnyImpl
|
||||
|
||||
res.resize(haystack_offsets.size());
|
||||
#if USE_VECTORSCAN
|
||||
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex, WithEditDistance>(needles, edit_distance);
|
||||
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
|
||||
|
||||
@ -120,10 +121,10 @@ struct MultiMatchAnyImpl
|
||||
for (size_t i = 0; i < haystack_offsets_size; ++i)
|
||||
{
|
||||
UInt64 length = haystack_offsets[i] - offset - 1;
|
||||
/// Vectorscan restriction.
|
||||
/// vectorscan restriction.
|
||||
if (length > std::numeric_limits<UInt32>::max())
|
||||
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
||||
/// Zero the result, scan, check, update the offset.
|
||||
/// zero the result, scan, check, update the offset.
|
||||
res[i] = 0;
|
||||
err = hs_scan(
|
||||
hyperscan_regex->getDB(),
|
||||
@ -138,7 +139,7 @@ struct MultiMatchAnyImpl
|
||||
offset = haystack_offsets[i];
|
||||
}
|
||||
#else
|
||||
// fallback if vectorscan is not compiled
|
||||
/// fallback if vectorscan is not compiled
|
||||
if constexpr (WithEditDistance)
|
||||
throw Exception(
|
||||
"Edit distance multi-search is not implemented when vectorscan is off",
|
||||
@ -157,6 +158,218 @@ struct MultiMatchAnyImpl
|
||||
res[i] = j + 1;
|
||||
}
|
||||
}
|
||||
#endif // USE_VECTORSCAN
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & offsets,
|
||||
bool allow_hyperscan,
|
||||
size_t max_hyperscan_regexp_length,
|
||||
size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
std::optional<UInt32> edit_distance,
|
||||
bool allow_hyperscan,
|
||||
size_t max_hyperscan_regexp_length,
|
||||
size_t max_hyperscan_regexp_total_length)
|
||||
{
|
||||
if (!allow_hyperscan)
|
||||
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
|
||||
|
||||
res.resize(haystack_offsets.size());
|
||||
#if USE_VECTORSCAN
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
for (size_t i = 0; i < haystack_offsets.size(); ++i)
|
||||
{
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
||||
|
||||
auto on_match = []([[maybe_unused]] unsigned int id,
|
||||
unsigned long long /* from */, // NOLINT
|
||||
unsigned long long /* to */, // NOLINT
|
||||
unsigned int /* flags */,
|
||||
void * context) -> int
|
||||
{
|
||||
if constexpr (FindAnyIndex)
|
||||
*reinterpret_cast<ResultType *>(context) = id;
|
||||
else if constexpr (FindAny)
|
||||
*reinterpret_cast<ResultType *>(context) = 1;
|
||||
/// Once we hit the callback, there is no need to search for others.
|
||||
return 1;
|
||||
};
|
||||
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
/// vectorscan restriction.
|
||||
if (cur_haystack_length > std::numeric_limits<UInt32>::max())
|
||||
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
||||
|
||||
/// zero the result, scan, check, update the offset.
|
||||
res[i] = 0;
|
||||
err = hs_scan(
|
||||
hyperscan_regex->getDB(),
|
||||
reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
|
||||
cur_haystack_length,
|
||||
0,
|
||||
smart_scratch.get(),
|
||||
on_match,
|
||||
&res[i]);
|
||||
if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED)
|
||||
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
#else
|
||||
/// fallback if vectorscan is not compiled
|
||||
/// -- the code is copypasted from vectorVector() in MatchImpl.h and quite complex code ... all of it can be removed once vectorscan is
|
||||
/// enabled on all platforms (#38906)
|
||||
if constexpr (WithEditDistance)
|
||||
throw Exception(
|
||||
"Edit distance multi-search is not implemented when vectorscan is off",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
(void)edit_distance;
|
||||
|
||||
memset(res.data(), 0, res.size() * sizeof(res.front()));
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
for (size_t i = 0; i < haystack_offsets.size(); ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
||||
|
||||
for (size_t j = 0; j < needles.size(); ++j)
|
||||
{
|
||||
String needle(needles[j]);
|
||||
|
||||
const auto & regexp = Regexps::Regexp(Regexps::createRegexp</*like*/ false, /*no_capture*/ true, /*case_insensitive*/ false>(needle));
|
||||
|
||||
String required_substr;
|
||||
bool is_trivial;
|
||||
bool required_substring_is_prefix; /// for `anchored` execution of the regexp.
|
||||
|
||||
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);
|
||||
|
||||
if (required_substr.empty())
|
||||
{
|
||||
if (!regexp.getRE2()) /// An empty regexp. Always matches.
|
||||
{
|
||||
if constexpr (FindAny)
|
||||
res[i] |= 1;
|
||||
else if (FindAnyIndex)
|
||||
res[i] = j + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
const bool match = regexp.getRE2()->Match(
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
0,
|
||||
cur_haystack_length,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
if constexpr (FindAny)
|
||||
res[i] |= match;
|
||||
else if (FindAnyIndex && match)
|
||||
res[i] = j + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Volnitsky searcher(required_substr.data(), required_substr.size(), cur_haystack_length);
|
||||
const auto * match = searcher.search(cur_haystack_data, cur_haystack_length);
|
||||
|
||||
if (match == cur_haystack_data + cur_haystack_length)
|
||||
{
|
||||
/// no match
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_trivial)
|
||||
{
|
||||
/// no wildcards in pattern
|
||||
if constexpr (FindAny)
|
||||
res[i] |= 1;
|
||||
else if (FindAnyIndex)
|
||||
res[i] = j + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t start_pos = (required_substring_is_prefix) ? (match - cur_haystack_data) : 0;
|
||||
const size_t end_pos = cur_haystack_length;
|
||||
|
||||
const bool match2 = regexp.getRE2()->Match(
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
if constexpr (FindAny)
|
||||
res[i] |= match2;
|
||||
else if (FindAnyIndex && match2)
|
||||
res[i] = j + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
#endif // USE_VECTORSCAN
|
||||
}
|
||||
};
|
||||
|
@ -29,7 +29,7 @@ struct MultiSearchFirstIndexImpl
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const Array & needles_arr,
|
||||
PaddedPODArray<UInt64> & res,
|
||||
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
@ -46,26 +46,83 @@ struct MultiSearchFirstIndexImpl
|
||||
needles.emplace_back(needle.get<String>());
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
res.resize(haystack_string_size);
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t iteration = 0;
|
||||
while (searcher.hasMoreToSearch())
|
||||
{
|
||||
size_t prev_offset = 0;
|
||||
for (size_t j = 0; j < haystack_string_size; ++j)
|
||||
size_t prev_haystack_offset = 0;
|
||||
for (size_t j = 0; j < haystack_size; ++j)
|
||||
{
|
||||
const auto * haystack = &haystack_data[prev_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
||||
const auto * haystack = &haystack_data[prev_haystack_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1;
|
||||
/// hasMoreToSearch traverse needles in increasing order
|
||||
if (iteration == 0 || res[j] == 0)
|
||||
res[j] = searcher.searchOneFirstIndex(haystack, haystack_end);
|
||||
prev_offset = haystack_offsets[j];
|
||||
prev_haystack_offset = haystack_offsets[j];
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
{
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal
|
||||
|
||||
const auto * const haystack = &haystack_data[prev_haystack_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
size_t iteration = 0;
|
||||
while (searcher.hasMoreToSearch())
|
||||
{
|
||||
if (iteration == 0 || res[i] == 0)
|
||||
{
|
||||
res[i] = searcher.searchOneFirstIndex(haystack, haystack_end);
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
{
|
||||
res[i] = 0;
|
||||
}
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ struct MultiSearchFirstPositionImpl
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const Array & needles_arr,
|
||||
PaddedPODArray<UInt64> & res,
|
||||
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
@ -50,16 +50,18 @@ struct MultiSearchFirstPositionImpl
|
||||
return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
|
||||
};
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
res.resize(haystack_string_size);
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t iteration = 0;
|
||||
while (searcher.hasMoreToSearch())
|
||||
{
|
||||
size_t prev_offset = 0;
|
||||
for (size_t j = 0; j < haystack_string_size; ++j)
|
||||
size_t prev_haystack_offset = 0;
|
||||
for (size_t j = 0; j < haystack_size; ++j)
|
||||
{
|
||||
const auto * haystack = &haystack_data[prev_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
||||
const auto * haystack = &haystack_data[prev_haystack_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1;
|
||||
if (iteration == 0 || res[j] == 0)
|
||||
res[j] = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback);
|
||||
else
|
||||
@ -68,13 +70,81 @@ struct MultiSearchFirstPositionImpl
|
||||
if (result != 0)
|
||||
res[j] = std::min(result, res[j]);
|
||||
}
|
||||
prev_offset = haystack_offsets[j];
|
||||
prev_haystack_offset = haystack_offsets[j];
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
{
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64
|
||||
{
|
||||
return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal
|
||||
|
||||
const auto * const haystack = &haystack_data[prev_haystack_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
size_t iteration = 0;
|
||||
while (searcher.hasMoreToSearch())
|
||||
{
|
||||
if (iteration == 0 || res[i] == 0)
|
||||
{
|
||||
res[i] = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback);
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt64 result = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback);
|
||||
if (result != 0)
|
||||
{
|
||||
res[i] = std::min(result, res[i]);
|
||||
}
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
{
|
||||
res[i] = 0;
|
||||
}
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ struct MultiSearchImpl
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const Array & needles_arr,
|
||||
PaddedPODArray<UInt8> & res,
|
||||
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
@ -46,25 +46,80 @@ struct MultiSearchImpl
|
||||
needles.emplace_back(needle.get<String>());
|
||||
|
||||
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
||||
const size_t haystack_string_size = haystack_offsets.size();
|
||||
res.resize(haystack_string_size);
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t iteration = 0;
|
||||
while (searcher.hasMoreToSearch())
|
||||
{
|
||||
size_t prev_offset = 0;
|
||||
for (size_t j = 0; j < haystack_string_size; ++j)
|
||||
size_t prev_haystack_offset = 0;
|
||||
for (size_t j = 0; j < haystack_size; ++j)
|
||||
{
|
||||
const auto * haystack = &haystack_data[prev_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
||||
const auto * haystack = &haystack_data[prev_haystack_offset];
|
||||
const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1;
|
||||
if (iteration == 0 || !res[j])
|
||||
res[j] = searcher.searchOne(haystack, haystack_end);
|
||||
prev_offset = haystack_offsets[j];
|
||||
prev_haystack_offset = haystack_offsets[j];
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
std::fill(res.begin(), res.end(), 0);
|
||||
}
|
||||
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const IColumn & needles_data,
|
||||
const ColumnArray::Offsets & needles_offsets,
|
||||
PaddedPODArray<ResultType> & res,
|
||||
PaddedPODArray<UInt64> & /*offsets*/,
|
||||
bool /*allow_hyperscan*/,
|
||||
size_t /*max_hyperscan_regexp_length*/,
|
||||
size_t /*max_hyperscan_regexp_total_length*/)
|
||||
{
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res.resize(haystack_size);
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needles_offset = 0;
|
||||
|
||||
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
||||
|
||||
std::vector<std::string_view> needles;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
||||
|
||||
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
||||
{
|
||||
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
||||
}
|
||||
|
||||
const auto * const haystack = &haystack_data[prev_haystack_offset];
|
||||
const size_t haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
size_t iteration = 0;
|
||||
for (const auto & needle : needles)
|
||||
{
|
||||
auto searcher = Impl::createSearcherInSmallHaystack(needle.data(), needle.size());
|
||||
if (iteration == 0 || !res[i])
|
||||
{
|
||||
const auto * match = searcher.search(haystack, haystack_length);
|
||||
res[i] = (match != haystack + haystack_length);
|
||||
}
|
||||
++iteration;
|
||||
}
|
||||
if (iteration == 0)
|
||||
res[i] = 0;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needles_offset = needles_offsets[i];
|
||||
needles.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -3,7 +3,7 @@ add_headers_and_sources(clickhouse_functions_url .)
|
||||
add_library(clickhouse_functions_url ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers})
|
||||
target_link_libraries(clickhouse_functions_url PRIVATE dbms)
|
||||
|
||||
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
|
||||
if (OMIT_HEAVY_DEBUG_SYMBOLS)
|
||||
target_compile_options(clickhouse_functions_url PRIVATE "-g0")
|
||||
endif()
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user