Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00
Commit 0e2a86dcee: Merge branch 'master' into MeiliSearch
.clang-tidy: 292 changed lines
@@ -1,172 +1,148 @@
Checks: '-*,
|
||||
misc-misplaced-const,
|
||||
misc-redundant-expression,
|
||||
misc-static-assert,
|
||||
misc-throw-by-value-catch-by-reference,
|
||||
misc-unconventional-assign-operator,
|
||||
misc-uniqueptr-reset-release,
|
||||
misc-unused-alias-decls,
|
||||
misc-unused-parameters,
|
||||
misc-unused-using-decls,
|
||||
Checks: '*,
|
||||
-abseil-*,
|
||||
|
||||
modernize-avoid-bind,
|
||||
modernize-loop-convert,
|
||||
modernize-make-shared,
|
||||
modernize-make-unique,
|
||||
modernize-raw-string-literal,
|
||||
modernize-redundant-void-arg,
|
||||
modernize-replace-random-shuffle,
|
||||
modernize-use-bool-literals,
|
||||
modernize-use-equals-default,
|
||||
modernize-use-equals-delete,
|
||||
modernize-use-nullptr,
|
||||
modernize-use-using,
|
||||
-altera-*,
|
||||
|
||||
performance-faster-string-find,
|
||||
performance-for-range-copy,
|
||||
performance-implicit-conversion-in-loop,
|
||||
performance-inefficient-algorithm,
|
||||
performance-inefficient-vector-operation,
|
||||
performance-move-const-arg,
|
||||
performance-move-constructor-init,
|
||||
performance-no-automatic-move,
|
||||
performance-noexcept-move-constructor,
|
||||
performance-trivially-destructible,
|
||||
performance-unnecessary-copy-initialization,
|
||||
-android-*,
|
||||
|
||||
readability-avoid-const-params-in-decls,
|
||||
readability-const-return-type,
|
||||
readability-container-contains,
|
||||
readability-container-size-empty,
|
||||
readability-convert-member-functions-to-static,
|
||||
readability-delete-null-pointer,
|
||||
readability-deleted-default,
|
||||
readability-identifier-naming,
|
||||
readability-inconsistent-declaration-parameter-name,
|
||||
readability-make-member-function-const,
|
||||
readability-misplaced-array-index,
|
||||
readability-non-const-parameter,
|
||||
readability-qualified-auto,
|
||||
readability-redundant-access-specifiers,
|
||||
readability-redundant-control-flow,
|
||||
readability-redundant-function-ptr-dereference,
|
||||
readability-redundant-member-init,
|
||||
readability-redundant-smartptr-get,
|
||||
readability-redundant-string-cstr,
|
||||
readability-redundant-string-init,
|
||||
readability-simplify-boolean-expr,
|
||||
readability-simplify-subscript-expr,
|
||||
readability-static-definition-in-anonymous-namespace,
|
||||
readability-string-compare,
|
||||
readability-uniqueptr-delete-release,
|
||||
-bugprone-assert-side-effect,
|
||||
-bugprone-branch-clone,
|
||||
-bugprone-dynamic-static-initializers,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-exception-escape,
|
||||
-bugprone-forwarding-reference-overload,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-lambda-function-name,
|
||||
-bugprone-misplaced-widening-cast,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-no-escape,
|
||||
-bugprone-not-null-terminated-result,
|
||||
-bugprone-signal-handler,
|
||||
-bugprone-spuriously-wake-up-functions,
|
||||
-bugprone-suspicious-semicolon,
|
||||
-bugprone-unhandled-exception-at-new,
|
||||
-bugprone-unhandled-self-assignment,
|
||||
|
||||
bugprone-argument-comment,
|
||||
bugprone-bad-signal-to-kill-thread,
|
||||
bugprone-bool-pointer-implicit-conversion,
|
||||
bugprone-copy-constructor-init,
|
||||
bugprone-dangling-handle,
|
||||
bugprone-fold-init-type,
|
||||
bugprone-forward-declaration-namespace,
|
||||
bugprone-inaccurate-erase,
|
||||
bugprone-incorrect-roundings,
|
||||
bugprone-infinite-loop,
|
||||
bugprone-integer-division,
|
||||
bugprone-macro-parentheses,
|
||||
bugprone-macro-repeated-side-effects,
|
||||
bugprone-misplaced-operator-in-strlen-in-alloc,
|
||||
bugprone-misplaced-pointer-artithmetic-in-alloc,
|
||||
bugprone-misplaced-widening-cast,
|
||||
bugprone-move-forwarding-reference,
|
||||
bugprone-multiple-statement-macro,
|
||||
bugprone-parent-virtual-call,
|
||||
bugprone-posix-return,
|
||||
bugprone-reserved-identifier,
|
||||
bugprone-signed-char-misuse,
|
||||
bugprone-sizeof-container,
|
||||
bugprone-sizeof-expression,
|
||||
bugprone-string-constructor,
|
||||
bugprone-string-integer-assignment,
|
||||
bugprone-string-literal-with-embedded-nul,
|
||||
bugprone-suspicious-enum-usage,
|
||||
bugprone-suspicious-include,
|
||||
bugprone-suspicious-memset-usage,
|
||||
bugprone-suspicious-missing-comma,
|
||||
bugprone-suspicious-string-compare,
|
||||
bugprone-swapped-arguments,
|
||||
bugprone-terminating-continue,
|
||||
bugprone-throw-keyword-missing,
|
||||
bugprone-too-small-loop-variable,
|
||||
bugprone-undefined-memory-manipulation,
|
||||
bugprone-undelegated-constructor,
|
||||
bugprone-unhandled-self-assignment,
|
||||
bugprone-unused-raii,
|
||||
bugprone-unused-return-value,
|
||||
bugprone-use-after-move,
|
||||
bugprone-virtual-near-miss,
|
||||
-cert-dcl16-c,
|
||||
-cert-dcl37-c,
|
||||
-cert-dcl51-cpp,
|
||||
-cert-dcl58-cpp,
|
||||
-cert-err58-cpp,
|
||||
-cert-err60-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
-cert-oop54-cpp,
|
||||
-cert-oop57-cpp,
|
||||
-cert-oop58-cpp,
|
||||
|
||||
cert-dcl21-cpp,
|
||||
cert-dcl50-cpp,
|
||||
cert-env33-c,
|
||||
cert-err34-c,
|
||||
cert-err52-cpp,
|
||||
cert-flp30-c,
|
||||
cert-mem57-cpp,
|
||||
cert-msc50-cpp,
|
||||
cert-oop58-cpp,
|
||||
-clang-analyzer-core.DynamicTypePropagation,
|
||||
-clang-analyzer-core.uninitialized.CapturedBlockVariable,
|
||||
|
||||
google-build-explicit-make-pair,
|
||||
google-build-namespaces,
|
||||
google-default-arguments,
|
||||
google-explicit-constructor,
|
||||
google-readability-avoid-underscore-in-googletest-name,
|
||||
google-readability-casting,
|
||||
google-runtime-int,
|
||||
google-runtime-operator,
|
||||
-clang-analyzer-optin.performance.Padding,
|
||||
-clang-analyzer-optin.portability.UnixAPI,
|
||||
|
||||
hicpp-exception-baseclass,
|
||||
-clang-analyzer-security.insecureAPI.bzero,
|
||||
-clang-analyzer-security.insecureAPI.strcpy,
|
||||
|
||||
clang-analyzer-core.CallAndMessage,
|
||||
clang-analyzer-core.DivideZero,
|
||||
clang-analyzer-core.NonNullParamChecker,
|
||||
clang-analyzer-core.NullDereference,
|
||||
clang-analyzer-core.StackAddressEscape,
|
||||
clang-analyzer-core.UndefinedBinaryOperatorResult,
|
||||
clang-analyzer-core.VLASize,
|
||||
clang-analyzer-core.uninitialized.ArraySubscript,
|
||||
clang-analyzer-core.uninitialized.Assign,
|
||||
clang-analyzer-core.uninitialized.Branch,
|
||||
clang-analyzer-core.uninitialized.CapturedBlockVariable,
|
||||
clang-analyzer-core.uninitialized.UndefReturn,
|
||||
clang-analyzer-cplusplus.InnerPointer,
|
||||
clang-analyzer-cplusplus.Move,
|
||||
clang-analyzer-cplusplus.NewDelete,
|
||||
clang-analyzer-cplusplus.NewDeleteLeaks,
|
||||
clang-analyzer-cplusplus.PlacementNewChecker,
|
||||
clang-analyzer-cplusplus.SelfAssignment,
|
||||
clang-analyzer-deadcode.DeadStores,
|
||||
clang-analyzer-optin.cplusplus.UninitializedObject,
|
||||
clang-analyzer-optin.cplusplus.VirtualCall,
|
||||
clang-analyzer-security.insecureAPI.UncheckedReturn,
|
||||
clang-analyzer-security.insecureAPI.bcmp,
|
||||
clang-analyzer-security.insecureAPI.bcopy,
|
||||
clang-analyzer-security.insecureAPI.bzero,
|
||||
clang-analyzer-security.insecureAPI.getpw,
|
||||
clang-analyzer-security.insecureAPI.gets,
|
||||
clang-analyzer-security.insecureAPI.mkstemp,
|
||||
clang-analyzer-security.insecureAPI.mktemp,
|
||||
clang-analyzer-security.insecureAPI.rand,
|
||||
clang-analyzer-security.insecureAPI.strcpy,
|
||||
clang-analyzer-unix.Malloc,
|
||||
clang-analyzer-unix.MallocSizeof,
|
||||
clang-analyzer-unix.MismatchedDeallocator,
|
||||
clang-analyzer-unix.Vfork,
|
||||
clang-analyzer-unix.cstring.BadSizeArg,
|
||||
clang-analyzer-unix.cstring.NullArg,
|
||||
-cppcoreguidelines-*,
|
||||
|
||||
boost-use-to-string,
|
||||
-concurrency-mt-unsafe,
|
||||
|
||||
alpha.security.cert.env.InvalidPtr,
|
||||
-darwin-*,
|
||||
|
||||
-fuchsia-*,
|
||||
|
||||
-google-build-using-namespace,
|
||||
-google-global-names-in-headers,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-function-size,
|
||||
-google-readability-namespace-comments,
|
||||
-google-readability-todo,
|
||||
-google-upgrade-googletest-case,
|
||||
|
||||
-hicpp-avoid-c-arrays,
|
||||
-hicpp-avoid-goto,
|
||||
-hicpp-braces-around-statements,
|
||||
-hicpp-deprecated-headers,
|
||||
-hicpp-explicit-conversions,
|
||||
-hicpp-function-size,
|
||||
-hicpp-invalid-access-moved,
|
||||
-hicpp-member-init,
|
||||
-hicpp-move-const-arg,
|
||||
-hicpp-multiway-paths-covered,
|
||||
-hicpp-named-parameter,
|
||||
-hicpp-no-array-decay,
|
||||
-hicpp-no-assembler,
|
||||
-hicpp-no-malloc,
|
||||
-hicpp-signed-bitwise,
|
||||
-hicpp-special-member-functions,
|
||||
-hicpp-uppercase-literal-suffix,
|
||||
-hicpp-use-auto,
|
||||
-hicpp-use-emplace,
|
||||
-hicpp-use-equals-default,
|
||||
-hicpp-use-noexcept,
|
||||
-hicpp-use-override,
|
||||
-hicpp-vararg,
|
||||
|
||||
-llvm-*,
|
||||
|
||||
-llvmlibc-*,
|
||||
|
||||
-openmp-*,
|
||||
|
||||
-misc-definitions-in-headers,
|
||||
-misc-new-delete-overloads,
|
||||
-misc-no-recursion,
|
||||
-misc-non-copyable-objects,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-static-assert,
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
-modernize-deprecated-headers,
|
||||
-modernize-deprecated-ios-base-aliases,
|
||||
-modernize-pass-by-value,
|
||||
-modernize-replace-auto-ptr,
|
||||
-modernize-replace-disallow-copy-and-assign-macro,
|
||||
-modernize-return-braced-init-list,
|
||||
-modernize-unary-static-assert,
|
||||
-modernize-use-auto,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-emplace,
|
||||
-modernize-use-equals-default,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-noexcept,
|
||||
-modernize-use-override,
|
||||
-modernize-use-trailing-return-type,
|
||||
|
||||
-performance-inefficient-string-concatenation,
|
||||
-performance-no-int-to-ptr,
|
||||
-performance-type-promotion-in-math-fn,
|
||||
-performance-trivially-destructible,
|
||||
-performance-unnecessary-value-param,
|
||||
|
||||
-portability-simd-intrinsics,
|
||||
|
||||
-readability-convert-member-functions-to-static,
|
||||
-readability-braces-around-statements,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-misleading-indentation,
|
||||
-readability-named-parameter,
|
||||
-readability-qualified-auto,
|
||||
-readability-redundant-declaration,
|
||||
-readability-static-accessed-through-instance,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-uppercase-literal-suffix,
|
||||
-readability-use-anyofallof,
|
||||
|
||||
-zirkon-*,
|
||||
'
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
CheckOptions:
|
||||
|
.github/PULL_REQUEST_TEMPLATE.md (vendored): 2 changed lines
@@ -1,7 +1,7 @@
### Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
- Bug Fix (user-visible misbehavior in official stable or prestable release)
- Performance Improvement
- Backward Incompatible Change
- Build/Testing/Packaging Improvement
.github/workflows/codeql.yml (vendored): 48 deleted lines
@@ -1,48 +0,0 @@
name: "CodeQL"

"on":
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

env:
  CC: clang-14
  CXX: clang++-14

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ['cpp']

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'true'

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}

      - name: Build
        run: |
          sudo apt-get install -yq ninja-build
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
          mkdir build
          cd build
          cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
          ninja
          rm -rf ../contrib

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2
.github/workflows/docs_check.yml (vendored): 16 changed lines
@@ -156,3 +156,19 @@ jobs:
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
  FinishCheck:
    needs:
      - StyleCheck
      - DockerHubPush
      - DocsCheck
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Finish label
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 finish_check.py
.github/workflows/master.yml (vendored): 158 changed lines
@ -362,50 +362,50 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinGCC:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=binary_gcc
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
# BuilderBinGCC:
|
||||
# needs: [DockerHubPush]
|
||||
# runs-on: [self-hosted, builder]
|
||||
# steps:
|
||||
# - name: Set envs
|
||||
# run: |
|
||||
# cat >> "$GITHUB_ENV" << 'EOF'
|
||||
# TEMP_PATH=${{runner.temp}}/build_check
|
||||
# IMAGES_PATH=${{runner.temp}}/images_path
|
||||
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
# CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
# CHECK_NAME=ClickHouse build check (actions)
|
||||
# BUILD_NAME=binary_gcc
|
||||
# EOF
|
||||
# - name: Download changed images
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# name: changed_images
|
||||
# path: ${{ env.IMAGES_PATH }}
|
||||
# - name: Clear repository
|
||||
# run: |
|
||||
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
# - name: Check out repository code
|
||||
# uses: actions/checkout@v2
|
||||
# - name: Build
|
||||
# run: |
|
||||
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
# mkdir -p "$TEMP_PATH"
|
||||
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
# - name: Upload build URLs to artifacts
|
||||
# if: ${{ success() || failure() }}
|
||||
# uses: actions/upload-artifact@v2
|
||||
# with:
|
||||
# name: ${{ env.BUILD_URLS }}
|
||||
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
# - name: Cleanup
|
||||
# if: always()
|
||||
# run: |
|
||||
# docker kill "$(docker ps -q)" ||:
|
||||
# docker rm -f "$(docker ps -a -q)" ||:
|
||||
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
@ -1030,7 +1030,7 @@ jobs:
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
- BuilderBinFreeBSD
|
||||
- BuilderBinGCC
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderDebSplitted
|
||||
@ -2685,40 +2685,40 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsReleaseGCC:
|
||||
needs: [BuilderBinGCC]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Unit test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 unit_tests_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
# UnitTestsReleaseGCC:
|
||||
# needs: [BuilderBinGCC]
|
||||
# runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
# steps:
|
||||
# - name: Set envs
|
||||
# run: |
|
||||
# cat >> "$GITHUB_ENV" << 'EOF'
|
||||
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
# REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
# CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
# EOF
|
||||
# - name: Download json reports
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# path: ${{ env.REPORTS_PATH }}
|
||||
# - name: Clear repository
|
||||
# run: |
|
||||
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
# - name: Check out repository code
|
||||
# uses: actions/checkout@v2
|
||||
# - name: Unit test
|
||||
# run: |
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
# mkdir -p "$TEMP_PATH"
|
||||
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
# cd "$REPO_COPY/tests/ci"
|
||||
# python3 unit_tests_check.py "$CHECK_NAME"
|
||||
# - name: Cleanup
|
||||
# if: always()
|
||||
# run: |
|
||||
# docker kill "$(docker ps -q)" ||:
|
||||
# docker rm -f "$(docker ps -a -q)" ||:
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
|
.github/workflows/pull_request.yml (vendored): 158 changed lines
@ -375,50 +375,50 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinGCC:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=binary_gcc
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/images_path
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
# BuilderBinGCC:
|
||||
# needs: [DockerHubPush, FastTest]
|
||||
# runs-on: [self-hosted, builder]
|
||||
# steps:
|
||||
# - name: Set envs
|
||||
# run: |
|
||||
# cat >> "$GITHUB_ENV" << 'EOF'
|
||||
# TEMP_PATH=${{runner.temp}}/build_check
|
||||
# IMAGES_PATH=${{runner.temp}}/images_path
|
||||
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
# CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
# CHECK_NAME=ClickHouse build check (actions)
|
||||
# BUILD_NAME=binary_gcc
|
||||
# EOF
|
||||
# - name: Download changed images
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# name: changed_images
|
||||
# path: ${{ runner.temp }}/images_path
|
||||
# - name: Clear repository
|
||||
# run: |
|
||||
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
# - name: Check out repository code
|
||||
# uses: actions/checkout@v2
|
||||
# - name: Build
|
||||
# run: |
|
||||
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
# mkdir -p "$TEMP_PATH"
|
||||
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
# - name: Upload build URLs to artifacts
|
||||
# if: ${{ success() || failure() }}
|
||||
# uses: actions/upload-artifact@v2
|
||||
# with:
|
||||
# name: ${{ env.BUILD_URLS }}
|
||||
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
# - name: Cleanup
|
||||
# if: always()
|
||||
# run: |
|
||||
# docker kill "$(docker ps -q)" ||:
|
||||
# docker rm -f "$(docker ps -a -q)" ||:
|
||||
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAarch64:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
@ -1077,7 +1077,7 @@ jobs:
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
- BuilderBinFreeBSD
|
||||
- BuilderBinGCC
|
||||
# - BuilderBinGCC
|
||||
- BuilderBinPPC64
|
||||
- BuilderBinTidy
|
||||
- BuilderDebSplitted
|
||||
@ -2886,40 +2886,40 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsReleaseGCC:
|
||||
needs: [BuilderBinGCC]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Unit test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 unit_tests_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
# UnitTestsReleaseGCC:
|
||||
# needs: [BuilderBinGCC]
|
||||
# runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
# steps:
|
||||
# - name: Set envs
|
||||
# run: |
|
||||
# cat >> "$GITHUB_ENV" << 'EOF'
|
||||
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
# REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
# CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
# EOF
|
||||
# - name: Download json reports
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# path: ${{ env.REPORTS_PATH }}
|
||||
# - name: Clear repository
|
||||
# run: |
|
||||
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
# - name: Check out repository code
|
||||
# uses: actions/checkout@v2
|
||||
# - name: Unit test
|
||||
# run: |
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
# mkdir -p "$TEMP_PATH"
|
||||
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
# cd "$REPO_COPY/tests/ci"
|
||||
# python3 unit_tests_check.py "$CHECK_NAME"
|
||||
# - name: Cleanup
|
||||
# if: always()
|
||||
# run: |
|
||||
# docker kill "$(docker ps -q)" ||:
|
||||
# docker rm -f "$(docker ps -a -q)" ||:
|
||||
# sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
|
@@ -25,7 +25,6 @@
* Introduce format `ProtobufList` (all records as repeated messages in out Protobuf). Closes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `toLastDayOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)).
* New aggregation function groupSortedArray to obtain an array of first N values. [#34055](https://github.com/ClickHouse/ClickHouse/pull/34055) ([palegre-tiny](https://github.com/palegre-tiny)).
* Added load balancing setting for \[Zoo\]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)).
* Add a new kind of row policies named `simple`. Before this PR we had two kinds of row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)).
* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
@@ -20,7 +20,9 @@ if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
endif()

# Check if environment is polluted.
if (DEFINED ENV{CFLAGS} OR DEFINED ENV{CXXFLAGS} OR DEFINED ENV{LDFLAGS}
if (NOT $ENV{CFLAGS} STREQUAL ""
    OR NOT $ENV{CXXFLAGS} STREQUAL ""
    OR NOT $ENV{LDFLAGS} STREQUAL ""
    OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
    OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
@@ -37,7 +37,8 @@ struct StringRef
    size_t size = 0;

    /// Non-constexpr due to reinterpret_cast.
    template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
    template <typename CharT>
    requires (sizeof(CharT) == 1)
    StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_)
    {
        /// Sanity check for overflowed values.
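
This hunk replaces SFINAE through a defaulted `std::enable_if_t` template parameter with a C++20 `requires` clause; the constraint itself (`sizeof(CharT) == 1`) is unchanged. A minimal compilable sketch of the same migration on a hypothetical byte-view type (the `ByteView` name is illustrative, not from the ClickHouse sources):

```cpp
#include <cstddef>
#include <type_traits>

struct ByteView
{
    const char * data = nullptr;
    std::size_t size = 0;

    /// Old style: SFINAE via a defaulted template parameter.
    /// template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
    /// ByteView(const CharT * p, std::size_t n);

    /// New style: the same constraint stated directly with a requires clause.
    template <typename CharT>
    requires (sizeof(CharT) == 1)
    ByteView(const CharT * p, std::size_t n)
        : data(reinterpret_cast<const char *>(p)), size(n) {}
};

int main()
{
    unsigned char buf[4] = {1, 2, 3, 4};
    ByteView view(buf, sizeof(buf));    /// OK: sizeof(unsigned char) == 1
    /// int ints[4] = {};
    /// ByteView bad(ints, 4);          /// would not compile: the constraint is not satisfied
    return view.size == 4 ? 0 : 1;
}
```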
@@ -21,10 +21,12 @@ public:
        return *this;
    }

    template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
    template <typename G>
    requires std::is_convertible_v<G, F>
    constexpr basic_scope_guard(basic_scope_guard<G> && src) : function{src.release()} {}

    template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
    template <typename G>
    requires std::is_convertible_v<G, F>
    constexpr basic_scope_guard & operator=(basic_scope_guard<G> && src)
    {
        if (this != &src)
@@ -35,10 +37,12 @@ public:
        return *this;
    }

    template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
    template <typename G>
    requires std::is_convertible_v<G, F>
    constexpr basic_scope_guard(const G & function_) : function{function_} {}

    template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
    template <typename G>
    requires std::is_convertible_v<G, F>
    constexpr basic_scope_guard(G && function_) : function{std::move(function_)} {}

    ~basic_scope_guard() { invoke(); }
@@ -64,7 +68,8 @@ public:
        return std::exchange(function, {});
    }

    template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
    template <typename G>
    requires std::is_convertible_v<G, F>
    basic_scope_guard<F> & join(basic_scope_guard<G> && other)
    {
        if (other.function)
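
The `basic_scope_guard` hunks apply the same enable_if-to-requires rewrite to every converting constructor, the converting assignment operator and `join`. For context, here is a minimal self-contained sketch of the scope-guard idiom this class provides (simplified: the real header also offers `release()`, `join()` and move support beyond this toy):

```cpp
#include <cstdio>
#include <utility>

/// Tiny RAII guard: remembers a callable and invokes it when the scope ends.
template <typename F>
class ScopeGuardSketch
{
public:
    explicit ScopeGuardSketch(F f) : func(std::move(f)) {}
    ~ScopeGuardSketch() { if (armed) func(); }
    ScopeGuardSketch(const ScopeGuardSketch &) = delete;
    ScopeGuardSketch & operator=(const ScopeGuardSketch &) = delete;
    void release() { armed = false; }   /// cancel the pending cleanup

private:
    F func;
    bool armed = true;
};

int main()
{
    std::FILE * f = std::fopen("/tmp/scope_guard_example.txt", "w");
    if (!f)
        return 1;
    ScopeGuardSketch guard([f] { std::fclose(f); });   /// fclose runs on every path out of main
    std::fputs("hello\n", f);
    return 0;
}
```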
@@ -1,36 +0,0 @@
#pragma once

#include <memory>


/** Allows to make std::shared_ptr from T with protected constructor.
  *
  * Derive your T class from shared_ptr_helper<T> and add shared_ptr_helper<T> as a friend
  * and you will have static 'create' method in your class.
  */
template <typename T>
struct shared_ptr_helper
{
    template <typename... TArgs>
    static std::shared_ptr<T> create(TArgs &&... args)
    {
        return std::shared_ptr<T>(new T(std::forward<TArgs>(args)...));
    }
};


template <typename T>
struct is_shared_ptr
{
    static constexpr bool value = false;
};


template <typename T>
struct is_shared_ptr<std::shared_ptr<T>>
{
    static constexpr bool value = true;
};

template <typename T>
inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
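
The deleted header documents its intended usage: derive the class from `shared_ptr_helper<T>`, declare the helper a friend, and construct instances only through the generated `create` method. A small illustration with a hypothetical `Widget` class (the class is made up for the example, it is not taken from the repository):

```cpp
#include <memory>
#include <utility>

template <typename T>
struct shared_ptr_helper
{
    template <typename... TArgs>
    static std::shared_ptr<T> create(TArgs &&... args)
    {
        return std::shared_ptr<T>(new T(std::forward<TArgs>(args)...));
    }
};

/// Hypothetical class with a protected constructor: it can only be built via create().
class Widget : public shared_ptr_helper<Widget>
{
    friend struct shared_ptr_helper<Widget>;   /// lets the helper reach the protected constructor

protected:
    explicit Widget(int id_) : id(id_) {}

public:
    int id;
};

int main()
{
    std::shared_ptr<Widget> w = Widget::create(42);
    return w->id == 42 ? 0 : 1;
}
```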
benchmark/clickhouse/benchmark_cloud.sh (new executable file): 43 lines
@@ -0,0 +1,43 @@
#!/usr/bin/env bash

QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3

PARAMS="--host ... --secure --password ..."

if [ -x ./clickhouse ]
then
    CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
    CLICKHOUSE_CLIENT="clickhouse-client"
else
    echo "clickhouse-client is not found"
    exit 1
fi

QUERY_ID_PREFIX="benchmark_$RANDOM"
QUERY_NUM=1

cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
do
    for i in $(seq 1 $TRIES)
    do
        QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}"
        ${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query"
        echo -n '.'
    done
    QUERY_NUM=$((QUERY_NUM + 1))
    echo
done

sleep 10

${CLICKHOUSE_CLIENT} ${PARAMS} --query "
    WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
    SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
    FROM clusterAllReplicas(default, system.query_log)
    WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
    GROUP BY num ORDER BY num FORMAT TSV
"
@@ -364,10 +364,8 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZLIB)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})

if (ARROW_WITH_ZSTD)
    add_definitions(-DARROW_WITH_ZSTD)
    SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
endif ()
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})


add_library(_arrow ${ARROW_SRCS})
@@ -383,7 +381,6 @@ target_link_libraries(_arrow PRIVATE
    ch_contrib::snappy
    ch_contrib::zlib
    ch_contrib::zstd
    ch_contrib::zstd
)
target_link_libraries(_arrow PUBLIC _orc)
contrib/minizip-ng (vendored submodule): 2 changed lines
@@ -1 +1 @@
Subproject commit 6cffc951851620e0fac1993be75e4713c334de03
Subproject commit f3d400e999056ca290998b3fd89cc5a74e4b8b58
@@ -1,3 +1,4 @@
# rebuild in #36968
# docker build -t clickhouse/docs-builder .
# nodejs 17 prefers ipv6 and is broken in our environment
FROM node:16.14.2-alpine3.15
@@ -28,8 +28,11 @@ done
set -e

# cleanup for retry run if volume is not recreated
docker kill "$(docker ps -aq)" || true
docker rm "$(docker ps -aq)" || true
# shellcheck disable=SC2046
{
    docker kill $(docker ps -aq) || true
    docker rm $(docker ps -aq) || true
}

echo "Start tests"
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse
@@ -256,20 +256,6 @@ for conn_index, c in enumerate(all_connections):

reportStageEnd("settings")

# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables:
    for c in all_connections:
        try:
            res = c.execute("select 1 from {} limit 1".format(t))
        except:
            exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
            skipped_message = " ".join(exception_message.split("\n")[:2])
            print(f"skipped\t{tsv_escape(skipped_message)}")
            sys.exit(0)

reportStageEnd("preconditions")

if not args.use_existing_tables:
    # Run create and fill queries. We will run them simultaneously for both
    # servers, to save time. The weird XML search + filter is because we want to
@@ -652,12 +652,16 @@ if args.report == "main":
    # Don't show mildly unstable queries, only the very unstable ones we
    # treat as errors.
    if very_unstable_queries:
    if very_unstable_queries > 5:
        error_tests += very_unstable_queries
        status = "failure"
        message_array.append(str(very_unstable_queries) + " unstable")
    # FIXME: uncomment the following lines when tests are stable and
    # reliable
    # if very_unstable_queries > 5:
    #     error_tests += very_unstable_queries
    #     status = "failure"
    #
    #     error_tests += slow_average_tests
    # FIXME: until here

    error_tests += slow_average_tests
    if error_tests:
        status = "failure"
        message_array.insert(0, str(error_tests) + " errors")
@@ -131,6 +131,23 @@ ls -la /

clickhouse-client -q "system flush logs" ||:

# Stop server so we can safely read data with clickhouse-local.
# Why do we read data with clickhouse-local?
# Because it's the simplest way to read it when server has crashed.
if [ "$NUM_TRIES" -gt "1" ]; then
    clickhouse-client -q "system shutdown" ||:
    sleep 10
else
    sudo clickhouse stop ||:
fi


if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
    clickhouse-client --port 19000 -q "system shutdown" ||:
    clickhouse-client --port 29000 -q "system shutdown" ||:
    sleep 10
fi

grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &

@@ -143,18 +160,17 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
# for files >64MB, we want this files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log
do
    clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz &
    clickhouse-local --path /var/lib/clickhouse/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
    if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
        clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz &
        clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz &
        clickhouse-local --path /var/lib/clickhouse1/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz ||:
        clickhouse-local --path /var/lib/clickhouse2/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz ||:
    fi
done
wait ||:

# Also export trace log in flamegraph-friendly format.
for trace_type in CPU Memory Real
do
    clickhouse-client -q "
    clickhouse-local --path /var/lib/clickhouse/ -q "
        select
            arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack,
            count(*) AS samples
@@ -164,10 +180,9 @@ do
        order by samples desc
        settings allow_introspection_functions = 1
        format TabSeparated" \
    | pigz > "/test_output/trace-log-$trace_type-flamegraph.tsv.gz" &
    | pigz > "/test_output/trace-log-$trace_type-flamegraph.tsv.gz" ||:
done

wait ||:

# Compressed (FIXME: remove once only github actions will be left)
rm /var/log/clickhouse-server/clickhouse-server.log
@@ -101,7 +101,12 @@ EOL

function stop()
{
    clickhouse stop
    clickhouse stop --do-not-kill && return
    # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
    kill -TERM "$(pidof gdb)" ||:
    sleep 5
    gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||:
    clickhouse stop --force
}

function start()
@@ -198,10 +203,14 @@ clickhouse-client --query "SHOW TABLES FROM test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log

# NOTE Disable thread fuzzer before server start with data after stress test.
# In debug build it can take a lot of time.
unset "${!THREAD_@}"

start

clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
    || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
    || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
    && grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)

[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
@@ -387,7 +396,7 @@ for table in query_log trace_log; do
done

# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

# Core dumps (see gcore)
@@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"

NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]

RETRIES_SIGN = "Some tests were restarted"

@@ -25,14 +25,14 @@ def process_test_log(log_path):
    success = 0
    hung = False
    retries = False
    task_timeout = True
    success_finish = False
    test_results = []
    with open(log_path, "r") as test_file:
        for line in test_file:
            original_line = line
            line = line.strip()
            if any(s in line for s in NO_TASK_TIMEOUT_SIGNS):
                task_timeout = False
            if any(s in line for s in SUCCESS_FINISH_SIGNS):
                success_finish = True
            if HUNG_SIGN in line:
                hung = True
            if RETRIES_SIGN in line:
@@ -81,7 +81,7 @@ def process_test_log(log_path):
        failed,
        success,
        hung,
        task_timeout,
        success_finish,
        retries,
        test_results,
    )
@@ -108,7 +108,7 @@ def process_result(result_path):
        failed,
        success,
        hung,
        task_timeout,
        success_finish,
        retries,
        test_results,
    ) = process_test_log(result_path)
@@ -123,10 +123,10 @@ def process_result(result_path):
        description = "Some queries hung, "
        state = "failure"
        test_results.append(("Some queries hung", "FAIL", "0", ""))
    elif task_timeout:
        description = "Timeout, "
    elif not success_finish:
        description = "Tests are not finished, "
        state = "failure"
        test_results.append(("Timeout", "FAIL", "0", ""))
        test_results.append(("Tests are not finished", "FAIL", "0", ""))
    elif retries:
        description = "Some tests restarted, "
        test_results.append(("Some tests restarted", "SKIPPED", "0", ""))
@@ -167,6 +167,34 @@ Config is read from multiple files (in XML or YAML format) and merged into singl

For queries and subsystems other than `Server` config is accessible using `Context::getConfigRef()` method. Every subsystem that is capable of reloading its config without server restart should register itself in the reload callback in `Server::main()` method. Note that if a newer config has an error, most subsystems will ignore the new config, log warning messages and keep working with the previously loaded config. Due to the nature of `AbstractConfiguration` it is not possible to pass a reference to a specific section, so `String config_prefix` is usually used instead.

## Threads and jobs {#threads-and-jobs}

To execute queries and do side activities ClickHouse allocates threads from one of the thread pools to avoid frequent thread creation and destruction. There are a few thread pools, which are selected depending on the purpose and structure of a job:
* Server pool for incoming client sessions.
* Global thread pool for general purpose jobs, background activities and standalone threads.
* IO thread pool for jobs that are mostly blocked on some IO and are not CPU-intensive.
* Background pools for periodic tasks.
* Pools for preemptable tasks that can be split into steps.

Server pool is a `Poco::ThreadPool` class instance defined in `Server::main()` method. It can have at most `max_connection` threads. Every thread is dedicated to a single active connection.

Global thread pool is the `GlobalThreadPool` singleton class. To allocate a thread from it `ThreadFromGlobalPool` is used. It has an interface similar to `std::thread`, but pulls its thread from the global pool and does all necessary initializations. It is configured with the following settings:
* `max_thread_pool_size` - limit on thread count in pool.
* `max_thread_pool_free_size` - limit on idle thread count waiting for new jobs.
* `thread_pool_queue_size` - limit on scheduled job count.
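
The text above says `ThreadFromGlobalPool` has an interface similar to `std::thread`: it is constructed with a callable and joined like a regular thread, it just draws the thread from the global pool. Since that class only exists inside the ClickHouse code base, here is a standalone sketch of the same call shape using plain `std::thread` (illustration only, it does not use any pool):

```cpp
#include <iostream>
#include <thread>

int main()
{
    /// With ThreadFromGlobalPool the construction and join look the same,
    /// but the thread would be taken from GlobalThreadPool instead of being created here.
    std::thread worker([] { std::cout << "background job" << std::endl; });
    worker.join();
    return 0;
}
```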

Global pool is universal and all pools described below are implemented on top of it. This can be thought of as a hierarchy of pools. Any specialized pool takes its threads from the global pool using the `ThreadPool` class. So the main purpose of any specialized pool is to apply a limit on the number of simultaneous jobs and do job scheduling. If there are more jobs scheduled than threads in a pool, `ThreadPool` accumulates jobs in a queue with priorities. Each job has an integer priority. Default priority is zero. All jobs with higher priority values are started before any job with lower priority value. But there is no difference between already executing jobs, thus priority matters only when the pool is overloaded.
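
The priority rule described in the paragraph above (queued jobs carry an integer priority, and jobs with higher priority values start first) can be pictured with a small toy model. This is only an illustration of the ordering, not ClickHouse's `ThreadPool` implementation, and it runs the jobs on a single thread:

```cpp
#include <functional>
#include <iostream>
#include <queue>

/// A queued job: an integer priority plus the work itself.
struct QueuedJob
{
    int priority = 0;
    std::function<void()> run;
    bool operator<(const QueuedJob & other) const { return priority < other.priority; }
};

int main()
{
    std::priority_queue<QueuedJob> queue;           /// max-heap: highest priority value on top
    queue.push({0, [] { std::cout << "default priority job\n"; }});
    queue.push({10, [] { std::cout << "high priority job\n"; }});
    queue.push({-5, [] { std::cout << "low priority job\n"; }});

    while (!queue.empty())
    {
        queue.top().run();                          /// jobs with higher priority values run first
        queue.pop();
    }
    return 0;
}
```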

IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPool::get()` method. It is configured in the same way as the global pool with `max_io_thread_pool_size`, `max_io_thread_pool_free_size` and `io_thread_pool_queue_size` settings. The main purpose of the IO thread pool is to avoid exhaustion of the global pool with IO jobs, which could prevent queries from fully utilizing CPU.

For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate a task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.

There are also specialized thread pools for preemptable tasks. Such an `IExecutableTask` task can be split into an ordered sequence of jobs, called steps. To schedule these tasks in a manner allowing short tasks to be prioritized over long ones `MergeTreeBackgroundExecutor` is used. As the name suggests it is used for background MergeTree related operations such as merges, mutations, fetches and moves. Pool instances are available using `Context::getCommonExecutor()` and other similar methods.

No matter what pool is used for a job, at start a `ThreadStatus` instance is created for this job. It encapsulates all per-thread information: thread id, query id, performance counters, resource consumption and other useful data. A job can access it via a thread local pointer by the `CurrentThread::get()` call, so we do not need to pass it to every function.

If a thread is related to query execution, then the most important thing attached to `ThreadStatus` is the query context `ContextPtr`. Every query has its master thread in the server pool. The master thread does the attachment by holding a `ThreadStatus::QueryScope query_scope(query_context)` object. The master thread also creates a thread group represented with a `ThreadGroupStatus` object. Every additional thread that is allocated during this query execution is attached to its thread group by the `CurrentThread::attachTo(thread_group)` call. Thread groups are used to aggregate profile event counters and track memory consumption by all threads dedicated to a single task (see `MemoryTracker` and `ProfileEvents::Counters` classes for more information).

## Distributed Query Execution {#distributed-query-execution}

Servers in a cluster setup are mostly independent. You can create a `Distributed` table on one or all servers in a cluster. The `Distributed` table does not store data itself – it only provides a “view” to all local tables on multiple nodes of a cluster. When you SELECT from a `Distributed` table, it rewrites that query, chooses remote nodes according to load balancing settings, and sends the query to them. The `Distributed` table requests remote servers to process a query just up to a stage where intermediate results from different servers can be merged. Then it receives the intermediate results and merges them. The distributed table tries to distribute as much work as possible to remote servers and does not send much intermediate data over the network.
@@ -694,6 +694,49 @@ auto s = std::string{"Hello"};

**2.** Exception specifiers from C++03 are not used.

**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.

```
// Traditional way without syntactic sugar
template <typename G, typename = std::enable_if_t<std::is_same<G, F>::value, void>> // SFINAE via std::enable_if, usage of ::value
std::pair<int, int> func(const E<G> & e) // explicitly specified return type
{
    if (elements.count(e)) // .count() membership test
    {
        // ...
    }

    elements.erase(
        std::remove_if(
            elements.begin(), elements.end(),
            [&](const auto x){
                return x == 1;
            }),
        elements.end()); // remove-erase idiom

    return std::make_pair(1, 2); // create pair via make_pair()
}

// With syntactic sugar (C++14/17/20)
template <typename G>
requires std::is_same_v<G, F> // SFINAE via C++20 concept, usage of C++14 template alias
auto func(const E<G> & e) // auto return type (C++14)
{
    if (elements.contains(e)) // C++20 .contains membership test
    {
        // ...
    }

    std::erase_if(
        elements,
        [&](const auto x){
            return x == 1;
        }); // C++20 std::erase_if

    return {1, 2}; // or: return std::pair(1, 2); // create pair via initialization list or value initialization (C++17)
}
```

## Platform {#platform}

**1.** We write code for a specific platform.
@@ -83,7 +83,7 @@ $ ./src/unit_tests_dbms --gtest_filter=LocalAddress*

Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `docker/tests/performance-comparison` tool. See the readme file for invocation.

Each test run one or multiple queries (possibly with combinations of parameters) in a loop. Some tests can contain preconditions on preloaded test dataset.
Each test run one or multiple queries (possibly with combinations of parameters) in a loop.

If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other perf tools during your tests.
@@ -13,7 +13,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_

Engine parameters:

- `database` – Database name. Instead of the database name, you can use a constant expression that returns a string.
- `database` – Database name. You can use `currentDatabase()` or another constant expression that returns a string.
- `table` – Table to flush data to.
- `num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` – Conditions for flushing data from the buffer.
@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
# for hits_v1
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
||||
# for hits_100m_obfuscated
|
||||
clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
||||
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
|
||||
|
||||
# import data
|
||||
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
|
||||
|
@ -127,22 +127,22 @@ After that downloaded archives should be unpacked and installed with installatio
|
||||
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
|
||||
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
|
||||
export LATEST_VERSION
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION-amd64.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION-amd64.tgz"
|
||||
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION-amd64.tgz"
|
||||
|
||||
tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
|
||||
tar -xzvf "clickhouse-common-static-$LATEST_VERSION-amd64.tgz"
|
||||
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
|
||||
|
||||
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
|
||||
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz"
|
||||
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
|
||||
|
||||
tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
|
||||
tar -xzvf "clickhouse-server-$LATEST_VERSION-amd64.tgz"
|
||||
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
|
||||
tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
|
||||
tar -xzvf "clickhouse-client-$LATEST_VERSION-amd64.tgz"
|
||||
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"
|
||||
```
|
||||
|
||||
|
@ -9,66 +9,67 @@ ClickHouse can accept and return data in various formats. A format supported for
|
||||
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
|
||||
|
||||
The supported formats are:
|
||||
|
||||
| Format | Input | Output |
|
||||
|-----------------------------------------------------------------------------------------|-------|--------|
|
||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Template](#format-template) | ✔ | ✔ |
|
||||
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
|
||||
| [CSV](#csv) | ✔ | ✔ |
|
||||
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
|
||||
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
|
||||
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Values](#data-format-values) | ✔ | ✔ |
|
||||
| [Vertical](#vertical) | ✗ | ✔ |
|
||||
| [JSON](#json) | ✗ | ✔ |
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| Format | Input | Output |
|
||||
|-------------------------------------------------------------------------------------------|-------|--------|
|
||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Template](#format-template) | ✔ | ✔ |
|
||||
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
|
||||
| [CSV](#csv) | ✔ | ✔ |
|
||||
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
|
||||
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
|
||||
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
|
||||
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [Values](#data-format-values) | ✔ | ✔ |
|
||||
| [Vertical](#vertical) | ✗ | ✔ |
|
||||
| [JSON](#json) | ✗ | ✔ |
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
|
||||
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
|
||||
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
|
||||
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
|
||||
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TSKV](#tskv) | ✔ | ✔ |
|
||||
| [Pretty](#pretty) | ✗ | ✔ |
|
||||
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
|
||||
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [Prometheus](#prometheus) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
| [Arrow](#data-format-arrow) | ✔ | ✔ |
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
| [ORC](#data-format-orc) | ✔ | ✔ |
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
| [CapnProto](#capnproto) | ✔ | ✔ |
|
||||
| [LineAsString](#lineasstring) | ✔ | ✗ |
|
||||
| [Regexp](#data-format-regexp) | ✔ | ✗ |
|
||||
| [RawBLOB](#rawblob) | ✔ | ✔ |
|
||||
| [MsgPack](#msgpack) | ✔ | ✔ |
|
||||
| [TSKV](#tskv) | ✔ | ✔ |
|
||||
| [Pretty](#pretty) | ✗ | ✔ |
|
||||
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
|
||||
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [Prometheus](#prometheus) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
| [Arrow](#data-format-arrow) | ✔ | ✔ |
|
||||
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
|
||||
| [ORC](#data-format-orc) | ✔ | ✔ |
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
| [CapnProto](#capnproto) | ✔ | ✔ |
|
||||
| [LineAsString](#lineasstring) | ✔ | ✗ |
|
||||
| [Regexp](#data-format-regexp) | ✔ | ✗ |
|
||||
| [RawBLOB](#rawblob) | ✔ | ✔ |
|
||||
| [MsgPack](#msgpack) | ✔ | ✔ |
|
||||
| [MySQLDump](#mysqldump) | ✔ | ✗ |
|
||||
|
||||
|
||||
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
|
||||
|
||||
@ -184,7 +185,7 @@ Differs from the `TabSeparated` format in that the column names are written in t
|
||||
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
|
||||
|
||||
If setting [input_format_with_names_use_header](../operations/settings/settings.md#settings-input_format_with_names_use_header) is set to 1,
|
||||
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input_format_skip_unknown_fields) is set to 1.
|
||||
the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
|
||||
Otherwise, the first row will be skipped.
|
||||
|
||||
This format is also available under the name `TSVWithNames`.
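
As a quick, minimal illustration (not taken from the original page), the following query writes its column names `x` and `y` as the first row of the output:

``` sql
SELECT 1 AS x, 'hello' AS y FORMAT TabSeparatedWithNames
```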
|
||||
@ -1776,3 +1777,70 @@ $ clickhouse-client --query="CREATE TABLE msgpack (array Array(UInt8)) ENGINE =
|
||||
$ clickhouse-client --query="INSERT INTO msgpack VALUES ([0, 1, 2, 3, 42, 253, 254, 255]), ([255, 254, 253, 42, 3, 2, 1, 0])";
|
||||
$ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack.msgpk;
|
||||
```
|
||||
|
||||
## MySQLDump {#mysqldump}
|
||||
|
||||
ClickHouse supports reading MySQL [dumps](https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html).
|
||||
It reads all data from the INSERT queries belonging to one table in the dump. If there is more than one table, by default it reads data from the first one.
|
||||
You can specify the name of the table to read data from using the [input_format_mysql_dump_table_name](../operations/settings/settings.md#settings-input-format-mysql-dump-table-name) setting.
|
||||
If the setting [input_format_mysql_dump_map_columns](../operations/settings/settings.md#settings-input-format-mysql-dump-map-columns) is set to 1 and
|
||||
the dump contains a CREATE query for the specified table, or column names in the INSERT query, the columns from the input data will be mapped to the columns of the table by their names;
|
||||
columns with unknown names will be skipped if the setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input-format-skip-unknown-fields) is set to 1.
|
||||
This format supports schema inference: if the dump contains a CREATE query for the specified table, the structure is extracted from it; otherwise, the schema is inferred from the data of the INSERT queries.
|
||||
|
||||
Examples:
|
||||
|
||||
File dump.sql:
|
||||
```sql
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!50503 SET character_set_client = utf8mb4 */;
|
||||
CREATE TABLE `test` (
|
||||
`x` int DEFAULT NULL,
|
||||
`y` int DEFAULT NULL
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
INSERT INTO `test` VALUES (1,NULL),(2,NULL),(3,NULL),(3,NULL),(4,NULL),(5,NULL),(6,7);
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!50503 SET character_set_client = utf8mb4 */;
|
||||
CREATE TABLE `test 3` (
|
||||
`y` int DEFAULT NULL
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
INSERT INTO `test 3` VALUES (1);
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!50503 SET character_set_client = utf8mb4 */;
|
||||
CREATE TABLE `test2` (
|
||||
`x` int DEFAULT NULL
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
INSERT INTO `test2` VALUES (1),(2),(3);
|
||||
```
|
||||
|
||||
Queries:
|
||||
|
||||
```sql
|
||||
:) desc file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'
|
||||
|
||||
DESCRIBE TABLE file(dump.sql, MySQLDump)
|
||||
SETTINGS input_format_mysql_dump_table_name = 'test2'
|
||||
|
||||
Query id: 25e66c89-e10a-42a8-9b42-1ee8bbbde5ef
|
||||
|
||||
┌─name─┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ x │ Nullable(Int32) │ │ │ │ │ │
|
||||
└──────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
|
||||
:) select * from file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'
|
||||
|
||||
SELECT *
|
||||
FROM file(dump.sql, MySQLDump)
|
||||
SETTINGS input_format_mysql_dump_table_name = 'test2'
|
||||
|
||||
Query id: 17d59664-ebce-4053-bb79-d46a516fb590
|
||||
|
||||
┌─x─┐
|
||||
│ 1 │
|
||||
│ 2 │
|
||||
│ 3 │
|
||||
└───┘
|
||||
```
|
||||
|
@ -426,7 +426,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
|
||||
|
||||
- `status` — use with `static` type, response status code.
|
||||
|
||||
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
|
||||
- `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
|
||||
|
||||
- `response_content` — use with `static` type, response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration sends to client.
|
||||
|
||||
|
72
docs/en/interfaces/postgresql.md
Normal file
72
docs/en/interfaces/postgresql.md
Normal file
@ -0,0 +1,72 @@
|
||||
---
|
||||
sidebar_position: 20
|
||||
sidebar_label: PostgreSQL Interface
|
||||
---
|
||||
|
||||
# PostgreSQL Interface
|
||||
|
||||
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance, allowing you to connect PostgreSQL client applications that are not already directly supported by ClickHouse (for example, Amazon Redshift).
|
||||
|
||||
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<postgresql_port>9005</postgresql_port>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Start up your ClickHouse server and look for a log message similar to the following that mentions **Listening for PostgreSQL compatibility protocol**:
|
||||
|
||||
```response
|
||||
{} <Information> Application: Listening for PostgreSQL compatibility protocol: 127.0.0.1:9005
|
||||
```
|
||||
|
||||
## Connect psql to ClickHouse
|
||||
|
||||
The following command demonstrates how to connect the PostgreSQL client `psql` to ClickHouse:
|
||||
|
||||
```bash
|
||||
psql -p [port] -h [hostname] -U [username] [database_name]
|
||||
```
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
psql -p 9005 -h 127.0.0.1 -U alice default
|
||||
```
|
||||
|
||||
:::note
|
||||
The `psql` client requires a login with a password, so you will not be able to connect using the `default` user with no password. Either assign a password to the `default` user, or log in as a different user.
|
||||
:::
|
||||
|
||||
The `psql` client prompts for the password:
|
||||
|
||||
```response
|
||||
Password for user alice:
|
||||
psql (14.2, server 22.3.1.1)
|
||||
WARNING: psql major version 14, server major version 22.
|
||||
Some psql features might not work.
|
||||
Type "help" for help.
|
||||
|
||||
default=>
|
||||
```
|
||||
|
||||
And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse.
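
For example, here is a query you might run from the `psql` prompt. This is a minimal sketch; any ClickHouse SQL works here:

``` sql
SELECT name, value FROM system.settings WHERE changed LIMIT 5;
```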
|
||||
|
||||
:::caution
|
||||
The PostgreSQL protocol currently only supports plain-text passwords.
|
||||
:::
|
||||
|
||||
## Using SSL
|
||||
|
||||
If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and insecure clients).
|
||||
|
||||
Each client has its own way of connecting with SSL. The following command demonstrates how to pass the certificates and key to securely connect `psql` to ClickHouse:
|
||||
|
||||
```bash
|
||||
psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certificate.pem sslkey=/path/to/key.pem sslrootcert=/path/to/rootcert.pem sslmode=verify-ca"
|
||||
```
|
||||
|
||||
View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings.
|
||||
|
||||
[Original article](https://clickhouse.com/docs/en/interfaces/postgresql)
|
@ -267,7 +267,7 @@ See also “[Executable User Defined Functions](../../sql-reference/functions/in
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<user_defined_executable_functions_config>*_dictionary.xml</user_defined_executable_functions_config>
|
||||
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
|
||||
```
|
||||
|
||||
## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
|
||||
|
@ -6,6 +6,29 @@ slug: /en/operations/settings/settings
|
||||
|
||||
# Settings {#settings}
|
||||
|
||||
## allow_nondeterministic_mutations {#allow_nondeterministic_mutations}
|
||||
|
||||
User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`.
|
||||
|
||||
Given that, for example, dictionaries can be out of sync across nodes, mutations that pull values from them are disallowed on replicated tables by default. Enabling this setting allows this behavior, making it the user's responsibility to ensure that the data used is in sync across all nodes.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<profiles>
|
||||
<default>
|
||||
<allow_nondeterministic_mutations>1</allow_nondeterministic_mutations>
|
||||
|
||||
<!-- ... -->
|
||||
</default>
|
||||
|
||||
<!-- ... -->
|
||||
|
||||
</profiles>
|
||||
```
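
Alternatively, the setting can be enabled for a single session. The following is a hedged sketch with hypothetical table, dictionary, and column names:

``` sql
SET allow_nondeterministic_mutations = 1;

-- A mutation that reads from a dictionary; disallowed on replicated tables unless the setting is enabled.
ALTER TABLE replicated_table UPDATE city = dictGet('city_dict', 'name', city_id) WHERE 1;
```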
|
||||
|
||||
## distributed_product_mode {#distributed-product-mode}
|
||||
|
||||
Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md).
|
||||
@ -4225,3 +4248,18 @@ Default value: 0.
|
||||
The waiting time in seconds for currently handled connections when shutting down the server.
|
||||
|
||||
Default value: 5.
|
||||
|
||||
## input_format_mysql_dump_table_name {#input-format-mysql-dump-table-name}
|
||||
|
||||
The name of the table to read data from in the MySQLDump input format.
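
For example, reusing the `dump.sql` file from the MySQLDump format example earlier in this document:

``` sql
SELECT * FROM file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2'
```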
|
||||
|
||||
## input_format_mysql_dump_map_columns {#input-format-mysql-dump-map-columns}
|
||||
|
||||
Enables matching the columns from the table in the MySQL dump to the columns of the ClickHouse table by name in the MySQLDump input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: 1.
|
||||
|
@ -1,48 +0,0 @@
|
||||
---
|
||||
sidebar_position: 108
|
||||
---
|
||||
|
||||
# groupArraySorted {#groupArraySorted}
|
||||
|
||||
Returns an array with the first N items in ascending order.
|
||||
|
||||
``` sql
|
||||
groupArraySorted(N)(column)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `N` – The number of elements to return.
|
||||
|
||||
If the parameter is omitted, default value 10 is used.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `column` – The value.
|
||||
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
|
||||
|
||||
**Example**
|
||||
|
||||
Gets the first 10 numbers:
|
||||
|
||||
``` sql
|
||||
SELECT groupArraySorted(10)(number) FROM numbers(100)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─groupArraySorted(10)(number)─┐
|
||||
│ [0,1,2,3,4,5,6,7,8,9] │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
Or the last 10:
|
||||
|
||||
``` sql
|
||||
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─groupArraySorted(10)(number, negate(number))─┐
|
||||
│ [99,98,97,96,95,94,93,92,91,90] │
|
||||
└──────────────────────────────────────────────┘
|
||||
```
|
@ -620,9 +620,9 @@ arraySlice(array, offset[, length])
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `array` – Array of data.
|
||||
- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
|
||||
- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
|
||||
- `array` – Array of data.
|
||||
- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
|
||||
- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -130,13 +130,9 @@ bitSlice(s, offset[, length])
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `s` — s is [String](../../sql-reference/data-types/string.md)
|
||||
or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
|
||||
indent on the right. Numbering of the bits begins with 1.
|
||||
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
|
||||
offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
|
||||
If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
|
||||
- `s` — The string to slice: [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `offset` — The start index, in bits. A positive value indicates an offset from the left, and a negative value an indent from the right. Numbering of the bits begins with 1.
|
||||
- `length` — The length of the substring, in bits. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If the length exceeds `s`, it is truncated. If the length isn't a multiple of 8, the result is padded with zeros on the right.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -478,3 +478,17 @@ Result:
|
||||
│ 0 │
|
||||
└──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
|
||||
│ 0 │
|
||||
└────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
@ -480,7 +480,7 @@ Result:
|
||||
|
||||
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
|
||||
|
||||
Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). The ‘offset’ and ‘length’ arguments must be constants.
|
||||
Returns a substring starting at the byte at the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL).
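
A minimal illustration (not from the original page):

``` sql
SELECT substring('ClickHouse', 6, 5)   -- returns 'House'
```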
|
||||
|
||||
## substringUTF8(s, offset, length) {#substringutf8}
|
||||
|
||||
|
@ -84,6 +84,7 @@ Result:
|
||||
Returns the inclusive lower bound of the corresponding tumbling window.
|
||||
|
||||
``` sql
|
||||
tumbleStart(bounds_tuple);
|
||||
tumbleStart(time_attr, interval [, timezone]);
|
||||
```
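
For instance, a hedged sketch (assuming the window-view time functions are available in your build):

``` sql
SELECT tumbleStart(now(), toIntervalMinute(5)) AS window_start;
```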
|
||||
|
||||
@ -92,6 +93,7 @@ tumbleStart(time_attr, interval [, timezone]);
|
||||
Returns the exclusive upper bound of the corresponding tumbling window.
|
||||
|
||||
``` sql
|
||||
tumbleEnd(bounds_tuple);
|
||||
tumbleEnd(time_attr, interval [, timezone]);
|
||||
```
|
||||
|
||||
@ -100,6 +102,7 @@ tumbleEnd(time_attr, interval [, timezone]);
|
||||
Returns the inclusive lower bound of the corresponding hopping window.
|
||||
|
||||
``` sql
|
||||
hopStart(bounds_tuple);
|
||||
hopStart(time_attr, hop_interval, window_interval [, timezone]);
|
||||
```
|
||||
|
||||
@ -108,5 +111,6 @@ hopStart(time_attr, hop_interval, window_interval [, timezone]);
|
||||
Returns the exclusive upper bound of the corresponding hopping window.
|
||||
|
||||
``` sql
|
||||
hopEnd(bounds_tuple);
|
||||
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
|
||||
```
|
||||
```
|
||||
|
@ -410,7 +410,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
|
||||
- `status` — используется с типом `static`, возвращает код состояния ответа.
|
||||
|
||||
- `content_type` — используется с типом `static`, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
|
||||
- `content_type` — используется со всеми типами, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
|
||||
|
||||
- `response_content` — используется с типом`static`, содержимое ответа, отправленное клиенту, при использовании префикса ‘file://’ or ‘config://’, находит содержимое из файла или конфигурации, отправленного клиенту.
|
||||
|
||||
|
@ -263,7 +263,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
|
||||
**Пример**
|
||||
|
||||
``` xml
|
||||
<user_defined_executable_functions_config>*_dictionary.xml</user_defined_executable_functions_config>
|
||||
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
|
||||
```
|
||||
|
||||
## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
|
||||
|
@ -31,7 +31,5 @@ sidebar_label: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
|
||||
- `UInt16` — \[0 : 65535\]
|
||||
- `UInt32` — \[0 : 4294967295\]
|
||||
- `UInt64` — \[0 : 18446744073709551615\]
|
||||
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
|
||||
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
|
||||
|
||||
`UInt128` пока не реализован.
|
||||
|
||||
|
@ -21,7 +21,7 @@ LowCardinality(data_type)
|
||||
|
||||
`LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) в столбцы типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений.
|
||||
|
||||
Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
||||
Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
||||
|
||||
При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
||||
|
||||
|
@ -574,9 +574,9 @@ arraySlice(array, offset[, length])
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `array` – массив данных.
|
||||
- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчет элементов массива начинается с 1.
|
||||
- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length)`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
|
||||
- `array` – массив данных.
|
||||
- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчёт элементов массива начинается с 1.
|
||||
- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length]`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
|
||||
|
||||
**Пример**
|
||||
|
||||
|
@ -446,3 +446,17 @@ SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16');
|
||||
│ 0 │
|
||||
└──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
|
||||
│ 0 │
|
||||
└────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
@ -157,6 +157,7 @@ def build(args):
|
||||
|
||||
if not args.skip_website:
|
||||
website.process_benchmark_results(args)
|
||||
website.minify_website(args)
|
||||
redirects.build_static_redirects(args)
|
||||
|
||||
|
||||
|
@ -1,7 +1,10 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
import bs4
|
||||
|
||||
import util
|
||||
@ -178,6 +181,59 @@ def build_website(args):
|
||||
f.write(content.encode("utf-8"))
|
||||
|
||||
|
||||
def get_css_in(args):
|
||||
return [
|
||||
f"'{args.website_dir}/css/bootstrap.css'",
|
||||
f"'{args.website_dir}/css/docsearch.css'",
|
||||
f"'{args.website_dir}/css/base.css'",
|
||||
f"'{args.website_dir}/css/blog.css'",
|
||||
f"'{args.website_dir}/css/docs.css'",
|
||||
f"'{args.website_dir}/css/highlight.css'",
|
||||
f"'{args.website_dir}/css/main.css'",
|
||||
]
|
||||
|
||||
|
||||
def get_js_in(args):
|
||||
return [
|
||||
f"'{args.website_dir}/js/jquery.js'",
|
||||
f"'{args.website_dir}/js/popper.js'",
|
||||
f"'{args.website_dir}/js/bootstrap.js'",
|
||||
f"'{args.website_dir}/js/sentry.js'",
|
||||
f"'{args.website_dir}/js/base.js'",
|
||||
f"'{args.website_dir}/js/index.js'",
|
||||
f"'{args.website_dir}/js/docsearch.js'",
|
||||
f"'{args.website_dir}/js/docs.js'",
|
||||
f"'{args.website_dir}/js/main.js'",
|
||||
]
|
||||
|
||||
|
||||
def minify_website(args):
|
||||
css_in = " ".join(get_css_in(args))
|
||||
css_out = f"{args.output_dir}/docs/css/base.css"
|
||||
os.makedirs(f"{args.output_dir}/docs/css")
|
||||
|
||||
command = f"cat {css_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(css_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(css_out, "rb") as f:
|
||||
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
|
||||
js_in = " ".join(get_js_in(args))
|
||||
js_out = f"{args.output_dir}/docs/js/base.js"
|
||||
os.makedirs(f"{args.output_dir}/docs/js")
|
||||
|
||||
command = f"cat {js_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(js_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(js_out, "rb") as f:
|
||||
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
logging.info(js_digest)
|
||||
|
||||
|
||||
def process_benchmark_results(args):
|
||||
benchmark_root = os.path.join(args.website_dir, "benchmark")
|
||||
required_keys = {
|
||||
@ -190,7 +246,7 @@ def process_benchmark_results(args):
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
|
||||
for result in sorted(os.listdir(results_root)):
|
||||
result_file = os.path.join(results_root, result)
|
||||
logging.debug(f"Reading benchmark result from {result_file}")
|
||||
logging.info(f"Reading benchmark result from {result_file}")
|
||||
with open(result_file, "r") as f:
|
||||
result = json.loads(f.read())
|
||||
for item in result:
|
||||
|
@ -397,9 +397,9 @@ SELECT arrayPushFront(['b'], 'a') AS res
|
||||
|
||||
**参数**
|
||||
|
||||
- `array` – 数组。
|
||||
- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。
|
||||
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。
|
||||
- `array` – 数组。
|
||||
- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。
|
||||
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length]`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。
|
||||
|
||||
**示例**
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
@ -48,6 +49,7 @@
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace std::literals;
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -1038,6 +1040,158 @@ void Client::processConfig()
|
||||
client_info.quota_key = config().getString("quota_key", "");
|
||||
}
|
||||
|
||||
|
||||
void Client::readArguments(
|
||||
int argc,
|
||||
char ** argv,
|
||||
Arguments & common_arguments,
|
||||
std::vector<Arguments> & external_tables_arguments,
|
||||
std::vector<Arguments> & hosts_and_ports_arguments)
|
||||
{
|
||||
/** We allow different groups of arguments:
|
||||
* - common arguments;
|
||||
* - arguments for any number of external tables each in form "--external args...",
|
||||
* where possible args are file, name, format, structure, types;
|
||||
* - param arguments for prepared statements.
|
||||
* Split these groups before processing.
|
||||
*/
|
||||
bool in_external_group = false;
|
||||
|
||||
std::string prev_host_arg;
|
||||
std::string prev_port_arg;
|
||||
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
{
|
||||
std::string_view arg = argv[arg_num];
|
||||
|
||||
if (arg == "--external")
|
||||
{
|
||||
in_external_group = true;
|
||||
external_tables_arguments.emplace_back(Arguments{""});
|
||||
}
|
||||
/// Options with value after equal sign.
|
||||
else if (
|
||||
in_external_group
|
||||
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|
||||
|| arg.starts_with("--types=")))
|
||||
{
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
}
|
||||
/// Options with value after whitespace.
|
||||
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
|
||||
{
|
||||
if (arg_num + 1 < argc)
|
||||
{
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
++arg_num;
|
||||
arg = argv[arg_num];
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
in_external_group = false;
|
||||
if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
|
||||
|
||||
/// Parameter argument after the underscore: --param_name.
|
||||
if (arg.starts_with("--param_"))
|
||||
{
|
||||
auto param_continuation = arg.substr(strlen("--param_"));
|
||||
auto equal_pos = param_continuation.find_first_of('=');
|
||||
|
||||
if (equal_pos == std::string::npos)
|
||||
{
|
||||
/// param_name value
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
query_parameters.emplace(String(param_continuation), String(arg));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (equal_pos == 0)
|
||||
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
/// param_name=value
|
||||
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
|
||||
}
|
||||
}
|
||||
else if (arg.starts_with("--host") || arg.starts_with("-h"))
|
||||
{
|
||||
std::string host_arg;
|
||||
/// --host host
|
||||
if (arg == "--host" || arg == "-h")
|
||||
{
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
host_arg = "--host=";
|
||||
host_arg.append(arg);
|
||||
}
|
||||
else
|
||||
host_arg = arg;
|
||||
|
||||
/// --port port1 --host host1
|
||||
if (!prev_port_arg.empty())
|
||||
{
|
||||
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
|
||||
prev_port_arg.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// --host host1 --host host2
|
||||
if (!prev_host_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||
|
||||
prev_host_arg = host_arg;
|
||||
}
|
||||
}
|
||||
else if (arg.starts_with("--port"))
|
||||
{
|
||||
auto port_arg = String{arg};
|
||||
/// --port port
|
||||
if (arg == "--port")
|
||||
{
|
||||
port_arg.push_back('=');
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
port_arg.append(arg);
|
||||
}
|
||||
|
||||
/// --host host1 --port port1
|
||||
if (!prev_host_arg.empty())
|
||||
{
|
||||
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
|
||||
prev_host_arg.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// --port port1 --port port2
|
||||
if (!prev_port_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||
|
||||
prev_port_arg = port_arg;
|
||||
}
|
||||
}
|
||||
else if (arg == "--allow_repeated_settings")
|
||||
allow_repeated_settings = true;
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
}
|
||||
if (!prev_host_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||
if (!prev_port_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -36,6 +36,13 @@ protected:
|
||||
|
||||
void processConfig() override;
|
||||
|
||||
void readArguments(
|
||||
int argc,
|
||||
char ** argv,
|
||||
Arguments & common_arguments,
|
||||
std::vector<Arguments> & external_tables_arguments,
|
||||
std::vector<Arguments> & hosts_and_ports_arguments) override;
|
||||
|
||||
private:
|
||||
void printChangedSettings() const;
|
||||
std::vector<String> loadWarningMessages();
|
||||
|
@ -68,6 +68,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_ENOUGH_SPACE;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int CANNOT_KILL;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1062,8 +1063,11 @@ namespace
|
||||
return pid;
|
||||
}
|
||||
|
||||
int stop(const fs::path & pid_file, bool force)
|
||||
int stop(const fs::path & pid_file, bool force, bool do_not_kill)
|
||||
{
|
||||
if (force && do_not_kill)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible");
|
||||
|
||||
UInt64 pid = isRunning(pid_file);
|
||||
|
||||
if (!pid)
|
||||
@ -1092,9 +1096,15 @@ namespace
|
||||
|
||||
if (try_num == num_tries)
|
||||
{
|
||||
fmt::print("Will terminate forcefully.\n", pid);
|
||||
if (do_not_kill)
|
||||
{
|
||||
fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid);
|
||||
return 1;
|
||||
}
|
||||
|
||||
fmt::print("Will terminate forcefully (pid = {}).\n", pid);
|
||||
if (0 == kill(pid, 9))
|
||||
fmt::print("Sent kill signal.\n", pid);
|
||||
fmt::print("Sent kill signal (pid = {}).\n", pid);
|
||||
else
|
||||
throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR);
|
||||
|
||||
@ -1175,6 +1185,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
|
||||
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
|
||||
("force", po::bool_switch(), "Stop with KILL signal instead of TERM")
|
||||
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -1189,7 +1200,9 @@ int mainEntryClickHouseStop(int argc, char ** argv)
|
||||
fs::path prefix = options["prefix"].as<std::string>();
|
||||
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
|
||||
|
||||
return stop(pid_file, options["force"].as<bool>());
|
||||
bool force = options["force"].as<bool>();
|
||||
bool do_not_kill = options["do-not-kill"].as<bool>();
|
||||
return stop(pid_file, force, do_not_kill);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -1247,6 +1260,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
|
||||
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")
|
||||
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
|
||||
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
|
||||
;
|
||||
|
||||
po::variables_map options;
|
||||
@ -1265,7 +1279,9 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
|
||||
fs::path config = prefix / options["config-path"].as<std::string>() / "config.xml";
|
||||
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
|
||||
|
||||
if (int res = stop(pid_file, options["force"].as<bool>()))
|
||||
bool force = options["force"].as<bool>();
|
||||
bool do_not_kill = options["do-not-kill"].as<bool>();
|
||||
if (int res = stop(pid_file, force, do_not_kill))
|
||||
return res;
|
||||
|
||||
return start(user, executable, config, pid_file);
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/UseSSL.h>
|
||||
#include <IO/IOThreadPool.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Common/ErrorHandlers.h>
|
||||
@ -105,6 +106,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
auto loaded_config = config_processor.loadConfig();
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
}
|
||||
|
||||
GlobalThreadPool::initialize(
|
||||
config().getUInt("max_thread_pool_size", 10000),
|
||||
config().getUInt("max_thread_pool_free_size", 1000),
|
||||
config().getUInt("thread_pool_queue_size", 10000)
|
||||
);
|
||||
|
||||
IOThreadPool::initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
}
|
||||
|
||||
|
||||
@ -726,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
|
||||
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
|
||||
}
|
||||
|
||||
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
|
||||
{
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
{
|
||||
const char * arg = argv[arg_num];
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wunused-function"
|
||||
|
@ -45,6 +45,8 @@ protected:
|
||||
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
|
||||
|
||||
void processConfig() override;
|
||||
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
|
||||
|
||||
|
||||
void updateLoggerLevel(const String & logs_level) override;
|
||||
|
||||
|
@ -540,7 +540,7 @@ static void sanityChecks(Server & server)
|
||||
try
|
||||
{
|
||||
if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
|
||||
server.context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
|
||||
server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -558,7 +558,7 @@ static void sanityChecks(Server & server)
|
||||
try
|
||||
{
|
||||
if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
|
||||
server.context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
|
||||
server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -1088,11 +1088,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
|
||||
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
|
||||
if (config->has("global_memory_usage_overcommit_max_wait_microseconds"))
|
||||
{
|
||||
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0);
|
||||
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
|
||||
}
|
||||
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 200);
|
||||
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
|
||||
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
|
||||
|
||||
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
|
||||
@ -1294,17 +1291,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
LOG_INFO(log, "Listening for {}", server.getDescription());
|
||||
}
|
||||
|
||||
auto & access_control = global_context->getAccessControl();
|
||||
if (config().has("custom_settings_prefixes"))
|
||||
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
|
||||
|
||||
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
|
||||
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
|
||||
|
||||
/// Initialize access storages.
|
||||
auto & access_control = global_context->getAccessControl();
|
||||
try
|
||||
{
|
||||
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
|
||||
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -367,7 +367,7 @@
|
||||
|
||||
<!-- Path to temporary data for processing hard queries. -->
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
|
||||
|
||||
<!-- Disable AuthType plaintext_password and no_password for ACL. -->
|
||||
<!-- <allow_plaintext_password>0</allow_plaintext_password> -->
|
||||
<!-- <allow_no_password>0</allow_no_password> -->`
|
||||
@ -545,6 +545,14 @@
|
||||
-->
|
||||
</user_directories>
|
||||
|
||||
<access_control_improvements>
|
||||
<!-- Enables logic that users without permissive row policies can still read rows using a SELECT query.
|
||||
For example, if there are two users A and B, and a row policy is defined only for A, then
|
||||
if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
|
||||
By default this setting is false for compatibility with earlier access configurations. -->
|
||||
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
|
||||
</access_control_improvements>
|
||||
|
||||
<!-- Default profile of settings. -->
|
||||
<default_profile>default</default_profile>
|
||||
|
||||
@ -1297,8 +1305,8 @@
|
||||
-->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
<merge_tree_metadata_cache>
|
||||
<!--merge_tree_metadata_cache>
|
||||
<lru_cache_size>268435456</lru_cache_size>
|
||||
<continue_if_corrupted>true</continue_if_corrupted>
|
||||
</merge_tree_metadata_cache>
|
||||
</merge_tree_metadata_cache-->
|
||||
</clickhouse>
|
||||
|
@ -129,8 +129,8 @@

#query_div
{
/* Make enough space for even huge queries. */
height: 20%;
/* Make enough space for medium/large queries but allowing query textarea to grow. */
min-height: 20%;
}

#query
@ -233,7 +233,7 @@
vertical-align: top;
}

td.right
.right
{
text-align: right;
}
@ -272,6 +272,26 @@
max-width: none;
}

td.transposed
{
max-width: none;
overflow: auto;
white-space: pre-wrap;
}

td.empty-result
{
text-align: center;
vertical-align: middle;
}

div.empty-result
{
opacity: 10%;
font-size: 7vw;
font-family: Liberation Sans, DejaVu Sans, sans-serif;
}

/* The style for SQL NULL */
.null
{
@ -613,8 +633,108 @@
}
}

function renderCell(cell, col_idx, settings)
{
let td = document.createElement('td');

let is_null = (cell === null);
let is_link = false;

/// Test: SELECT number, toString(number) AS str, number % 2 ? number : NULL AS nullable, range(number) AS arr, CAST((['hello', 'world'], [number, number % 2]) AS Map(String, UInt64)) AS map FROM numbers(10)
let text;
if (is_null) {
text = 'ᴺᵁᴸᴸ';
} else if (typeof(cell) === 'object') {
text = JSON.stringify(cell);
} else {
text = cell;

/// If it looks like URL, create a link. This is for convenience.
if (typeof(cell) == 'string' && cell.match(/^https?:\/\/\S+$/)) {
is_link = true;
}
}

let node = document.createTextNode(text);
if (is_link) {
let link = document.createElement('a');
link.appendChild(node);
link.href = text;
link.setAttribute('target', '_blank');
node = link;
}

if (settings.is_transposed) {
td.className = 'left transposed';
} else {
td.className = settings.column_is_number[col_idx] ? 'right' : 'left';
}
if (is_null) {
td.className += ' null';
}

/// If it's a number, render bar in background.
if (!settings.is_transposed && settings.column_need_render_bars[col_idx] && text > 0) {
const ratio = 100 * text / settings.column_maximums[col_idx];

let div = document.createElement('div');

div.style.width = '100%';
div.style.background = `linear-gradient(to right,
var(--bar-color) 0%, var(--bar-color) ${ratio}%,
transparent ${ratio}%, transparent 100%)`;

div.appendChild(node);
node = div;
}

td.appendChild(node);
return td;
}

function renderTableTransposed(response)
{
let tbody = document.createElement('tbody');
for (let col_idx in response.meta) {
let tr = document.createElement('tr');
{
let th = document.createElement('th');
th.className = 'right';
th.style.width = '0';
th.appendChild(document.createTextNode(response.meta[col_idx].name));
tr.appendChild(th);
}
for (let row_idx in response.data)
{
let cell = response.data[row_idx][col_idx];
const td = renderCell(cell, col_idx, {is_transposed: true});
tr.appendChild(td);
}
if (response.data.length == 0 && col_idx == 0)
{
/// If result is empty, show this fact with a style.
let td = document.createElement('td');
td.rowSpan = response.meta.length;
td.className = 'empty-result';
let div = document.createElement('div');
div.appendChild(document.createTextNode("empty result"));
div.className = 'empty-result';
td.appendChild(div);
tr.appendChild(td);
}
tbody.appendChild(tr);
}
let table = document.getElementById('data-table');
table.appendChild(tbody);
}

function renderTable(response)
{
if (response.data.length <= 1 && response.meta.length >= 5) {
renderTableTransposed(response)
return;
}

let thead = document.createElement('thead');
for (let idx in response.meta) {
let th = document.createElement('th');
@ -628,66 +748,25 @@
const max_rows = 10000 / response.meta.length;
let row_num = 0;

const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);

const settings = {
is_transposed: false,
column_is_number: column_is_number,
column_maximums: column_maximums,
column_minimums: column_minimums,
column_need_render_bars: column_need_render_bars,
};

let tbody = document.createElement('tbody');
for (let row_idx in response.data) {
let tr = document.createElement('tr');
for (let col_idx in response.data[row_idx]) {
let td = document.createElement('td');
let cell = response.data[row_idx][col_idx];

let is_null = (cell === null);
let is_link = false;

/// Test: SELECT number, toString(number) AS str, number % 2 ? number : NULL AS nullable, range(number) AS arr, CAST((['hello', 'world'], [number, number % 2]) AS Map(String, UInt64)) AS map FROM numbers(10)
let text;
if (is_null) {
text = 'ᴺᵁᴸᴸ';
} else if (typeof(cell) === 'object') {
text = JSON.stringify(cell);
} else {
text = cell;

/// If it looks like URL, create a link. This is for convenience.
if (typeof(cell) == 'string' && cell.match(/^https?:\/\/\S+$/)) {
is_link = true;
}
}

let node = document.createTextNode(text);
if (is_link) {
let link = document.createElement('a');
link.appendChild(node);
link.href = text;
link.setAttribute('target', '_blank');
node = link;
}

td.className = column_is_number[col_idx] ? 'right' : 'left';
if (is_null) {
td.className += ' null';
}

/// If it's a number, render bar in background.
if (column_need_render_bars[col_idx] && text > 0) {
const ratio = 100 * text / column_maximums[col_idx];

let div = document.createElement('div');

div.style.width = '100%';
div.style.background = `linear-gradient(to right,
var(--bar-color) 0%, var(--bar-color) ${ratio}%,
transparent ${ratio}%, transparent 100%)`;

div.appendChild(node);
node = div;
}

td.appendChild(node);
const td = renderCell(cell, col_idx, settings);
tr.appendChild(td);
}
tbody.appendChild(tr);
@ -787,10 +866,7 @@
document.documentElement.setAttribute('data-theme', theme);
}

/**
* First we check if theme is set via the 'theme' GET parameter, if not, we check localStorage,
* otherwise we check OS preference
*/
/// First we check if theme is set via the 'theme' GET parameter, if not, we check localStorage, otherwise we check OS preference.
let theme = current_url.searchParams.get('theme');
if (['dark', 'light'].indexOf(theme) === -1) {
theme = window.localStorage.getItem('theme');

@ -6,9 +6,6 @@
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>

<!-- How to choose between replicas during distributed query processing.
random - choose random replica from set of replicas with minimum number of errors
nearest_hostname - from set of replicas with minimum number of errors, choose replica

@ -149,6 +149,24 @@ AccessControl::AccessControl()

AccessControl::~AccessControl() = default;


void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
if (config_.has("custom_settings_prefixes"))
setCustomSettingsPrefixes(config_.getString("custom_settings_prefixes"));

setNoPasswordAllowed(config_.getBool("allow_no_password", true));
setPlaintextPasswordAllowed(config_.getBool("allow_plaintext_password", true));

setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool(
"access_control_improvements.users_without_row_policies_can_read_rows",
false /* false because we need to be compatible with earlier access configurations */));

addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
}


void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
auto storages = getStoragesPtr();
@ -170,11 +188,7 @@ void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguratio

void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
@ -207,11 +221,7 @@ void AccessControl::addUsersConfigStorage(
return;
}
}
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());

@ -50,6 +50,9 @@ public:
AccessControl();
~AccessControl() override;

void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_);

/// Parses access entities from a configuration loaded from users.xml.
/// This function adds UsersConfigAccessStorage if it wasn't added before.
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
@ -122,6 +125,12 @@ public:
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
bool isPlaintextPasswordAllowed() const;

/// Enables logic that users without permissive row policies can still read rows using a SELECT query.
/// For example, if there are two users A and B, and a row policy is defined only for A, then
/// if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }

UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);

@ -178,6 +187,7 @@ private:
std::unique_ptr<CustomSettingsPrefixes> custom_settings_prefixes;
std::atomic_bool allow_plaintext_password = true;
std::atomic_bool allow_no_password = true;
std::atomic_bool users_without_row_policies_can_read_rows = false;
};

}

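A minimal sketch of the refactoring idea in the hunks above, not the actual ClickHouse sources: instead of handing the users.xml storage several std::function callbacks, the storage keeps a reference to its owning access-control object and queries it directly when parsing. All names below are illustrative stand-ins.

#include <functional>

// Hypothetical, simplified stand-in for AccessControl.
struct AccessControlLike
{
    bool isNoPasswordAllowed() const { return allow_no_password; }
    bool isPlaintextPasswordAllowed() const { return allow_plaintext_password; }
    bool allow_no_password = true;
    bool allow_plaintext_password = true;
};

// Before: each policy was captured as a separate callback at construction time.
struct StorageWithCallbacks
{
    std::function<bool()> is_no_password_allowed;
    std::function<bool()> is_plaintext_password_allowed;
};

// After: the storage holds a reference and asks the owner whenever it reloads the config.
struct StorageWithReference
{
    explicit StorageWithReference(const AccessControlLike & access_control_) : access_control(access_control_) {}
    bool noPasswordAllowed() const { return access_control.isNoPasswordAllowed(); }
    const AccessControlLike & access_control;
};

One practical effect of this choice is that newly added settings (such as the row-policy flag above) do not require threading yet another callback through every constructor.
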
@ -5,7 +5,6 @@
#include <Interpreters/ClientInfo.h>
#include <Core/UUID.h>
#include <base/scope_guard.h>
#include <base/shared_ptr_helper.h>
#include <boost/container/flat_set.hpp>
#include <mutex>
#include <optional>

@ -28,17 +28,25 @@ namespace
permissions.push_back(filter);
}

ASTPtr getResult() &&
ASTPtr getResult(bool users_without_row_policies_can_read_rows) &&
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
if (!permissions.empty() || !users_without_row_policies_can_read_rows)
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
}

/// Process restrictive filters.
auto result = makeASTForLogicalAnd(std::move(restrictions));
ASTPtr result;
if (!restrictions.empty())
result = makeASTForLogicalAnd(std::move(restrictions));

bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
if (result)
{
bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
}

return result;
}
@ -234,7 +242,7 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
{
auto & mixed_filter = (*mixed_filters)[key];
mixed_filter.database_and_table_name = mixer.database_and_table_name;
mixed_filter.ast = std::move(mixer.mixer).getResult();
mixed_filter.ast = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
}

enabled.mixed_filters.store(mixed_filters);

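A rough sketch of the mixing rule the new getResult() implements, using plain booleans instead of AST nodes (an assumed simplification): permissive filters are OR-ed, restrictive filters are AND-ed, and a user with no permissive policy is allowed through only when users_without_row_policies_can_read_rows is enabled.

#include <vector>

/// Simplified model: each filter has already been evaluated to a bool for one row.
bool mixRowPolicyFilters(
    const std::vector<bool> & permissive,
    const std::vector<bool> & restrictive,
    bool users_without_row_policies_can_read_rows)
{
    bool allowed = true;
    if (!permissive.empty() || !users_without_row_policies_can_read_rows)
    {
        /// At least one permissive filter must pass; an empty set passes nothing.
        allowed = false;
        for (bool f : permissive)
            allowed = allowed || f;
    }
    /// Every restrictive filter must pass.
    for (bool f : restrictive)
        allowed = allowed && f;
    return allowed;
}
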
@ -3,6 +3,7 @@
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/SettingsProfile.h>
#include <Access/AccessControl.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
@ -339,7 +340,7 @@ namespace
}


std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config)
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows)
{
std::map<std::pair<String /* database */, String /* table */>, std::unordered_map<String /* user */, String /* filter */>> all_filters_map;

@ -395,8 +396,19 @@ namespace
const auto & [database, table_name] = database_and_table_name;
for (const String & user_name : user_names)
{
String filter;
auto it = user_to_filters.find(user_name);
String filter = (it != user_to_filters.end()) ? it->second : "1";
if (it != user_to_filters.end())
{
filter = it->second;
}
else
{
if (users_without_row_policies_can_read_rows)
continue;
else
filter = "1";
}

auto policy = std::make_shared<RowPolicy>();
policy->setFullName(user_name, database, table_name);
@ -411,7 +423,7 @@ namespace

SettingsProfileElements parseSettingsConstraints(const Poco::Util::AbstractConfiguration & config,
const String & path_to_constraints,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
SettingsProfileElements profile_elements;
Poco::Util::AbstractConfiguration::Keys keys;
@ -419,8 +431,7 @@ namespace

for (const String & setting_name : keys)
{
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);

SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -448,7 +459,7 @@ namespace
std::shared_ptr<SettingsProfile> parseSettingsProfile(
const Poco::Util::AbstractConfiguration & config,
const String & profile_name,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
auto profile = std::make_shared<SettingsProfile>();
profile->setName(profile_name);
@ -470,13 +481,12 @@ namespace

if (key == "constraints" || key.starts_with("constraints["))
{
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, check_setting_name_function));
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control));
continue;
}

const auto & setting_name = key;
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);

SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -490,7 +500,7 @@ namespace

std::vector<AccessEntityPtr> parseSettingsProfiles(
const Poco::Util::AbstractConfiguration & config,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
Poco::Util::AbstractConfiguration::Keys profile_names;
config.keys("profiles", profile_names);
@ -502,7 +512,7 @@ namespace
{
try
{
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
profiles.push_back(parseSettingsProfile(config, profile_name, access_control));
}
catch (Exception & e)
{
@ -515,13 +525,8 @@ namespace
}
}

UsersConfigAccessStorage::UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: UsersConfigAccessStorage(STORAGE_TYPE, check_setting_name_function_, is_no_password_allowed_function_, is_plaintext_password_allowed_function_)
{
}

UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: IAccessStorage(storage_name_), check_setting_name_function(check_setting_name_function_),is_no_password_allowed_function(is_no_password_allowed_function_), is_plaintext_password_allowed_function(is_plaintext_password_allowed_function_)
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_)
: IAccessStorage(storage_name_), access_control(access_control_)
{
}

@ -563,16 +568,16 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu
{
try
{
bool no_password_allowed = is_no_password_allowed_function();
bool plaintext_password_allowed = is_plaintext_password_allowed_function();
bool no_password_allowed = access_control.isNoPasswordAllowed();
bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed();
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config))
for (const auto & entity : parseRowPolicies(config, access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
for (const auto & entity : parseSettingsProfiles(config, access_control))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
}

@ -12,6 +12,7 @@ namespace Poco::Util

namespace DB
{
class AccessControl;
class ConfigReloader;

/// Implementation of IAccessStorage which loads all from users.xml periodically.
@ -20,13 +21,8 @@ class UsersConfigAccessStorage : public IAccessStorage
public:

static constexpr char STORAGE_TYPE[] = "users.xml";
using CheckSettingNameFunction = std::function<void(const std::string_view &)>;
using IsNoPasswordFunction = std::function<bool()>;
using IsPlaintextPasswordFunction = std::function<bool()>;

UsersConfigAccessStorage(const String & storage_name_ = STORAGE_TYPE, const CheckSettingNameFunction & check_setting_name_function_ = {}, const IsNoPasswordFunction & is_no_password_allowed_function_ ={}, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_ = {}); /// NOLINT
UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_); /// NOLINT

UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_);
~UsersConfigAccessStorage() override;

const char * getStorageType() const override { return STORAGE_TYPE; }
@ -58,10 +54,8 @@ private:
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override;

const AccessControl & access_control;
MemoryAccessStorage memory_storage;
CheckSettingNameFunction check_setting_name_function;
IsNoPasswordFunction is_no_password_allowed_function;
IsPlaintextPasswordFunction is_plaintext_password_allowed_function;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
mutable std::mutex load_mutex;

@ -225,26 +225,38 @@ public:
}

void
addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const final
addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena *,
ssize_t if_argument_pos) const final
{
AggregateFunctionSumData<Numerator> sum_data;
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
sum_data.addManyConditional(column.getData().data(), flags.data(), batch_size);
this->data(place).denominator += countBytesInFilter(flags.data(), batch_size);
sum_data.addManyConditional(column.getData().data(), flags.data(), row_begin, row_end);
this->data(place).denominator += countBytesInFilter(flags.data(), row_begin, row_end);
}
else
{
sum_data.addMany(column.getData().data(), batch_size);
this->data(place).denominator += batch_size;
sum_data.addMany(column.getData().data(), row_begin, row_end);
this->data(place).denominator += (row_end - row_begin);
}
increment(place, sum_data.sum);
}

void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *, ssize_t if_argument_pos)
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
const UInt8 * null_map,
Arena *,
ssize_t if_argument_pos)
const final
{
AggregateFunctionSumData<Numerator> sum_data;
@ -253,22 +265,22 @@ public:
{
/// Merge the 2 sets of flags (null and if) into a single one. This allows us to use parallelizable sums when available
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(batch_size);
auto final_flags = std::make_unique<UInt8[]>(row_end);
size_t used_value = 0;
for (size_t i = 0; i < batch_size; ++i)
for (size_t i = row_begin; i < row_end; ++i)
{
UInt8 kept = (!null_map[i]) & !!if_flags[i];
final_flags[i] = kept;
used_value += kept;
}

sum_data.addManyConditional(column.getData().data(), final_flags.get(), batch_size);
sum_data.addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end);
this->data(place).denominator += used_value;
}
else
{
sum_data.addManyNotNull(column.getData().data(), null_map, batch_size);
this->data(place).denominator += batch_size - countBytesInFilter(null_map, batch_size);
sum_data.addManyNotNull(column.getData().data(), null_map, row_begin, row_end);
this->data(place).denominator += (row_end - row_begin) - countBytesInFilter(null_map, row_begin, row_end);
}
increment(place, sum_data.sum);
}

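The change running through this and the following aggregate-function hunks is the same: the batch interface takes a half-open row range [row_begin, row_end) instead of a batch_size counted from row 0, so callers that used to pass the column size now pass (0, size). A minimal standalone sketch of the convention (illustrative only, not ClickHouse code):

#include <cstddef>

/// Sum a column slice using the new half-open range convention.
/// Passing (0, size) reproduces the old "batch_size" behaviour exactly.
double sumRange(const double * column, size_t row_begin, size_t row_end)
{
    double sum = 0.0;
    for (size_t i = row_begin; i < row_end; ++i)
        sum += column[i];
    return sum;
}
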
@ -54,7 +54,12 @@
}

void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena *,
ssize_t if_argument_pos) const override
{
if (if_argument_pos >= 0)
{
@ -63,12 +68,13 @@ public:
}
else
{
data(place).count += batch_size;
data(place).count += row_end - row_begin;
}
}

void addBatchSinglePlaceNotNull(
size_t batch_size,
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
const UInt8 * null_map,
@ -78,11 +84,12 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
data(place).count += countBytesInFilterWithNull(flags, null_map);
data(place).count += countBytesInFilterWithNull(flags, null_map, row_begin, row_end);
}
else
{
data(place).count += batch_size - countBytesInFilter(null_map, batch_size);
size_t rows = row_end - row_begin;
data(place).count += rows - countBytesInFilter(null_map, row_begin, row_end);
}
}

@ -204,17 +211,23 @@ public:
}

void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena *,
ssize_t if_argument_pos) const override
{
const auto & nc = assert_cast<const ColumnNullable &>(*columns[0]);
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
data(place).count += countBytesInFilterWithNull(flags, nc.getNullMapData().data());
data(place).count += countBytesInFilterWithNull(flags, nc.getNullMapData().data(), row_begin, row_end);
}
else
{
data(place).count += batch_size - countBytesInFilter(nc.getNullMapData().data(), batch_size);
size_t rows = row_end - row_begin;
data(place).count += rows - countBytesInFilter(nc.getNullMapData().data(), row_begin, row_end);
}
}

@ -200,7 +200,7 @@ public:
arguments_raw[i] = arguments[i].get();

assert(!arguments.empty());
nested_func->addBatchSinglePlace(arguments[0]->size(), getNestedPlace(place), arguments_raw.data(), arena);
nested_func->addBatchSinglePlace(0, arguments[0]->size(), getNestedPlace(place), arguments_raw.data(), arena);
nested_func->insertResultInto(getNestedPlace(place), to, arena);
}

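For the same reason, the filter-counting helpers used above (countBytesInFilter, countBytesInFilterWithNull) gain (row_begin, row_end) parameters in this commit. A minimal sketch of the assumed semantics, under different names and without the SIMD optimisations of the real implementation:

#include <cstddef>
#include <cstdint>

/// Count filter bytes that are non-zero within [row_begin, row_end).
size_t countBytesInFilterRange(const uint8_t * filter, size_t row_begin, size_t row_end)
{
    size_t count = 0;
    for (size_t i = row_begin; i < row_end; ++i)
        count += filter[i] != 0;
    return count;
}

/// Same, but a row is counted only when it is selected by the filter and not NULL.
size_t countBytesInFilterWithNullRange(const uint8_t * filter, const uint8_t * null_map, size_t row_begin, size_t row_end)
{
    size_t count = 0;
    for (size_t i = row_begin; i < row_end; ++i)
        count += (filter[i] != 0) && (null_map[i] == 0);
    return count;
}
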
@ -1,147 +0,0 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
|
||||
|
||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
|
||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
|
||||
{
|
||||
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
|
||||
};
|
||||
|
||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySortedFieldType
|
||||
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
|
||||
{
|
||||
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
|
||||
AggregateFunctionGroupArraySorted;
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
|
||||
};
|
||||
|
||||
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
|
||||
}
|
||||
|
||||
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
|
||||
{
|
||||
#define DISPATCH(A, C, B) \
|
||||
if (which.idx == TypeIndex::A) \
|
||||
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
|
||||
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
|
||||
WhichDataType which(argument_types[0]);
|
||||
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
|
||||
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
|
||||
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
|
||||
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
|
||||
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
|
||||
#undef DISPATCH
|
||||
#undef DISPATCH_NUMERIC
|
||||
|
||||
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
|
||||
threshold, argument_types, params));
|
||||
}
|
||||
else
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
|
||||
threshold, argument_types, params));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
|
||||
|
||||
if (params.size() == 1)
|
||||
{
|
||||
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
|
||||
|
||||
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
|
||||
throw Exception(
|
||||
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
if (k == 0)
|
||||
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
threshold = k;
|
||||
}
|
||||
else if (!params.empty())
|
||||
{
|
||||
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
if (argument_types.size() == 2)
|
||||
{
|
||||
if (isNumber(argument_types[1]))
|
||||
{
|
||||
#define DISPATCH2(A, B) \
|
||||
if (which.idx == TypeIndex::A) \
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
|
||||
#define DISPATCH(A) DISPATCH2(A, A)
|
||||
WhichDataType which(argument_types[1]);
|
||||
FOR_NUMERIC_TYPES(DISPATCH)
|
||||
DISPATCH2(Enum8, Int8)
|
||||
DISPATCH2(Enum16, Int16)
|
||||
#undef DISPATCH
|
||||
#undef DISPATCH2
|
||||
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
|
||||
}
|
||||
}
|
||||
else if (argument_types.size() == 1)
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
|
||||
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
|
||||
}
|
||||
}
|
@ -1,310 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename TColumn, bool is_plain>
|
||||
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
||||
{
|
||||
if constexpr (is_plain)
|
||||
{
|
||||
StringRef str = column->getDataAt(row);
|
||||
auto ptr = arena->alloc(str.size);
|
||||
std::copy(str.data, str.data + str.size, ptr);
|
||||
return StringRef(ptr, str.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
return column->serializeValueIntoArena(row, *arena, begin);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumn, UInt64>)
|
||||
return column->getUInt(row);
|
||||
else
|
||||
return column->getInt(row);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumn, typename TFilter = void>
|
||||
size_t
|
||||
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
|
||||
{
|
||||
for (int i = 0; i < threshold; i++)
|
||||
{
|
||||
results[i] = 0;
|
||||
}
|
||||
|
||||
threshold = std::min(num_elements, threshold);
|
||||
int current_max = 0;
|
||||
int cur;
|
||||
int z;
|
||||
for (int i = 0; i < num_elements; i++)
|
||||
{
|
||||
if constexpr (!std::is_same_v<TFilter, void>)
|
||||
{
|
||||
if (filter[i] == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
//Starting from the highest values and we look for the immediately lower than the given one
|
||||
for (cur = current_max; cur > 0; cur--)
|
||||
{
|
||||
if (data[i] > data[results[cur - 1]])
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur < threshold)
|
||||
{
|
||||
//Move all the higher values 1 position to the right
|
||||
for (z = std::min(threshold - 1, current_max); z > cur; z--)
|
||||
results[z] = results[z - 1];
|
||||
|
||||
if (current_max < threshold)
|
||||
++current_max;
|
||||
|
||||
//insert element into the given position
|
||||
results[cur] = i;
|
||||
}
|
||||
}
|
||||
|
||||
return current_max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct SortableItem
|
||||
{
|
||||
T a;
|
||||
size_t b;
|
||||
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
|
||||
};
|
||||
|
||||
template <typename TColumn, typename TFilter = void>
|
||||
size_t getFirstNElements_high_threshold(
|
||||
const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
|
||||
{
|
||||
std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
|
||||
size_t num_elements_filtered = 0;
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
if constexpr (!std::is_same_v<TFilter, void>)
|
||||
{
|
||||
if (filter[i] == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
dataIndexed.data()[num_elements_filtered].a = data[i];
|
||||
dataIndexed.data()[num_elements_filtered].b = i;
|
||||
num_elements_filtered++;
|
||||
}
|
||||
|
||||
threshold = std::min(num_elements_filtered, threshold);
|
||||
|
||||
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
|
||||
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
|
||||
|
||||
for (size_t i = 0; i < threshold; i++)
|
||||
{
|
||||
results[i] = dataIndexed[i].b;
|
||||
}
|
||||
|
||||
return threshold;
|
||||
}
|
||||
|
||||
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
|
||||
|
||||
template <typename TColumn>
|
||||
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
|
||||
{
|
||||
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
|
||||
{
|
||||
if (filter != nullptr)
|
||||
return getFirstNElements_low_threshold(data, num_elements, threshold, results, filter);
|
||||
else
|
||||
return getFirstNElements_low_threshold(data, num_elements, threshold, results);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filter != nullptr)
|
||||
return getFirstNElements_high_threshold(data, num_elements, threshold, results, filter);
|
||||
else
|
||||
return getFirstNElements_high_threshold(data, num_elements, threshold, results);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
|
||||
{
|
||||
protected:
|
||||
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
|
||||
using Base = IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted>;
|
||||
|
||||
UInt64 threshold;
|
||||
DataTypePtr & input_data_type;
|
||||
mutable std::mutex mutex;
|
||||
|
||||
static void deserializeAndInsert(StringRef str, IColumn & data_to);
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
|
||||
: IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted>(argument_types_, params)
|
||||
, threshold(threshold_)
|
||||
, input_data_type(this->argument_types[0])
|
||||
{
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr place) const override
|
||||
{
|
||||
Base::create(place);
|
||||
this->data(place).threshold = threshold;
|
||||
}
|
||||
|
||||
String getName() const override { return "groupArraySorted"; }
|
||||
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumnA, StringRef>)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
State & data = this->data(place);
|
||||
if constexpr (use_column_b)
|
||||
{
|
||||
data.add(
|
||||
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
|
||||
}
|
||||
else
|
||||
{
|
||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumn, bool is_plain, typename TFunc>
|
||||
void
|
||||
forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
|
||||
{
|
||||
const TColumn * values = nullptr;
|
||||
std::unique_ptr<std::vector<TColumn>> values_vector;
|
||||
std::vector<size_t> best_rows(threshold);
|
||||
|
||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
||||
{
|
||||
values_vector.reset(new std::vector<TColumn>(batch_size));
|
||||
for (size_t i = 0; i < batch_size; i++)
|
||||
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
|
||||
values = (*values_vector).data();
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
|
||||
values = column.getData().data();
|
||||
}
|
||||
|
||||
const UInt8 * filter = nullptr;
|
||||
StringRef refFilter;
|
||||
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
refFilter = columns[if_argument_pos]->getRawData();
|
||||
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
|
||||
}
|
||||
|
||||
size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
func(best_rows[i], values);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
|
||||
{
|
||||
State & data = this->data(place);
|
||||
|
||||
if constexpr (use_column_b)
|
||||
{
|
||||
forFirstRows<TColumnB, is_plain_b>(
|
||||
batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
|
||||
{
|
||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
forFirstRows<TColumnA, is_plain_a>(
|
||||
batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
|
||||
{
|
||||
data.add(values[row]);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void
|
||||
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).deserialize(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
auto & values = this->data(place).values;
|
||||
offsets_to.push_back(offsets_to.back() + values.size());
|
||||
|
||||
IColumn & data_to = arr_to.getData();
|
||||
for (auto value : values)
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumnA, StringRef>)
|
||||
{
|
||||
auto str = State::itemValue(value);
|
||||
if constexpr (is_plain_a)
|
||||
{
|
||||
data_to.insertData(str.data, str.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.deserializeAndInsertFromArena(str.data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.insert(State::itemValue(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
@ -1,162 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T>
|
||||
static void writeOneItem(WriteBuffer & buf, T item)
|
||||
{
|
||||
if constexpr (std::numeric_limits<T>::is_signed)
|
||||
{
|
||||
writeVarInt(item, buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeVarUInt(item, buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
|
||||
{
|
||||
writeBinary(item, buf);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
|
||||
{
|
||||
if constexpr (std::numeric_limits<T>::is_signed)
|
||||
{
|
||||
DB::Int64 val;
|
||||
readVarT(val, buf);
|
||||
item = val;
|
||||
}
|
||||
else
|
||||
{
|
||||
DB::UInt64 val;
|
||||
readVarT(val, buf);
|
||||
item = val;
|
||||
}
|
||||
}
|
||||
|
||||
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
|
||||
{
|
||||
item = readStringBinaryInto(*arena, buf);
|
||||
}
|
||||
|
||||
template <typename Storage>
|
||||
struct AggregateFunctionGroupArraySortedDataBase
|
||||
{
|
||||
typedef typename Storage::value_type ValueType;
|
||||
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
|
||||
|
||||
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
|
||||
inline void narrowDown()
|
||||
{
|
||||
while (values.size() > threshold)
|
||||
values.erase(--values.end());
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
|
||||
{
|
||||
values.merge(Storage(other.values));
|
||||
narrowDown();
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeOneItem(buf, UInt64(values.size()));
|
||||
for (auto value : values)
|
||||
{
|
||||
serializeItem(buf, value);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
|
||||
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
|
||||
|
||||
void deserialize(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
values.clear();
|
||||
UInt64 length;
|
||||
readOneItem(buf, nullptr, length);
|
||||
|
||||
while (length--)
|
||||
{
|
||||
values.insert(deserializeItem(buf, arena));
|
||||
}
|
||||
|
||||
narrowDown();
|
||||
}
|
||||
|
||||
UInt64 threshold;
|
||||
Storage values;
|
||||
};
|
||||
|
||||
template <typename T, bool expr_sorted, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
|
||||
{
|
||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
|
||||
using Base::Base;
|
||||
|
||||
void add(T item, TIndex weight)
|
||||
{
|
||||
Base::values.insert({weight, item});
|
||||
Base::narrowDown();
|
||||
}
|
||||
|
||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
|
||||
{
|
||||
writeOneItem(buf, value.first);
|
||||
writeOneItem(buf, value.second);
|
||||
}
|
||||
|
||||
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
TIndex first;
|
||||
T second;
|
||||
readOneItem(buf, arena, first);
|
||||
readOneItem(buf, arena, second);
|
||||
|
||||
return {first, second};
|
||||
}
|
||||
|
||||
static T itemValue(typename Base::ValueType & value) { return value.second; }
|
||||
};
|
||||
|
||||
template <typename T, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
|
||||
{
|
||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
|
||||
using Base::Base;
|
||||
|
||||
void add(T item)
|
||||
{
|
||||
Base::values.insert(item);
|
||||
Base::narrowDown();
|
||||
}
|
||||
|
||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
|
||||
|
||||
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
T value;
|
||||
readOneItem(buf, arena, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static T itemValue(typename Base::ValueType & value) { return value; }
|
||||
};
|
||||
}
|
@ -119,7 +119,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t) const override
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
|
||||
const UInt8 * null_map = column->getNullMapData().data();
|
||||
@ -142,25 +148,31 @@ public:
|
||||
/// Combine the 2 flag arrays so we can call a simplified version (one check vs 2)
|
||||
/// Note that now the null map will contain 0 if not null and not filtered, or 1 for null or filtered (or both)
|
||||
|
||||
auto final_nulls = std::make_unique<UInt8[]>(batch_size);
|
||||
auto final_nulls = std::make_unique<UInt8[]>(row_end);
|
||||
|
||||
if (filter_null_map)
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
final_nulls[i] = (!!null_map[i]) | (!filter_values[i]) | (!!filter_null_map[i]);
|
||||
else
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
final_nulls[i] = (!!null_map[i]) | (!filter_values[i]);
|
||||
|
||||
if constexpr (result_is_nullable)
|
||||
{
|
||||
if (!memoryIsByte(final_nulls.get(), batch_size, 1))
|
||||
if (!memoryIsByte(final_nulls.get(), row_begin, row_end, 1))
|
||||
this->setFlag(place);
|
||||
else
|
||||
return; /// No work to do.
|
||||
}
|
||||
|
||||
this->nested_function->addBatchSinglePlaceNotNull(
|
||||
batch_size, this->nestedPlace(place), columns_param, final_nulls.get(), arena, -1);
|
||||
row_begin,
|
||||
row_end,
|
||||
this->nestedPlace(place),
|
||||
columns_param,
|
||||
final_nulls.get(),
|
||||
arena,
|
||||
-1);
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
|
@ -98,31 +98,38 @@ public:
|
||||
}
|
||||
|
||||
void addBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
nested_func->addBatch(batch_size, places, place_offset, columns, arena, num_arguments - 1);
|
||||
nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t) const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
nested_func->addBatchSinglePlace(batch_size, place, columns, arena, num_arguments - 1);
|
||||
nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
Arena * arena,
|
||||
ssize_t) const override
|
||||
{
|
||||
nested_func->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, num_arguments - 1);
|
||||
nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
@ -131,13 +138,14 @@ public:
|
||||
}
|
||||
|
||||
void mergeBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const AggregateDataPtr * rhs,
|
||||
Arena * arena) const override
|
||||
{
|
||||
nested_func->mergeBatch(batch_size, places, place_offset, rhs, arena);
|
||||
nested_func->mergeBatch(row_begin, row_end, places, place_offset, rhs, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version) const override
|
||||
|
@ -1159,7 +1159,12 @@ public:
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
if constexpr (is_any)
|
||||
if (this->data(place).has())
|
||||
@ -1167,7 +1172,7 @@ public:
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
{
|
||||
@ -1179,7 +1184,7 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
this->data(place).changeIfBetter(*columns[0], i, arena);
|
||||
if constexpr (is_any)
|
||||
@ -1189,7 +1194,8 @@ public:
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull( /// NOLINT
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
@ -1203,7 +1209,7 @@ public:
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (!null_map[i] && flags[i])
|
||||
{
|
||||
@ -1215,7 +1221,7 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (!null_map[i])
|
||||
{
|
||||
|
@@ -307,17 +307,22 @@ public:
}

void addBatchSinglePlace( /// NOLINT
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
const UInt8 * null_map = column->getNullMapData().data();

this->nested_function->addBatchSinglePlaceNotNull(
batch_size, this->nestedPlace(place), &nested_column, null_map, arena, if_argument_pos);
row_begin, row_end, this->nestedPlace(place), &nested_column, null_map, arena, if_argument_pos);

if constexpr (result_is_nullable)
if (!memoryIsByte(null_map, batch_size, 1))
if (!memoryIsByte(null_map, row_begin, row_end, 1))
this->setFlag(place);
}
@@ -109,7 +109,8 @@ public:
}

void addBatch( /// NOLINT
size_t batch_size,
size_t row_begin,
size_t row_end,
AggregateDataPtr * places,
size_t place_offset,
const IColumn ** columns,

@@ -119,7 +120,7 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = 0; i < batch_size; ++i)
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i] && places[i])
add(places[i] + place_offset, columns, i, arena);

@@ -127,21 +128,26 @@ public:
}
else
{
nested_function->addBatch(batch_size, places, place_offset, columns, arena, if_argument_pos);
for (size_t i = 0; i < batch_size; ++i)
nested_function->addBatch(row_begin, row_end, places, place_offset, columns, arena, if_argument_pos);
for (size_t i = row_begin; i < row_end; ++i)
if (places[i])
(places[i] + place_offset)[size_of_data] = 1;
}
}

void addBatchSinglePlace( /// NOLINT
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos = -1) const override
{
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
nested_function->addBatchSinglePlace(batch_size, place, columns, arena, if_argument_pos);
for (size_t i = 0; i < batch_size; ++i)
nested_function->addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i])
{

@@ -152,16 +158,17 @@ public:
}
else
{
if (batch_size)
if (row_end != row_begin)
{
nested_function->addBatchSinglePlace(batch_size, place, columns, arena, if_argument_pos);
nested_function->addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
place[size_of_data] = 1;
}
}
}

void addBatchSinglePlaceNotNull( /// NOLINT
size_t batch_size,
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
const UInt8 * null_map,

@@ -171,8 +178,8 @@ public:
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
nested_function->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, if_argument_pos);
for (size_t i = 0; i < batch_size; ++i)
nested_function->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
for (size_t i = row_begin; i < row_end; ++i)
{
if (flags[i] && !null_map[i])
{

@@ -183,10 +190,10 @@ public:
}
else
{
if (batch_size)
if (row_end != row_begin)
{
nested_function->addBatchSinglePlaceNotNull(batch_size, place, columns, null_map, arena, if_argument_pos);
for (size_t i = 0; i < batch_size; ++i)
nested_function->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
for (size_t i = row_begin; i < row_end; ++i)
{
if (!null_map[i])
{

@@ -208,14 +215,15 @@ public:
}

void mergeBatch(
size_t batch_size,
size_t row_begin,
size_t row_end,
AggregateDataPtr * places,
size_t place_offset,
const AggregateDataPtr * rhs,
Arena * arena) const override
{
nested_function->mergeBatch(batch_size, places, place_offset, rhs, arena);
for (size_t i = 0; i < batch_size; ++i)
nested_function->mergeBatch(row_begin, row_end, places, place_offset, rhs, arena);
for (size_t i = row_begin; i < row_end; ++i)
(places[i] + place_offset)[size_of_data] |= rhs[i][size_of_data];
}
@@ -52,6 +52,8 @@ public:
return nested_func->getDefaultVersion();
}

size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }

void create(AggregateDataPtr __restrict place) const override
{
nested_func->create(place);

@@ -59,9 +59,11 @@ struct AggregateFunctionSumData

/// Vectorized version
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE addMany(const Value * __restrict ptr, size_t count)
void NO_SANITIZE_UNDEFINED NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
const auto * end = ptr + count;
ptr += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

if constexpr (std::is_floating_point_v<T>)
{

@@ -87,7 +89,7 @@ struct AggregateFunctionSumData

/// clang cannot vectorize the loop if accumulator is class member instead of local variable.
T local_sum{};
while (ptr < end)
while (ptr < end_ptr)
{
Impl::add(local_sum, *ptr);
++ptr;

@@ -97,9 +99,11 @@ struct AggregateFunctionSumData

template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count)
addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
const auto * end = ptr + count;
ptr += start;
size_t count = end - start;
const auto * end_ptr = ptr + count;

if constexpr (
(is_integer<T> && !is_big_int_v<T>)

@@ -108,7 +112,7 @@ struct AggregateFunctionSumData
/// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
/// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
T local_sum{};
while (ptr < end)
while (ptr < end_ptr)
{
T multiplier = !*condition_map == add_if_zero;
Impl::add(local_sum, *ptr * multiplier);

@@ -151,7 +155,7 @@ struct AggregateFunctionSumData
}

T local_sum{};
while (ptr < end)
while (ptr < end_ptr)
{
if (!*condition_map == add_if_zero)
Impl::add(local_sum, *ptr);

@@ -162,15 +166,15 @@ struct AggregateFunctionSumData
}

template <typename Value>
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t start, size_t end)
{
return addManyConditionalInternal<Value, true>(ptr, null_map, count);
return addManyConditionalInternal<Value, true>(ptr, null_map, start, end);
}

template <typename Value>
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count)
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t start, size_t end)
{
return addManyConditionalInternal<Value, false>(ptr, cond_map, count);
return addManyConditionalInternal<Value, false>(ptr, cond_map, start, end);
}

void NO_SANITIZE_UNDEFINED merge(const AggregateFunctionSumData & rhs)

@@ -220,7 +224,7 @@ struct AggregateFunctionSumKahanData

/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
/// Less than in ordinary sum, because the algorithm is more complicated and too large loop unrolling is questionable.
/// But this is just a guess.

@@ -228,7 +232,10 @@ struct AggregateFunctionSumKahanData
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};

const auto * end = ptr + count;
ptr += start;
size_t count = end - start;

const auto * end_ptr = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);

while (ptr < unrolled_end)

@@ -241,7 +248,7 @@ struct AggregateFunctionSumKahanData
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);

while (ptr < end)
while (ptr < end_ptr)
{
addImpl(*ptr, sum, compensation);
++ptr;

@@ -249,13 +256,16 @@ struct AggregateFunctionSumKahanData
}

template <typename Value, bool add_if_zero>
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count)
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
T partial_compensations[unroll_count]{};

const auto * end = ptr + count;
ptr += start;
size_t count = end - start;

const auto * end_ptr = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);

while (ptr < unrolled_end)

@@ -270,7 +280,7 @@ struct AggregateFunctionSumKahanData
for (size_t i = 0; i < unroll_count; ++i)
mergeImpl(sum, compensation, partial_sums[i], partial_compensations[i]);

while (ptr < end)
while (ptr < end_ptr)
{
if ((!*condition_map) == add_if_zero)
addImpl(*ptr, sum, compensation);

@@ -280,15 +290,15 @@ struct AggregateFunctionSumKahanData
}

template <typename Value>
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t start, size_t end)
{
return addManyConditionalInternal<Value, true>(ptr, null_map, count);
return addManyConditionalInternal<Value, true>(ptr, null_map, start, end);
}

template <typename Value>
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count)
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t start, size_t end)
{
return addManyConditionalInternal<Value, false>(ptr, cond_map, count);
return addManyConditionalInternal<Value, false>(ptr, cond_map, start, end);
}

void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)

@@ -385,22 +395,33 @@ public:
}

void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena *,
ssize_t if_argument_pos) const override
{
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
this->data(place).addManyConditional(column.getData().data(), flags.data(), batch_size);
this->data(place).addManyConditional(column.getData().data(), flags.data(), row_begin, row_end);
}
else
{
this->data(place).addMany(column.getData().data(), batch_size);
this->data(place).addMany(column.getData().data(), row_begin, row_end);
}
}

void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *, ssize_t if_argument_pos)
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
const UInt8 * null_map,
Arena *,
ssize_t if_argument_pos)
const override
{
const auto & column = assert_cast<const ColVecType &>(*columns[0]);

@@ -408,15 +429,15 @@ public:
{
/// Merge the 2 sets of flags (null and if) into a single one. This allows us to use parallelizable sums when available
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(batch_size);
for (size_t i = 0; i < batch_size; ++i)
auto final_flags = std::make_unique<UInt8[]>(row_end);
for (size_t i = row_begin; i < row_end; ++i)
final_flags[i] = (!null_map[i]) & if_flags[i];

this->data(place).addManyConditional(column.getData().data(), final_flags.get(), batch_size);
this->data(place).addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end);
}
else
{
this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
this->data(place).addManyNotNull(column.getData().data(), null_map, row_begin, row_end);
}
}
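A minimal standalone sketch of the branchless trick used by addManyConditionalInternal above, written with plain standard C++ and hypothetical names rather than the ClickHouse helpers: multiplying each value by a 0/1 flag removes the per-row branch, so the compiler can auto-vectorize the loop over the half-open [row_begin, row_end) range introduced by this change.

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative only: condition-driven sum without a branch in the hot loop.
int64_t sumConditionalBranchless(const std::vector<int64_t> & values,
                                 const std::vector<uint8_t> & keep_row,
                                 size_t row_begin, size_t row_end)
{
    int64_t local_sum = 0; // local accumulator, as in the diff, so the loop can vectorize
    for (size_t i = row_begin; i < row_end; ++i)
        local_sum += values[i] * static_cast<int64_t>(keep_row[i] != 0); // 0 for filtered rows, 1 otherwise
    return local_sum;
}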
@ -175,7 +175,8 @@ public:
|
||||
* and do a single call to "addBatch" for devirtualization and inlining.
|
||||
*/
|
||||
virtual void addBatch( /// NOLINT
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
@ -184,13 +185,16 @@ public:
|
||||
|
||||
/// The version of "addBatch", that handle sparse columns as arguments.
|
||||
virtual void addBatchSparse(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
Arena * arena) const = 0;
|
||||
|
||||
virtual void mergeBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const AggregateDataPtr * rhs,
|
||||
@ -199,17 +203,27 @@ public:
|
||||
/** The same for single place.
|
||||
*/
|
||||
virtual void addBatchSinglePlace( /// NOLINT
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const = 0;
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const = 0;
|
||||
|
||||
/// The version of "addBatchSinglePlace", that handle sparse columns as arguments.
|
||||
virtual void addBatchSparseSinglePlace(
|
||||
AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena) const = 0;
|
||||
|
||||
/** The same for single place when need to aggregate only filtered data.
|
||||
* Instead of using an if-column, the condition is combined inside the null_map
|
||||
*/
|
||||
virtual void addBatchSinglePlaceNotNull( /// NOLINT
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
@ -217,7 +231,12 @@ public:
|
||||
ssize_t if_argument_pos = -1) const = 0;
|
||||
|
||||
virtual void addBatchSinglePlaceFromInterval( /// NOLINT
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1)
|
||||
const = 0;
|
||||
|
||||
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
|
||||
@ -226,7 +245,8 @@ public:
|
||||
* "places" contains a large number of same values consecutively.
|
||||
*/
|
||||
virtual void addBatchArray(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
@ -237,7 +257,8 @@ public:
|
||||
* and pointers to aggregation states are stored in AggregateDataPtr[256] lookup table.
|
||||
*/
|
||||
virtual void addBatchLookupTable8(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
std::function<void(AggregateDataPtr &)> init,
|
||||
@ -251,7 +272,8 @@ public:
|
||||
* All places that were not inserted must be destroyed if there was exception during insert into result column.
|
||||
*/
|
||||
virtual void insertResultIntoBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
IColumn & to,
|
||||
@ -261,7 +283,8 @@ public:
|
||||
/** Destroy batch of aggregate places.
|
||||
*/
|
||||
virtual void destroyBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset) const noexcept = 0;
|
||||
|
||||
@ -355,7 +378,8 @@ public:
|
||||
AddFunc getAddressOfAddFunction() const override { return &addFree; }
|
||||
|
||||
void addBatch( /// NOLINT
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
@ -365,7 +389,7 @@ public:
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (flags[i] && places[i])
|
||||
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
|
||||
@ -373,13 +397,15 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
if (places[i])
|
||||
static_cast<const Derived *>(this)->add(places[i] + place_offset, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSparse(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
@ -387,33 +413,42 @@ public:
|
||||
{
|
||||
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
|
||||
const auto * values = &column_sparse.getValuesColumn();
|
||||
size_t batch_size = column_sparse.size();
|
||||
auto offset_it = column_sparse.begin();
|
||||
|
||||
for (size_t i = 0; i < batch_size; ++i, ++offset_it)
|
||||
/// FIXME: make it more optimal
|
||||
for (size_t i = 0; i < row_begin; ++i, ++offset_it)
|
||||
;
|
||||
|
||||
for (size_t i = 0; i < row_end; ++i, ++offset_it)
|
||||
static_cast<const Derived *>(this)->add(places[offset_it.getCurrentRow()] + place_offset,
|
||||
&values, offset_it.getValueIndex(), arena);
|
||||
}
|
||||
|
||||
void mergeBatch(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const AggregateDataPtr * rhs,
|
||||
Arena * arena) const override
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
if (places[i])
|
||||
static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace( /// NOLINT
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1) const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
@ -421,26 +456,34 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSparseSinglePlace(
|
||||
AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena) const override
|
||||
{
|
||||
/// TODO: add values and defaults separately if order of adding isn't important.
|
||||
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
|
||||
const auto * values = &column_sparse.getValuesColumn();
|
||||
size_t batch_size = column_sparse.size();
|
||||
auto offset_it = column_sparse.begin();
|
||||
|
||||
for (size_t i = 0; i < batch_size; ++i, ++offset_it)
|
||||
/// FIXME: make it more optimal
|
||||
for (size_t i = 0; i < row_begin; ++i, ++offset_it)
|
||||
;
|
||||
|
||||
for (size_t i = 0; i < row_end; ++i, ++offset_it)
|
||||
static_cast<const Derived *>(this)->add(place, &values, offset_it.getValueIndex(), arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull( /// NOLINT
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
const UInt8 * null_map,
|
||||
@ -450,26 +493,31 @@ public:
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
if (!null_map[i] && flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
if (!null_map[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceFromInterval( /// NOLINT
|
||||
size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1)
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos = -1)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = batch_begin; i < batch_end; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
if (flags[i])
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
@ -477,17 +525,23 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = batch_begin; i < batch_end; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
static_cast<const Derived *>(this)->add(place, columns, i, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchArray(
|
||||
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
const IColumn ** columns,
|
||||
const UInt64 * offsets,
|
||||
Arena * arena)
|
||||
const override
|
||||
{
|
||||
size_t current_offset = 0;
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
size_t next_offset = offsets[i];
|
||||
for (size_t j = current_offset; j < next_offset; ++j)
|
||||
@ -498,7 +552,8 @@ public:
|
||||
}
|
||||
|
||||
void addBatchLookupTable8(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * map,
|
||||
size_t place_offset,
|
||||
std::function<void(AggregateDataPtr &)> init,
|
||||
@ -508,10 +563,10 @@ public:
|
||||
{
|
||||
static constexpr size_t UNROLL_COUNT = 8;
|
||||
|
||||
size_t i = 0;
|
||||
size_t i = row_begin;
|
||||
|
||||
size_t batch_size_unrolled = batch_size / UNROLL_COUNT * UNROLL_COUNT;
|
||||
for (; i < batch_size_unrolled; i += UNROLL_COUNT)
|
||||
size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
|
||||
for (; i < size_unrolled; i += UNROLL_COUNT)
|
||||
{
|
||||
AggregateDataPtr places[UNROLL_COUNT];
|
||||
for (size_t j = 0; j < UNROLL_COUNT; ++j)
|
||||
@ -527,7 +582,7 @@ public:
|
||||
static_cast<const Derived *>(this)->add(places[j] + place_offset, columns, i + j, arena);
|
||||
}
|
||||
|
||||
for (; i < batch_size; ++i)
|
||||
for (; i < row_end; ++i)
|
||||
{
|
||||
AggregateDataPtr & place = map[key[i]];
|
||||
if (unlikely(!place))
|
||||
@ -536,13 +591,20 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultIntoBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, IColumn & to, Arena * arena, bool destroy_place_after_insert) const override
|
||||
void insertResultIntoBatch(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset,
|
||||
IColumn & to,
|
||||
Arena * arena,
|
||||
bool destroy_place_after_insert) const override
|
||||
{
|
||||
size_t batch_index = 0;
|
||||
size_t batch_index = row_begin;
|
||||
|
||||
try
|
||||
{
|
||||
for (; batch_index < batch_size; ++batch_index)
|
||||
for (; batch_index < row_end; ++batch_index)
|
||||
{
|
||||
static_cast<const Derived *>(this)->insertResultInto(places[batch_index] + place_offset, to, arena);
|
||||
|
||||
@ -552,16 +614,20 @@ public:
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
for (size_t destroy_index = batch_index; destroy_index < batch_size; ++destroy_index)
|
||||
for (size_t destroy_index = batch_index; destroy_index < row_end; ++destroy_index)
|
||||
static_cast<const Derived *>(this)->destroy(places[destroy_index] + place_offset);
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void destroyBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset) const noexcept override
|
||||
void destroyBatch(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * places,
|
||||
size_t place_offset) const noexcept override
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; ++i)
|
||||
for (size_t i = row_begin; i < row_end; ++i)
|
||||
{
|
||||
static_cast<const Derived *>(this)->destroy(places[i] + place_offset);
|
||||
}
|
||||
@ -612,7 +678,8 @@ public:
|
||||
}
|
||||
|
||||
void addBatchLookupTable8(
|
||||
size_t batch_size,
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr * map,
|
||||
size_t place_offset,
|
||||
std::function<void(AggregateDataPtr &)> init,
|
||||
@ -626,7 +693,7 @@ public:
|
||||
|
||||
if (func.allocatesMemoryInArena() || sizeof(Data) > 16 || func.sizeOfData() != sizeof(Data))
|
||||
{
|
||||
IAggregateFunctionHelper<Derived>::addBatchLookupTable8(batch_size, map, place_offset, init, key, columns, arena);
|
||||
IAggregateFunctionHelper<Derived>::addBatchLookupTable8(row_begin, row_end, map, place_offset, init, key, columns, arena);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -637,12 +704,12 @@ public:
|
||||
std::unique_ptr<Data[]> places{new Data[256 * UNROLL_COUNT]};
|
||||
bool has_data[256 * UNROLL_COUNT]{}; /// Separate flags array to avoid heavy initialization.
|
||||
|
||||
size_t i = 0;
|
||||
size_t i = row_begin;
|
||||
|
||||
/// Aggregate data into different lookup tables.
|
||||
|
||||
size_t batch_size_unrolled = batch_size / UNROLL_COUNT * UNROLL_COUNT;
|
||||
for (; i < batch_size_unrolled; i += UNROLL_COUNT)
|
||||
size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
|
||||
for (; i < size_unrolled; i += UNROLL_COUNT)
|
||||
{
|
||||
for (size_t j = 0; j < UNROLL_COUNT; ++j)
|
||||
{
|
||||
@ -676,7 +743,7 @@ public:
|
||||
|
||||
/// Process tails and add directly to the final destination.
|
||||
|
||||
for (; i < batch_size; ++i)
|
||||
for (; i < row_end; ++i)
|
||||
{
|
||||
size_t k = key[i];
|
||||
AggregateDataPtr & place = map[k];
|
||||
|
@@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);

class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);

@@ -131,7 +130,6 @@ void registerAggregateFunctions()
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionGroupArraySorted(factory);

registerWindowFunctions(factory);
}
@ -3,6 +3,8 @@
|
||||
#include <Core/SettingsFields.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -10,9 +12,10 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_PARSE_BACKUP_SETTINGS;
|
||||
extern const int WRONG_BACKUP_SETTINGS;
|
||||
}
|
||||
|
||||
/// List of backup settings except base_backup_name.
|
||||
/// List of backup settings except base_backup_name and cluster_host_ids.
|
||||
#define LIST_OF_BACKUP_SETTINGS(M) \
|
||||
M(String, compression_method) \
|
||||
M(Int64, compression_level) \
|
||||
@ -23,16 +26,13 @@ namespace ErrorCodes
|
||||
M(UInt64, replica_num) \
|
||||
M(Bool, allow_storing_multiple_replicas) \
|
||||
M(Bool, internal) \
|
||||
M(String, host_id) \
|
||||
M(String, coordination_zk_path)
|
||||
|
||||
|
||||
BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query)
|
||||
{
|
||||
BackupSettings res;
|
||||
|
||||
if (query.base_backup_name)
|
||||
res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);
|
||||
|
||||
if (query.settings)
|
||||
{
|
||||
const auto & settings = query.settings->as<const ASTSetQuery &>().changes;
|
||||
@ -48,25 +48,149 @@ BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query)
|
||||
}
|
||||
}
|
||||
|
||||
if (query.base_backup_name)
|
||||
res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);
|
||||
|
||||
if (query.cluster_host_ids)
|
||||
res.cluster_host_ids = Util::clusterHostIDsFromAST(*query.cluster_host_ids);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void BackupSettings::copySettingsToBackupQuery(ASTBackupQuery & query) const
|
||||
void BackupSettings::copySettingsToQuery(ASTBackupQuery & query) const
|
||||
{
|
||||
query.base_backup_name = base_backup_info ? base_backup_info->toAST() : nullptr;
|
||||
|
||||
auto query_settings = std::make_shared<ASTSetQuery>();
|
||||
query_settings->is_standalone = false;
|
||||
|
||||
static const BackupSettings default_settings;
|
||||
bool all_settings_are_default = true;
|
||||
|
||||
#define SET_SETTINGS_IN_BACKUP_QUERY_HELPER(TYPE, NAME) \
|
||||
if ((NAME) != default_settings.NAME) \
|
||||
query_settings->changes.emplace_back(#NAME, static_cast<Field>(SettingField##TYPE{NAME}));
|
||||
{ \
|
||||
query_settings->changes.emplace_back(#NAME, static_cast<Field>(SettingField##TYPE{NAME})); \
|
||||
all_settings_are_default = false; \
|
||||
}
|
||||
|
||||
LIST_OF_BACKUP_SETTINGS(SET_SETTINGS_IN_BACKUP_QUERY_HELPER)
|
||||
|
||||
if (all_settings_are_default)
|
||||
query_settings = nullptr;
|
||||
|
||||
query.settings = query_settings;
|
||||
|
||||
query.base_backup_name = base_backup_info ? base_backup_info->toAST() : nullptr;
|
||||
query.cluster_host_ids = !cluster_host_ids.empty() ? Util::clusterHostIDsToAST(cluster_host_ids) : nullptr;
|
||||
}
|
||||
|
||||
std::vector<Strings> BackupSettings::Util::clusterHostIDsFromAST(const IAST & ast)
|
||||
{
|
||||
std::vector<Strings> res;
|
||||
|
||||
const auto * array_of_shards = typeid_cast<const ASTFunction *>(&ast);
|
||||
if (!array_of_shards || (array_of_shards->name != "array"))
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
|
||||
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
|
||||
|
||||
if (array_of_shards->arguments)
|
||||
{
|
||||
const ASTs shards = array_of_shards->arguments->children;
|
||||
res.resize(shards.size());
|
||||
|
||||
for (size_t i = 0; i != shards.size(); ++i)
|
||||
{
|
||||
const auto * array_of_replicas = typeid_cast<const ASTLiteral *>(shards[i].get());
|
||||
if (!array_of_replicas || (array_of_replicas->value.getType() != Field::Types::Array))
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
|
||||
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
|
||||
const auto & replicas = array_of_replicas->value.get<const Array &>();
|
||||
res[i].resize(replicas.size());
|
||||
for (size_t j = 0; j != replicas.size(); ++j)
|
||||
{
|
||||
const auto & replica = replicas[j];
|
||||
if (replica.getType() != Field::Types::String)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS,
|
||||
"Setting cluster_host_ids has wrong format, must be array of arrays of string literals");
|
||||
res[i][j] = replica.get<const String &>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ASTPtr BackupSettings::Util::clusterHostIDsToAST(const std::vector<Strings> & cluster_host_ids)
|
||||
{
|
||||
if (cluster_host_ids.empty())
|
||||
return nullptr;
|
||||
|
||||
auto res = std::make_shared<ASTFunction>();
|
||||
res->name = "array";
|
||||
auto res_replicas = std::make_shared<ASTExpressionList>();
|
||||
res->arguments = res_replicas;
|
||||
res->children.push_back(res_replicas);
|
||||
res_replicas->children.resize(cluster_host_ids.size());
|
||||
|
||||
for (size_t i = 0; i != cluster_host_ids.size(); ++i)
|
||||
{
|
||||
const auto & shard = cluster_host_ids[i];
|
||||
|
||||
Array res_shard;
|
||||
res_shard.resize(shard.size());
|
||||
for (size_t j = 0; j != shard.size(); ++j)
|
||||
res_shard[j] = Field{shard[j]};
|
||||
|
||||
res_replicas->children[i] = std::make_shared<ASTLiteral>(Field{std::move(res_shard)});
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> BackupSettings::Util::findShardNumAndReplicaNum(const std::vector<Strings> & cluster_host_ids, const String & host_id)
|
||||
{
|
||||
for (size_t i = 0; i != cluster_host_ids.size(); ++i)
|
||||
{
|
||||
for (size_t j = 0; j != cluster_host_ids[i].size(); ++j)
|
||||
if (cluster_host_ids[i][j] == host_id)
|
||||
return {i + 1, j + 1};
|
||||
}
|
||||
throw Exception(ErrorCodes::WRONG_BACKUP_SETTINGS, "Cannot determine shard number or replica number, the current host {} is not found in the cluster's hosts", host_id);
|
||||
}
|
||||
|
||||
Strings BackupSettings::Util::filterHostIDs(const std::vector<Strings> & cluster_host_ids, size_t only_shard_num, size_t only_replica_num)
|
||||
{
|
||||
Strings collected_host_ids;
|
||||
|
||||
auto collect_replicas = [&](size_t shard_index)
|
||||
{
|
||||
const auto & shard = cluster_host_ids[shard_index - 1];
|
||||
if (only_replica_num)
|
||||
{
|
||||
if (only_replica_num <= shard.size())
|
||||
collected_host_ids.push_back(shard[only_replica_num - 1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t replica_index = 1; replica_index <= shard.size(); ++replica_index)
|
||||
collected_host_ids.push_back(shard[replica_index - 1]);
|
||||
}
|
||||
};
|
||||
|
||||
if (only_shard_num)
|
||||
{
|
||||
if (only_shard_num <= cluster_host_ids.size())
|
||||
collect_replicas(only_shard_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t shard_index = 1; shard_index <= cluster_host_ids.size(); ++shard_index)
|
||||
collect_replicas(shard_index);
|
||||
}
|
||||
|
||||
return collected_host_ids;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -43,12 +43,28 @@ struct BackupSettings
/// Whether this backup is a part of a distributed backup created by BACKUP ON CLUSTER.
bool internal = false;

/// Internal, should not be specified by user.
/// The current host's ID in the format 'escaped_host_name:port'.
String host_id;

/// Internal, should not be specified by user.
/// Cluster's hosts' IDs in the format 'escaped_host_name:port' for all shards and replicas in a cluster specified in BACKUP ON CLUSTER.
std::vector<Strings> cluster_host_ids;

/// Internal, should not be specified by user.
/// Path in Zookeeper used to coordinate a distributed backup created by BACKUP ON CLUSTER.
String coordination_zk_path;

static BackupSettings fromBackupQuery(const ASTBackupQuery & query);
void copySettingsToBackupQuery(ASTBackupQuery & query) const;
void copySettingsToQuery(ASTBackupQuery & query) const;

struct Util
{
static std::vector<Strings> clusterHostIDsFromAST(const IAST & ast);
static ASTPtr clusterHostIDsToAST(const std::vector<Strings> & cluster_host_ids);
static std::pair<size_t, size_t> findShardNumAndReplicaNum(const std::vector<Strings> & cluster_host_ids, const String & host_id);
static Strings filterHostIDs(const std::vector<Strings> & cluster_host_ids, size_t only_shard_num, size_t only_replica_num);
};
};

}
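For orientation, a rough standalone sketch (std types only, hypothetical names, not the actual ClickHouse implementation) of how the cluster_host_ids member declared above is laid out and queried: cluster_host_ids[shard][replica] holds 'escaped_host_name:port' strings, and the lookup returns 1-based shard and replica numbers, mirroring BackupSettings::Util::findShardNumAndReplicaNum.

#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// Illustrative only: locate the current host inside the per-shard replica lists.
std::pair<size_t, size_t> findShardAndReplica(
    const std::vector<std::vector<std::string>> & cluster_host_ids, const std::string & host_id)
{
    for (size_t i = 0; i != cluster_host_ids.size(); ++i)
        for (size_t j = 0; j != cluster_host_ids[i].size(); ++j)
            if (cluster_host_ids[i][j] == host_id)
                return {i + 1, j + 1}; // shard and replica numbers are 1-based
    throw std::runtime_error("Host " + host_id + " is not listed in the cluster");
}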
@ -15,8 +15,6 @@ std::string_view toString(BackupStatus backup_status)
|
||||
{
|
||||
switch (backup_status)
|
||||
{
|
||||
case BackupStatus::PREPARING:
|
||||
return "PREPARING";
|
||||
case BackupStatus::MAKING_BACKUP:
|
||||
return "MAKING_BACKUP";
|
||||
case BackupStatus::BACKUP_COMPLETE:
|
||||
|
@ -9,7 +9,6 @@ namespace DB
|
||||
enum class BackupStatus
|
||||
{
|
||||
/// Statuses of making backups
|
||||
PREPARING,
|
||||
MAKING_BACKUP,
|
||||
BACKUP_COMPLETE,
|
||||
FAILED_TO_BACKUP,
|
||||
|
@ -88,7 +88,6 @@ namespace
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
using Kind = ASTBackupQuery::Kind;
|
||||
using Element = ASTBackupQuery::Element;
|
||||
using Elements = ASTBackupQuery::Elements;
|
||||
@ -107,8 +106,8 @@ namespace
|
||||
/// Prepares internal structures for making backup entries.
|
||||
void prepare(const ASTBackupQuery::Elements & elements)
|
||||
{
|
||||
String current_database = context->getCurrentDatabase();
|
||||
renaming_settings.setFromBackupQuery(elements, current_database);
|
||||
calculateShardNumAndReplicaNumInBackup();
|
||||
renaming_settings.setFromBackupQuery(elements);
|
||||
|
||||
for (const auto & element : elements)
|
||||
{
|
||||
@ -116,11 +115,7 @@ namespace
|
||||
{
|
||||
case ElementType::TABLE:
|
||||
{
|
||||
const String & table_name = element.name.second;
|
||||
String database_name = element.name.first;
|
||||
if (database_name.empty())
|
||||
database_name = current_database;
|
||||
prepareToBackupTable(DatabaseAndTableName{database_name, table_name}, element.partitions);
|
||||
prepareToBackupTable(element.name, element.partitions);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -155,7 +150,7 @@ namespace
|
||||
auto data_backup = info.storage->backupData(context, info.partitions);
|
||||
if (!data_backup.empty())
|
||||
{
|
||||
String data_path = PathsInBackup::getDataPath(*info.create_query, backup_settings.shard_num, backup_settings.replica_num);
|
||||
String data_path = PathsInBackup::getDataPath(*info.create_query, shard_num_in_backup, replica_num_in_backup);
|
||||
for (auto & [path_in_backup, backup_entry] : data_backup)
|
||||
res.emplace_back(data_path + path_in_backup, std::move(backup_entry));
|
||||
}
|
||||
@ -170,6 +165,19 @@ namespace
|
||||
}
|
||||
|
||||
private:
|
||||
void calculateShardNumAndReplicaNumInBackup()
|
||||
{
|
||||
size_t shard_num = 0;
|
||||
size_t replica_num = 0;
|
||||
if (!backup_settings.host_id.empty())
|
||||
{
|
||||
std::tie(shard_num, replica_num)
|
||||
= BackupSettings::Util::findShardNumAndReplicaNum(backup_settings.cluster_host_ids, backup_settings.host_id);
|
||||
}
|
||||
shard_num_in_backup = shard_num;
|
||||
replica_num_in_backup = replica_num;
|
||||
}
|
||||
|
||||
/// Prepares to backup a single table and probably its database's definition.
|
||||
void prepareToBackupTable(const DatabaseAndTableName & table_name_, const ASTs & partitions_)
|
||||
{
|
||||
@ -286,7 +294,7 @@ namespace
|
||||
std::pair<String, BackupEntryPtr> makeBackupEntryForMetadata(const IAST & create_query) const
|
||||
{
|
||||
auto metadata_entry = std::make_unique<BackupEntryFromMemory>(serializeAST(create_query));
|
||||
String metadata_path = PathsInBackup::getMetadataPath(create_query, backup_settings.shard_num, backup_settings.replica_num);
|
||||
String metadata_path = PathsInBackup::getMetadataPath(create_query, shard_num_in_backup, replica_num_in_backup);
|
||||
return {metadata_path, std::move(metadata_entry)};
|
||||
}
|
||||
|
||||
@ -307,6 +315,8 @@ namespace
|
||||
|
||||
ContextPtr context;
|
||||
BackupSettings backup_settings;
|
||||
size_t shard_num_in_backup = 0;
|
||||
size_t replica_num_in_backup = 0;
|
||||
DDLRenamingSettings renaming_settings;
|
||||
std::unordered_map<String /* db_name_in_backup */, CreateDatabaseInfo> databases;
|
||||
std::map<DatabaseAndTableName /* table_name_in_backup */, CreateTableInfo> tables;
|
||||
@ -322,16 +332,15 @@ BackupEntries makeBackupEntries(const ContextPtr & context, const Elements & ele
|
||||
}
|
||||
|
||||
|
||||
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads)
|
||||
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool)
|
||||
{
|
||||
if (!num_threads || !backup->supportsWritingInMultipleThreads())
|
||||
num_threads = 1;
|
||||
std::vector<ThreadFromGlobalPool> threads;
|
||||
size_t num_active_threads = 0;
|
||||
size_t num_active_jobs = 0;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cond;
|
||||
std::condition_variable event;
|
||||
std::exception_ptr exception;
|
||||
|
||||
bool always_single_threaded = !backup->supportsWritingInMultipleThreads();
|
||||
|
||||
for (auto & name_and_entry : backup_entries)
|
||||
{
|
||||
auto & name = name_and_entry.first;
|
||||
@ -341,14 +350,23 @@ void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries
|
||||
std::unique_lock lock{mutex};
|
||||
if (exception)
|
||||
break;
|
||||
cond.wait(lock, [&] { return num_active_threads < num_threads; });
|
||||
if (exception)
|
||||
break;
|
||||
++num_active_threads;
|
||||
++num_active_jobs;
|
||||
}
|
||||
|
||||
threads.emplace_back([backup, &name, &entry, &mutex, &cond, &num_active_threads, &exception]()
|
||||
auto job = [&]()
|
||||
{
|
||||
SCOPE_EXIT({
|
||||
std::lock_guard lock{mutex};
|
||||
if (!--num_active_jobs)
|
||||
event.notify_all();
|
||||
});
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (exception)
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
backup->writeFile(name, std::move(entry));
|
||||
@ -359,17 +377,16 @@ void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries
|
||||
if (!exception)
|
||||
exception = std::current_exception();
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
--num_active_threads;
|
||||
cond.notify_all();
|
||||
}
|
||||
});
|
||||
if (always_single_threaded || !thread_pool.trySchedule(job))
|
||||
job();
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
event.wait(lock, [&] { return !num_active_jobs; });
|
||||
}
|
||||
|
||||
backup_entries.clear();
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -19,6 +20,6 @@ using ContextPtr = std::shared_ptr<const Context>;
|
||||
BackupEntries makeBackupEntries(const ContextPtr & context, const ASTBackupQuery::Elements & elements, const BackupSettings & backup_settings);
|
||||
|
||||
/// Write backup entries to an opened backup.
|
||||
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads);
|
||||
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool);
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,21 @@
|
||||
#include <Backups/BackupsWorker.h>
|
||||
#include <Backups/BackupFactory.h>
|
||||
#include <Backups/BackupInfo.h>
|
||||
#include <Backups/BackupSettings.h>
|
||||
#include <Backups/BackupUtils.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Backups/IRestoreTask.h>
|
||||
#include <Backups/RestoreCoordinationDistributed.h>
|
||||
#include <Backups/RestoreCoordinationLocal.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/RestoreUtils.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
@ -7,86 +23,331 @@ namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int QUERY_IS_PROHIBITED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
BackupsWorker & BackupsWorker::instance()
|
||||
namespace
|
||||
{
|
||||
static BackupsWorker the_instance;
|
||||
return the_instance;
|
||||
void checkNoMultipleReplicas(const std::vector<Strings> & cluster_host_ids, size_t only_shard_num)
|
||||
{
|
||||
if (only_shard_num)
|
||||
{
|
||||
if ((only_shard_num <= cluster_host_ids.size()) && (cluster_host_ids[only_shard_num - 1].size() > 1))
|
||||
throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Backup of multiple replicas is disabled. Choose one replica with the replica_num setting or specify allow_storing_multiple_replicas=true");
|
||||
}
|
||||
for (const auto & shard : cluster_host_ids)
|
||||
{
|
||||
if (shard.size() > 1)
|
||||
throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Backup of multiple replicas is disabled. Choose one replica with the replica_num setting or specify allow_storing_multiple_replicas=true");
|
||||
}
|
||||
}
|
||||
|
||||
void executeBackupImpl(const ASTBackupQuery & query, const UUID & backup_uuid, const ContextPtr & context, ThreadPool & thread_pool)
|
||||
{
|
||||
const auto backup_info = BackupInfo::fromAST(*query.backup_name);
|
||||
const auto backup_settings = BackupSettings::fromBackupQuery(query);
|
||||
|
||||
std::shared_ptr<ASTBackupQuery> new_query = std::static_pointer_cast<ASTBackupQuery>(query.clone());
|
||||
|
||||
BackupFactory::CreateParams backup_create_params;
|
||||
backup_create_params.open_mode = IBackup::OpenMode::WRITE;
|
||||
backup_create_params.context = context;
|
||||
backup_create_params.backup_info = backup_info;
|
||||
backup_create_params.base_backup_info = backup_settings.base_backup_info;
|
||||
backup_create_params.compression_method = backup_settings.compression_method;
|
||||
backup_create_params.compression_level = backup_settings.compression_level;
|
||||
backup_create_params.password = backup_settings.password;
|
||||
backup_create_params.backup_uuid = backup_uuid;
|
||||
backup_create_params.is_internal_backup = backup_settings.internal;
|
||||
backup_create_params.coordination_zk_path = backup_settings.coordination_zk_path;
|
||||
|
||||
ClusterPtr cluster;
|
||||
if (!query.cluster.empty())
|
||||
{
|
||||
new_query->cluster = context->getMacros()->expand(query.cluster);
|
||||
cluster = context->getCluster(new_query->cluster);
|
||||
auto new_backup_settings = backup_settings;
|
||||
new_backup_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
if (!backup_settings.allow_storing_multiple_replicas && !backup_settings.replica_num)
|
||||
checkNoMultipleReplicas(new_backup_settings.cluster_host_ids, backup_settings.shard_num);
|
||||
if (backup_settings.coordination_zk_path.empty())
|
||||
{
|
||||
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
new_backup_settings.coordination_zk_path
|
||||
= query.cluster.empty() ? "" : (root_zk_path + "/backup-" + toString(backup_uuid));
|
||||
backup_create_params.coordination_zk_path = new_backup_settings.coordination_zk_path;
|
||||
}
|
||||
new_backup_settings.copySettingsToQuery(*new_query);
|
||||
}
|
||||
|
||||
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
|
||||
|
||||
if (!query.cluster.empty())
|
||||
{
|
||||
DDLQueryOnClusterParams params;
|
||||
params.cluster = cluster;
|
||||
params.only_shard_num = backup_settings.shard_num;
|
||||
params.only_replica_num = backup_settings.replica_num;
|
||||
auto res = executeDDLQueryOnCluster(new_query, context, params);
|
||||
|
||||
PullingPipelineExecutor executor(res.pipeline);
|
||||
Block block;
|
||||
while (executor.pull(block));
|
||||
|
||||
backup->finalizeWriting();
|
||||
}
|
||||
else
|
||||
{
|
||||
new_query->setDatabase(context->getCurrentDatabase());
|
||||
auto backup_entries = makeBackupEntries(context, new_query->elements, backup_settings);
|
||||
writeBackupEntries(backup, std::move(backup_entries), thread_pool);
|
||||
}
|
||||
}
|
||||
|
||||
void executeRestoreImpl(const ASTBackupQuery & query, const UUID & restore_uuid, ContextMutablePtr context, ThreadPool & thread_pool)
|
||||
{
|
||||
const auto backup_info = BackupInfo::fromAST(*query.backup_name);
|
||||
const auto restore_settings = RestoreSettings::fromRestoreQuery(query);
|
||||
bool is_internal_restore = restore_settings.internal;
|
||||
|
||||
std::shared_ptr<IRestoreCoordination> restore_coordination;
|
||||
SCOPE_EXIT({
|
||||
if (!is_internal_restore && restore_coordination)
|
||||
restore_coordination->drop();
|
||||
});
|
||||
|
||||
std::shared_ptr<ASTBackupQuery> new_query = std::static_pointer_cast<ASTBackupQuery>(query.clone());
|
||||
|
||||
ClusterPtr cluster;
|
||||
if (!query.cluster.empty())
|
||||
{
|
||||
new_query->cluster = context->getMacros()->expand(query.cluster);
|
||||
cluster = context->getCluster(new_query->cluster);
|
||||
auto new_restore_settings = restore_settings;
|
||||
new_restore_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
if (new_restore_settings.coordination_zk_path.empty())
|
||||
{
|
||||
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
new_restore_settings.coordination_zk_path
|
||||
= query.cluster.empty() ? "" : (root_zk_path + "/restore-" + toString(restore_uuid));
|
||||
}
|
||||
new_restore_settings.copySettingsToQuery(*new_query);
|
||||
}
|
||||
|
||||
if (!restore_settings.coordination_zk_path.empty())
|
||||
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(restore_settings.coordination_zk_path, [context=context] { return context->getZooKeeper(); });
|
||||
else
|
||||
restore_coordination = std::make_shared<RestoreCoordinationLocal>();
|
||||
|
||||
if (!query.cluster.empty())
|
||||
{
|
||||
DDLQueryOnClusterParams params;
|
||||
params.cluster = cluster;
|
||||
params.only_shard_num = restore_settings.shard_num;
|
||||
params.only_replica_num = restore_settings.replica_num;
|
||||
auto res = executeDDLQueryOnCluster(new_query, context, params);
|
||||
|
||||
PullingPipelineExecutor executor(res.pipeline);
|
||||
Block block;
|
||||
while (executor.pull(block));
|
||||
}
|
||||
else
|
||||
{
|
||||
new_query->setDatabase(context->getCurrentDatabase());
|
||||
|
||||
BackupFactory::CreateParams backup_open_params;
|
||||
backup_open_params.open_mode = IBackup::OpenMode::READ;
|
||||
backup_open_params.context = context;
|
||||
backup_open_params.backup_info = backup_info;
|
||||
backup_open_params.base_backup_info = restore_settings.base_backup_info;
|
||||
backup_open_params.password = restore_settings.password;
|
||||
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
|
||||
|
||||
auto timeout_for_restoring_metadata = std::chrono::seconds{context->getConfigRef().getUInt("backups.restore_metadata_timeout", 0)};
|
||||
auto restore_tasks = makeRestoreTasks(context, backup, new_query->elements, restore_settings, restore_coordination, timeout_for_restoring_metadata);
|
||||
executeRestoreTasks(std::move(restore_tasks), thread_pool, restore_settings, restore_coordination, timeout_for_restoring_metadata);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BackupsWorker::BackupsWorker() = default;
|
||||
|
||||
size_t BackupsWorker::add(const String & backup_name, BackupStatus status, const String & error)
|
||||
BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threads)
|
||||
: backups_thread_pool(num_backup_threads)
|
||||
, restores_thread_pool(num_restore_threads)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
}
|
||||
|
||||
size_t task_id = ++current_task_id;
|
||||
size_t pos;
|
||||
auto it = entries_by_name.find(backup_name);
|
||||
if (it != entries_by_name.end())
|
||||
UUID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
|
||||
{
|
||||
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*backup_or_restore_query);
|
||||
if (backup_query.kind == ASTBackupQuery::Kind::BACKUP)
|
||||
return startMakingBackup(backup_or_restore_query, context);
|
||||
else
|
||||
return startRestoring(backup_or_restore_query, context);
|
||||
}
|
||||
|
||||
UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & context)
|
||||
{
|
||||
UUID uuid = UUIDHelpers::generateV4();
|
||||
|
||||
BackupInfo backup_info;
|
||||
BackupSettings backup_settings;
|
||||
{
|
||||
pos = it->second;
|
||||
entries_by_task_id.erase(entries[pos].task_id);
|
||||
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*query);
|
||||
backup_info = BackupInfo::fromAST(*backup_query.backup_name);
|
||||
backup_settings = BackupSettings::fromBackupQuery(backup_query);
|
||||
}
|
||||
|
||||
{
|
||||
Info info;
|
||||
info.uuid = uuid;
|
||||
info.backup_name = backup_info.toString();
|
||||
info.status = BackupStatus::MAKING_BACKUP;
|
||||
info.status_changed_time = time(nullptr);
|
||||
info.internal = backup_settings.internal;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
infos.emplace(uuid, std::move(info));
|
||||
}
|
||||
|
||||
auto job = [this, query, context, uuid]
|
||||
{
|
||||
try
|
||||
{
|
||||
const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*query);
|
||||
executeBackupImpl(backup_query, uuid, context, backups_thread_pool);
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
info.status = BackupStatus::BACKUP_COMPLETE;
|
||||
info.status_changed_time = time(nullptr);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
info.status = BackupStatus::FAILED_TO_BACKUP;
|
||||
info.status_changed_time = time(nullptr);
|
||||
info.error_message = getCurrentExceptionMessage(false);
|
||||
info.exception = std::current_exception();
|
||||
}
|
||||
};
|
||||
|
||||
if (backup_settings.async)
|
||||
{
|
||||
backups_thread_pool.scheduleOrThrowOnError(job);
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = entries.size();
|
||||
entries.emplace_back().backup_name = backup_name;
|
||||
entries_by_name.emplace(backup_name, pos);
|
||||
job();
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
if (info.status == BackupStatus::FAILED_TO_BACKUP)
|
||||
std::rethrow_exception(info.exception);
|
||||
}
|
||||
|
||||
entries_by_task_id.emplace(task_id, pos);
|
||||
|
||||
Entry & entry = entries[pos];
|
||||
entry.task_id = task_id;
|
||||
entry.status = status;
|
||||
entry.error = error;
|
||||
entry.timestamp = std::time(nullptr);
|
||||
|
||||
return task_id;
|
||||
return uuid;
|
||||
}
|
||||
|
||||
void BackupsWorker::update(size_t task_id, BackupStatus status, const String & error)
|
||||
UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = entries_by_task_id.find(task_id);
|
||||
if ((it == entries_by_task_id.end()) || (it->second >= entries.size()))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: entry_id is out of range");
|
||||
Entry & entry = entries[it->second];
|
||||
entry.status = status;
|
||||
entry.error = error;
|
||||
entry.timestamp = std::time(nullptr);
|
||||
UUID uuid = UUIDHelpers::generateV4();
|
||||
|
||||
BackupInfo backup_info;
|
||||
RestoreSettings restore_settings;
|
||||
{
|
||||
const ASTBackupQuery & restore_query = typeid_cast<const ASTBackupQuery &>(*query);
|
||||
backup_info = BackupInfo::fromAST(*restore_query.backup_name);
|
||||
restore_settings = RestoreSettings::fromRestoreQuery(restore_query);
|
||||
}
|
||||
|
||||
{
|
||||
Info info;
|
||||
info.uuid = uuid;
|
||||
info.backup_name = backup_info.toString();
|
||||
info.status = BackupStatus::RESTORING;
|
||||
info.status_changed_time = time(nullptr);
|
||||
info.internal = restore_settings.internal;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
infos.emplace(uuid, std::move(info));
|
||||
}
|
||||
|
||||
auto job = [this, query, context, uuid]
|
||||
{
|
||||
try
|
||||
{
|
||||
const ASTBackupQuery & restore_query = typeid_cast<const ASTBackupQuery &>(*query);
|
||||
executeRestoreImpl(restore_query, uuid, context, restores_thread_pool);
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
info.status = BackupStatus::RESTORED;
|
||||
info.status_changed_time = time(nullptr);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
info.status = BackupStatus::FAILED_TO_RESTORE;
|
||||
info.status_changed_time = time(nullptr);
|
||||
info.error_message = getCurrentExceptionMessage(false);
|
||||
info.exception = std::current_exception();
|
||||
}
|
||||
};
|
||||
|
||||
if (restore_settings.async)
|
||||
{
|
||||
restores_thread_pool.scheduleOrThrowOnError(job);
|
||||
}
|
||||
else
|
||||
{
|
||||
job();
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto & info = infos.at(uuid);
|
||||
if (info.status == BackupStatus::FAILED_TO_RESTORE)
|
||||
std::rethrow_exception(info.exception);
|
||||
}
|
||||
|
||||
return uuid;
|
||||
}
|
||||
|
||||
BackupsWorker::Entry BackupsWorker::getEntry(size_t task_id) const
|
||||
void BackupsWorker::wait(const UUID & backup_or_restore_uuid)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = entries_by_task_id.find(task_id);
|
||||
if ((it == entries_by_task_id.end()) || (it->second >= entries.size()))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: entry_id is out of range");
|
||||
return entries[it->second];
|
||||
std::unique_lock lock{infos_mutex};
|
||||
status_changed.wait(lock, [&]
|
||||
{
|
||||
auto it = infos.find(backup_or_restore_uuid);
|
||||
if (it == infos.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
|
||||
auto current_status = it->second.status;
|
||||
return (current_status == BackupStatus::BACKUP_COMPLETE) || (current_status == BackupStatus::RESTORED);
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<BackupsWorker::Entry> BackupsWorker::getEntries() const
|
||||
BackupsWorker::Info BackupsWorker::getInfo(const UUID & backup_or_restore_uuid) const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return entries;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
auto it = infos.find(backup_or_restore_uuid);
|
||||
if (it == infos.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "BackupsWorker: Unknown UUID {}", toString(backup_or_restore_uuid));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void BackupsWorker::run(std::function<void()> && task)
|
||||
std::vector<BackupsWorker::Info> BackupsWorker::getAllInfos() const
|
||||
{
|
||||
thread_pool.scheduleOrThrowOnError(std::move(task));
|
||||
std::vector<Info> res_infos;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
for (const auto & info : infos | boost::adaptors::map_values)
|
||||
res_infos.push_back(info);
|
||||
return res_infos;
|
||||
}
|
||||
|
||||
void BackupsWorker::shutdown()
|
||||
{
|
||||
size_t num_active_tasks = thread_pool.active();
|
||||
if (!num_active_tasks)
|
||||
size_t num_active_backups = backups_thread_pool.active();
|
||||
size_t num_active_restores = restores_thread_pool.active();
|
||||
if (!num_active_backups && !num_active_restores)
|
||||
return;
|
||||
LOG_INFO(&Poco::Logger::get("BackupsWorker"), "Waiting for {} backup or restore tasks to be finished", num_active_tasks);
|
||||
thread_pool.wait();
|
||||
LOG_INFO(&Poco::Logger::get("BackupsWorker"), "Waiting for {} backup and {} restore tasks to be finished", num_active_backups, num_active_restores);
|
||||
backups_thread_pool.wait();
|
||||
restores_thread_pool.wait();
|
||||
LOG_INFO(&Poco::Logger::get("BackupsWorker"), "All backup and restore tasks have finished");
|
||||
}
|
||||
|
||||
|
@ -3,49 +3,63 @@
|
||||
#include <Backups/BackupStatus.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Core/UUID.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace Poco::Util { class AbstractConfiguration; }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Manager of backups and restores: executes backup and restore tasks on background threads.
/// Keeps information about backups and restores started in this session.
class BackupsWorker
{
|
||||
public:
|
||||
static BackupsWorker & instance();
|
||||
|
||||
size_t add(const String & backup_name, BackupStatus status, const String & error = {});
|
||||
void update(size_t task_id, BackupStatus status, const String & error = {});
|
||||
|
||||
struct Entry
|
||||
{
|
||||
String backup_name;
|
||||
size_t task_id;
|
||||
BackupStatus status;
|
||||
String error;
|
||||
time_t timestamp;
|
||||
};
|
||||
|
||||
Entry getEntry(size_t task_id) const;
|
||||
std::vector<Entry> getEntries() const;
|
||||
|
||||
/// Schedules a new task and performs it in the background thread.
|
||||
void run(std::function<void()> && task);
|
||||
BackupsWorker(size_t num_backup_threads, size_t num_restore_threads);
|
||||
|
||||
/// Waits until all tasks have been completed.
|
||||
void shutdown();
|
||||
|
||||
private:
|
||||
BackupsWorker();
|
||||
/// Starts executing a BACKUP or RESTORE query. Returns UUID of the operation.
|
||||
UUID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context);
|
||||
|
||||
mutable std::mutex mutex;
|
||||
std::vector<Entry> entries;
|
||||
std::unordered_map<String, size_t /* position in entries */> entries_by_name;
|
||||
std::unordered_map<size_t /* task_id */, size_t /* position in entries */ > entries_by_task_id;
|
||||
size_t current_task_id = 0;
|
||||
ThreadPool thread_pool;
|
||||
/// Waits until a BACKUP or RESTORE query started by start() is finished.
|
||||
/// The function returns immediately if the operation is already finished.
|
||||
void wait(const UUID & backup_or_restore_uuid);
|
||||
|
||||
/// Information about executing a BACKUP or RESTORE query started by calling start().
|
||||
struct Info
|
||||
{
|
||||
UUID uuid;
|
||||
|
||||
/// Backup's name, a string like "Disk('backups', 'my_backup')"
|
||||
String backup_name;
|
||||
|
||||
BackupStatus status;
|
||||
time_t status_changed_time;
|
||||
|
||||
String error_message;
|
||||
std::exception_ptr exception;
|
||||
|
||||
/// Whether this operation is internal, i.e. caused by another BACKUP or RESTORE operation.
/// For example, BACKUP ON CLUSTER executes an internal BACKUP command on each node.
bool internal = false;
};
|
||||
|
||||
Info getInfo(const UUID & backup_or_restore_uuid) const;
|
||||
std::vector<Info> getAllInfos() const;
|
||||
|
||||
private:
|
||||
UUID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
|
||||
UUID startRestoring(const ASTPtr & query, ContextMutablePtr context);
|
||||
|
||||
ThreadPool backups_thread_pool;
|
||||
ThreadPool restores_thread_pool;
|
||||
|
||||
std::unordered_map<UUID, Info> infos;
|
||||
std::condition_variable status_changed;
|
||||
mutable std::mutex infos_mutex;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -251,6 +251,9 @@ namespace
|
||||
|
||||
void DDLRenamingSettings::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
|
||||
{
|
||||
if (old_table_name.first.empty() || old_table_name.second.empty() || new_table_name.first.empty() || new_table_name.second.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewTableName");
|
||||
|
||||
auto it = old_to_new_table_names.find(old_table_name);
|
||||
if ((it != old_to_new_table_names.end()))
|
||||
{
|
||||
@ -266,6 +269,9 @@ void DDLRenamingSettings::setNewTableName(const DatabaseAndTableName & old_table
|
||||
|
||||
void DDLRenamingSettings::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
|
||||
{
|
||||
if (old_database_name.empty() || new_database_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed for DDLRenamingSettings::setNewDatabaseName");
|
||||
|
||||
auto it = old_to_new_database_names.find(old_database_name);
|
||||
if ((it != old_to_new_database_names.end()))
|
||||
{
|
||||
@ -277,12 +283,12 @@ void DDLRenamingSettings::setNewDatabaseName(const String & old_database_name, c
|
||||
old_to_new_database_names[old_database_name] = new_database_name;
|
||||
}
|
||||
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery & backup_query, const String & current_database)
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery & backup_query)
|
||||
{
|
||||
setFromBackupQuery(backup_query.elements, current_database);
|
||||
setFromBackupQuery(backup_query.elements);
|
||||
}
|
||||
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements, const String & current_database)
|
||||
void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements)
|
||||
{
|
||||
old_to_new_table_names.clear();
|
||||
old_to_new_database_names.clear();
|
||||
@ -299,15 +305,15 @@ void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & ba
|
||||
String database_name = element.name.first;
|
||||
if (element.name_is_in_temp_db)
|
||||
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
else if (database_name.empty())
|
||||
database_name = current_database;
|
||||
assert(!table_name.empty());
|
||||
assert(!database_name.empty());
|
||||
|
||||
const String & new_table_name = element.new_name.second;
|
||||
String new_database_name = element.new_name.first;
|
||||
if (element.new_name_is_in_temp_db)
|
||||
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
else if (new_database_name.empty())
|
||||
new_database_name = current_database;
|
||||
assert(!new_table_name.empty());
|
||||
assert(!new_database_name.empty());
|
||||
|
||||
setNewTableName({database_name, table_name}, {new_database_name, new_table_name});
|
||||
break;
|
||||
@ -318,10 +324,12 @@ void DDLRenamingSettings::setFromBackupQuery(const ASTBackupQuery::Elements & ba
|
||||
String database_name = element.name.first;
|
||||
if (element.name_is_in_temp_db)
|
||||
database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
assert(!database_name.empty());
|
||||
|
||||
String new_database_name = element.new_name.first;
|
||||
if (element.new_name_is_in_temp_db)
|
||||
new_database_name = DatabaseCatalog::TEMPORARY_DATABASE;
|
||||
assert(!new_database_name.empty());
|
||||
|
||||
setNewDatabaseName(database_name, new_database_name);
|
||||
break;
|
||||
|
@ -26,8 +26,8 @@ public:
|
||||
void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
|
||||
void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
|
||||
|
||||
void setFromBackupQuery(const ASTBackupQuery & backup_query, const String & current_database);
|
||||
void setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements, const String & current_database);
|
||||
void setFromBackupQuery(const ASTBackupQuery & backup_query);
|
||||
void setFromBackupQuery(const ASTBackupQuery::Elements & backup_query_elements);
|
||||
|
||||
/// Changes names according to the renaming.
|
||||
DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
|
||||
|
@ -11,6 +11,8 @@ namespace DB
|
||||
class IBackupCoordination
|
||||
{
|
||||
public:
|
||||
virtual ~IBackupCoordination() = default;
|
||||
|
||||
struct FileInfo
|
||||
{
|
||||
String file_name;
|
||||
@ -32,8 +34,6 @@ public:
|
||||
UInt64 pos_in_archive = static_cast<UInt64>(-1);
|
||||
};
|
||||
|
||||
virtual ~IBackupCoordination() = default;
|
||||
|
||||
/// Adds file information.
/// If the specified checksum+size are new for this IBackupContentsInfo, the function sets `is_data_file_required`.
virtual void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) = 0;
|
||||
|
@ -1,10 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using DatabaseAndTableName = std::pair<String, String>;
|
||||
|
||||
/// Keeps information about files contained in a backup.
|
||||
class IRestoreCoordination
|
||||
@ -12,25 +13,52 @@ class IRestoreCoordination
|
||||
public:
|
||||
virtual ~IRestoreCoordination() = default;
|
||||
|
||||
/// Sets or gets path in the backup for a specified path in ZooKeeper.
|
||||
virtual void setOrGetPathInBackupForZkPath(const String & zk_path_, String & path_in_backup_) = 0;
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
virtual bool startCreatingTableInReplicatedDB(
|
||||
const String & host_id, const String & database_name, const String & database_zk_path, const String & table_name)
|
||||
= 0;
|
||||
|
||||
/// Sets that this replica is going to restore a partition in a replicated table or a table in a replicated database.
|
||||
/// This function should be called to prevent other replicas from doing that in parallel.
|
||||
virtual bool acquireZkPathAndName(const String & zk_path_, const String & name_) = 0;
|
||||
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
virtual void finishCreatingTableInReplicatedDB(
|
||||
const String & host_id,
|
||||
const String & database_name,
|
||||
const String & database_zk_path,
|
||||
const String & table_name,
|
||||
const String & error_message = {})
|
||||
= 0;
|
||||
|
||||
enum Result
|
||||
{
|
||||
SUCCEEDED,
|
||||
FAILED,
|
||||
};
|
||||
/// Wait for another host to create a table in a replicated database.
|
||||
virtual void waitForCreatingTableInReplicatedDB(
|
||||
const String & database_name,
|
||||
const String & database_zk_path,
|
||||
const String & table_name,
|
||||
std::chrono::seconds timeout = std::chrono::seconds::zero())
|
||||
= 0;
|
||||
|
||||
/// Sets the result for an acquired path and name.
|
||||
virtual void setResultForZkPathAndName(const String & zk_path_, const String & name_, Result res_) = 0;
|
||||
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
|
||||
/// In the latter case `error_message` should be set.
|
||||
virtual void finishRestoringMetadata(const String & host_id, const String & error_message = {}) = 0;
|
||||
|
||||
/// Waits for the result set by another replica for another replica's acquired path and name.
|
||||
/// Returns false if time is out.
|
||||
virtual bool getResultForZkPathAndName(const String & zk_path_, const String & name_, Result & res_, std::chrono::milliseconds timeout_) const = 0;
|
||||
/// Waits for a specified list of hosts to finish restoring their metadata.
|
||||
virtual void waitForAllHostsToRestoreMetadata(const Strings & host_ids, std::chrono::seconds timeout = std::chrono::seconds::zero()) const = 0;
|
||||
|
||||
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
virtual void setReplicatedTableDataPath(
|
||||
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & data_path_in_backup)
|
||||
= 0;
|
||||
|
||||
/// Gets path in backup used by a replicated table.
|
||||
virtual String getReplicatedTableDataPath(const String & table_zk_path) const = 0;
|
||||
|
||||
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
virtual bool startInsertingDataToPartitionInReplicatedTable(
|
||||
const String & host_id, const DatabaseAndTableName & table_name, const String & table_zk_path, const String & partition_name)
|
||||
= 0;
|
||||
|
||||
/// Removes remotely stored information.
|
||||
virtual void drop() {}
|
||||
|
@ -14,15 +14,20 @@ public:
|
||||
IRestoreTask() = default;
|
||||
virtual ~IRestoreTask() = default;
|
||||
|
||||
enum class RestoreKind
|
||||
{
|
||||
/// This task restores metadata (definitions of databases and tables).
|
||||
/// Tasks restoring metadata are executed first and strictly in one thread.
|
||||
METADATA,
|
||||
|
||||
/// This task restores tables' data. Such tasks can be executed in parallel.
|
||||
DATA,
|
||||
};
|
||||
|
||||
virtual RestoreKind getRestoreKind() const { return RestoreKind::DATA; }
|
||||
|
||||
/// Performs the restore. The function can also return a list of nested tasks that should be run later.
virtual std::vector<std::unique_ptr<IRestoreTask>> run() = 0;
|
||||
|
||||
/// Is it necessary to run this task sequentially?
|
||||
/// Sequential tasks are executed first and strictly in one thread.
|
||||
virtual bool isSequential() const { return false; }
|
||||
|
||||
/// Reverts the effect of run(). If that's not possible, the function does nothing.
|
||||
virtual void rollback() {}
|
||||
};
|
||||
|
||||
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
|
||||
|
@ -1,13 +1,81 @@
|
||||
#include <Backups/RestoreCoordinationDistributed.h>
|
||||
#include <Backups/formatTableNameOrTemporaryTableName.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <base/chrono_io.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct TableInReplicatedDatabaseStatus
|
||||
{
|
||||
String host_id;
|
||||
DatabaseAndTableName table_name;
|
||||
bool ready = false;
|
||||
String error_message;
|
||||
size_t increment = 0;
|
||||
|
||||
void write(WriteBuffer & out) const
|
||||
{
|
||||
writeBinary(host_id, out);
|
||||
writeBinary(table_name.first, out);
|
||||
writeBinary(table_name.second, out);
|
||||
writeBinary(ready, out);
|
||||
writeBinary(error_message, out);
|
||||
writeBinary(increment, out);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & in)
|
||||
{
|
||||
readBinary(host_id, in);
|
||||
readBinary(table_name.first, in);
|
||||
readBinary(table_name.second, in);
|
||||
readBinary(ready, in);
|
||||
readBinary(error_message, in);
|
||||
readBinary(increment, in);
|
||||
}
|
||||
};
|
||||
|
||||
struct ReplicatedTableDataPath
|
||||
{
|
||||
String host_id;
|
||||
DatabaseAndTableName table_name;
|
||||
String data_path_in_backup;
|
||||
|
||||
void write(WriteBuffer & out) const
|
||||
{
|
||||
writeBinary(host_id, out);
|
||||
writeBinary(table_name.first, out);
|
||||
writeBinary(table_name.second, out);
|
||||
writeBinary(data_path_in_backup, out);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & in)
|
||||
{
|
||||
readBinary(host_id, in);
|
||||
readBinary(table_name.first, in);
|
||||
readBinary(table_name.second, in);
|
||||
readBinary(data_path_in_backup, in);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_)
|
||||
: zookeeper_path(zookeeper_path_), get_zookeeper(get_zookeeper_)
|
||||
: zookeeper_path(zookeeper_path_), get_zookeeper(get_zookeeper_), log(&Poco::Logger::get("RestoreCoordinationDistributed"))
|
||||
{
|
||||
createRootNodes();
|
||||
}
|
||||
@ -19,8 +87,10 @@ void RestoreCoordinationDistributed::createRootNodes()
|
||||
auto zookeeper = get_zookeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
zookeeper->createIfNotExists(zookeeper_path, "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/paths_in_backup", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/acquired", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/tables_in_repl_databases", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/metadata_ready", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_data_paths", "");
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/repl_tables_partitions", "");
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::removeAllNodes()
|
||||
@ -29,115 +99,356 @@ void RestoreCoordinationDistributed::removeAllNodes()
|
||||
zookeeper->removeRecursive(zookeeper_path);
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::setOrGetPathInBackupForZkPath(const String & zk_path_, String & path_in_backup_)
|
||||
bool RestoreCoordinationDistributed::startCreatingTableInReplicatedDB(
|
||||
const String & host_id_, const String & database_name_, const String & database_zk_path_, const String & table_name_)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
String path = zookeeper_path + "/tables_in_repl_databases/" + escapeForFileName(database_zk_path_);
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
|
||||
TableInReplicatedDatabaseStatus status;
|
||||
status.host_id = host_id_;
|
||||
status.table_name = DatabaseAndTableName{database_name_, table_name_};
|
||||
String status_str;
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = paths_in_backup_by_zk_path.find(zk_path_);
|
||||
if (it != paths_in_backup_by_zk_path.end())
|
||||
{
|
||||
path_in_backup_ = it->second;
|
||||
return;
|
||||
}
|
||||
WriteBufferFromOwnString buf;
|
||||
status.write(buf);
|
||||
status_str = buf.str();
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String combined_path = zookeeper_path + "/paths_in_backup/" + escapeForFileName(zk_path_);
|
||||
auto code = zookeeper->tryCreate(combined_path, path_in_backup_, zkutil::CreateMode::Persistent);
|
||||
path += "/" + escapeForFileName(table_name_);
|
||||
|
||||
auto code = zookeeper->tryCreate(path, status_str, zkutil::CreateMode::Persistent);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||
throw zkutil::KeeperException(code, combined_path);
|
||||
throw zkutil::KeeperException(code, path);
|
||||
|
||||
if (code == Coordination::Error::ZNODEEXISTS)
|
||||
path_in_backup_ = zookeeper->get(combined_path);
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
paths_in_backup_by_zk_path[zk_path_] = path_in_backup_;
|
||||
}
|
||||
return (code == Coordination::Error::ZOK);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::acquireZkPathAndName(const String & zk_path_, const String & name_)
|
||||
/// Ends creating table in a replicated database, successfully or with an error.
|
||||
/// In the latter case `error_message` should be set.
|
||||
void RestoreCoordinationDistributed::finishCreatingTableInReplicatedDB(
|
||||
const String & /* host_id_ */,
|
||||
const String & database_name_,
|
||||
const String & database_zk_path_,
|
||||
const String & table_name_,
|
||||
const String & error_message_)
|
||||
{
|
||||
std::pair<String, String> key{zk_path_, name_};
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (acquired.contains(key))
|
||||
return true;
|
||||
}
|
||||
if (error_message_.empty())
LOG_TRACE(log, "Created table {}.{}", database_name_, table_name_);
else
LOG_TRACE(log, "Failed to create table {}.{}: {}", database_name_, table_name_, error_message_);
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String combined_path = zookeeper_path + "/acquired/" + escapeForFileName(zk_path_) + "|" + escapeForFileName(name_);
|
||||
auto code = zookeeper->tryCreate(combined_path, "", zkutil::CreateMode::Persistent);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||
throw zkutil::KeeperException(code, combined_path);
|
||||
String path = zookeeper_path + "/tables_in_repl_databases/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);
|
||||
|
||||
if (code == Coordination::Error::ZNODEEXISTS)
|
||||
return false;
|
||||
TableInReplicatedDatabaseStatus status;
|
||||
String status_str = zookeeper->get(path);
|
||||
{
|
||||
ReadBufferFromString buf{status_str};
|
||||
status.read(buf);
|
||||
}
|
||||
|
||||
status.error_message = error_message_;
|
||||
status.ready = error_message_.empty();
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
acquired.emplace(key, std::nullopt);
|
||||
return true;
|
||||
WriteBufferFromOwnString buf;
|
||||
status.write(buf);
|
||||
status_str = buf.str();
|
||||
}
|
||||
|
||||
zookeeper->set(path, status_str);
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::setResultForZkPathAndName(const String & zk_path_, const String & name_, Result res_)
|
||||
/// Wait for another host to create a table in a replicated database.
|
||||
void RestoreCoordinationDistributed::waitForCreatingTableInReplicatedDB(
|
||||
const String & /* database_name_ */, const String & database_zk_path_, const String & table_name_, std::chrono::seconds timeout_)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String combined_path = zookeeper_path + "/acquired/" + escapeForFileName(zk_path_) + "|" + escapeForFileName(name_);
|
||||
zookeeper->set(combined_path, (res_ == Result::SUCCEEDED) ? "1" : "0");
|
||||
String path = zookeeper_path + "/tables_in_repl_databases/" + escapeForFileName(database_zk_path_) + "/" + escapeForFileName(table_name_);
|
||||
|
||||
TableInReplicatedDatabaseStatus status;
|
||||
|
||||
std::atomic<bool> watch_set = false;
|
||||
std::condition_variable watch_triggered_event;
|
||||
|
||||
auto watch_callback = [&](const Coordination::WatchResponse &)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
acquired[std::pair{zk_path_, name_}] = res_;
|
||||
}
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::getResultForZkPathAndName(const String & zk_path_, const String & name_, Result & res_, std::chrono::milliseconds timeout_) const
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto value = acquired[std::pair{zk_path_, name_}];
|
||||
if (value)
|
||||
{
|
||||
res_ = *value;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
auto zookeeper = get_zookeeper();
|
||||
String combined_path = zookeeper_path + "/acquired/" + escapeForFileName(zk_path_) + "|" + escapeForFileName(name_);
|
||||
|
||||
std::atomic<bool> changed = false;
|
||||
std::condition_variable changed_condvar;
|
||||
const auto watch = [&changed, &changed_condvar, zk_path_, name_](const Coordination::WatchResponse &)
|
||||
{
|
||||
changed = true;
|
||||
changed_condvar.notify_one();
|
||||
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
|
||||
watch_triggered_event.notify_all();
|
||||
};
|
||||
|
||||
String res_str = zookeeper->getWatch(combined_path, nullptr, watch);
|
||||
if (res_str.empty())
|
||||
auto watch_triggered = [&] { return !watch_set; };
|
||||
|
||||
bool use_timeout = (timeout_.count() > 0);
|
||||
std::chrono::steady_clock::duration time_left = timeout_;
|
||||
std::mutex dummy_mutex;
|
||||
|
||||
while (!use_timeout || (time_left.count() > 0))
|
||||
{
|
||||
std::mutex dummy_mutex;
|
||||
std::unique_lock lock{dummy_mutex};
|
||||
changed_condvar.wait_for(lock, timeout_, [&changed] { return changed.load(); });
|
||||
res_str = zookeeper->get(combined_path);
|
||||
watch_set = true;
|
||||
String status_str = zookeeper->getWatch(path, nullptr, watch_callback);
|
||||
{
|
||||
ReadBufferFromString buf{status_str};
|
||||
status.read(buf);
|
||||
}
|
||||
|
||||
if (!status.error_message.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE,
|
||||
"Host {} failed to create table {}.{}: {}", status.host_id, status.table_name.first, status.table_name.second, status.error_message);
|
||||
|
||||
if (status.ready)
|
||||
{
|
||||
LOG_TRACE(log, "Host {} created table {}.{}", status.host_id, status.table_name.first, status.table_name.second);
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Waiting for host {} to create table {}.{}", status.host_id, status.table_name.first, status.table_name.second);
|
||||
|
||||
std::chrono::steady_clock::time_point start_time;
|
||||
if (use_timeout)
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
|
||||
bool waited;
|
||||
{
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
if (use_timeout)
|
||||
{
|
||||
waited = watch_triggered_event.wait_for(dummy_lock, time_left, watch_triggered);
|
||||
}
|
||||
else
|
||||
{
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
waited = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_timeout)
|
||||
{
|
||||
time_left -= (std::chrono::steady_clock::now() - start_time);
|
||||
if (time_left.count() < 0)
|
||||
time_left = std::chrono::steady_clock::duration::zero();
|
||||
}
|
||||
|
||||
if (!waited)
|
||||
break;
|
||||
}
|
||||
|
||||
if (res_str.empty())
|
||||
return false;
|
||||
|
||||
res_ = (res_str == "1") ? Result::SUCCEEDED : Result::FAILED;
|
||||
|
||||
if (watch_set)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
acquired[std::pair{zk_path_, name_}] = res_;
|
||||
/// Remove watch by triggering it.
|
||||
++status.increment;
|
||||
WriteBufferFromOwnString buf;
|
||||
status.write(buf);
|
||||
zookeeper->set(path, buf.str());
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
watch_triggered_event.wait_for(dummy_lock, timeout_, watch_triggered);
|
||||
}
|
||||
|
||||
return true;
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE,
|
||||
"Host {} was unable to create table {}.{} in {}",
|
||||
status.host_id,
|
||||
status.table_name.first,
|
||||
table_name_,
|
||||
to_string(timeout_));
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::finishRestoringMetadata(const String & host_id_, const String & error_message_)
|
||||
{
|
||||
LOG_TRACE(log, "Finished restoring metadata{}", (error_message_.empty() ? "" : (" with error " + error_message_)));
|
||||
auto zookeeper = get_zookeeper();
|
||||
if (error_message_.empty())
|
||||
zookeeper->create(zookeeper_path + "/metadata_ready/" + host_id_ + ":ready", "", zkutil::CreateMode::Persistent);
|
||||
else
|
||||
zookeeper->create(zookeeper_path + "/metadata_ready/" + host_id_ + ":error", error_message_, zkutil::CreateMode::Persistent);
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::waitForAllHostsToRestoreMetadata(const Strings & host_ids_, std::chrono::seconds timeout_) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
bool all_hosts_ready = false;
|
||||
String not_ready_host_id;
|
||||
String error_host_id;
|
||||
String error_message;
|
||||
|
||||
/// Checks the nodes and sets `all_hosts_ready` if everything's ready; otherwise records which host is not ready or which host failed.
auto process_nodes = [&](const Strings & nodes)
|
||||
{
|
||||
std::unordered_set<std::string_view> set{nodes.begin(), nodes.end()};
|
||||
for (const String & host_id : host_ids_)
|
||||
{
|
||||
if (set.contains(host_id + ":error"))
|
||||
{
|
||||
error_host_id = host_id;
|
||||
error_message = zookeeper->get(zookeeper_path + "/metadata_ready/" + host_id + ":error");
|
||||
return;
|
||||
}
|
||||
if (!set.contains(host_id + ":ready"))
|
||||
{
|
||||
LOG_TRACE(log, "Waiting for host {} to restore its metadata", host_id);
|
||||
not_ready_host_id = host_id;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
all_hosts_ready = true;
|
||||
};
|
||||
|
||||
std::atomic<bool> watch_set = false;
|
||||
std::condition_variable watch_triggered_event;
|
||||
|
||||
auto watch_callback = [&](const Coordination::WatchResponse &)
|
||||
{
|
||||
watch_set = false; /// After it's triggered it's not set until we call getChildrenWatch() again.
|
||||
watch_triggered_event.notify_all();
|
||||
};
|
||||
|
||||
auto watch_triggered = [&] { return !watch_set; };
|
||||
|
||||
bool use_timeout = (timeout_.count() > 0);
|
||||
std::chrono::steady_clock::duration time_left = timeout_;
|
||||
std::mutex dummy_mutex;
|
||||
|
||||
while (!use_timeout || (time_left.count() > 0))
|
||||
{
|
||||
watch_set = true;
|
||||
Strings children = zookeeper->getChildrenWatch(zookeeper_path + "/metadata_ready", nullptr, watch_callback);
|
||||
process_nodes(children);
|
||||
|
||||
if (!error_message.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE,
|
||||
"Host {} was unable to restore its metadata: {}",
|
||||
error_host_id,
|
||||
error_message);
|
||||
|
||||
if (all_hosts_ready)
|
||||
{
|
||||
LOG_TRACE(log, "All hosts have finished restoring metadata");
|
||||
return;
|
||||
}
|
||||
|
||||
std::chrono::steady_clock::time_point start_time;
|
||||
if (use_timeout)
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
|
||||
bool waited;
|
||||
{
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
if (use_timeout)
|
||||
{
|
||||
waited = watch_triggered_event.wait_for(dummy_lock, time_left, watch_triggered);
|
||||
}
|
||||
else
|
||||
{
|
||||
watch_triggered_event.wait(dummy_lock, watch_triggered);
|
||||
waited = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (use_timeout)
|
||||
{
|
||||
time_left -= (std::chrono::steady_clock::now() - start_time);
|
||||
if (time_left.count() < 0)
|
||||
time_left = std::chrono::steady_clock::duration::zero();
|
||||
}
|
||||
|
||||
if (!waited)
|
||||
break;
|
||||
}
|
||||
|
||||
if (watch_set)
|
||||
{
|
||||
/// Remove watch by triggering it.
|
||||
zookeeper->create(zookeeper_path + "/metadata_ready/remove_watch-", "", zkutil::CreateMode::EphemeralSequential);
|
||||
std::unique_lock dummy_lock{dummy_mutex};
|
||||
watch_triggered_event.wait_for(dummy_lock, timeout_, watch_triggered);
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::FAILED_TO_RESTORE_METADATA_ON_OTHER_NODE,
|
||||
"Host {} was unable to restore its metadata in {}",
|
||||
not_ready_host_id,
|
||||
to_string(timeout_));
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::setReplicatedTableDataPath(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & data_path_in_backup_)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_tables_data_paths/" + escapeForFileName(table_zk_path_);
|
||||
|
||||
String new_info_str;
|
||||
{
|
||||
ReplicatedTableDataPath new_info;
|
||||
new_info.host_id = host_id_;
|
||||
new_info.table_name = table_name_;
|
||||
new_info.data_path_in_backup = data_path_in_backup_;
|
||||
WriteBufferFromOwnString buf;
|
||||
new_info.write(buf);
|
||||
new_info_str = buf.str();
|
||||
}
|
||||
|
||||
auto code = zookeeper->tryCreate(path, new_info_str, zkutil::CreateMode::Persistent);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||
throw zkutil::KeeperException(code, path);
|
||||
|
||||
while (code != Coordination::Error::ZOK)
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
String cur_info_str = zookeeper->get(path, &stat);
|
||||
ReadBufferFromString buf{cur_info_str};
|
||||
ReplicatedTableDataPath cur_info;
|
||||
cur_info.read(buf);
|
||||
if ((cur_info.host_id < host_id_) || ((cur_info.host_id == host_id_) && (cur_info.table_name <= table_name_)))
|
||||
break;
|
||||
code = zookeeper->trySet(path, new_info_str, stat.version);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZBADVERSION))
|
||||
throw zkutil::KeeperException(code, path);
|
||||
}
|
||||
}
|
||||
|
||||
String RestoreCoordinationDistributed::getReplicatedTableDataPath(const String & table_zk_path_) const
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
String path = zookeeper_path + "/repl_tables_data_paths/" + escapeForFileName(table_zk_path_);
|
||||
String info_str = zookeeper->get(path);
|
||||
ReadBufferFromString buf{info_str};
|
||||
ReplicatedTableDataPath info;
|
||||
info.read(buf);
|
||||
return info.data_path_in_backup;
|
||||
}
|
||||
|
||||
bool RestoreCoordinationDistributed::startInsertingDataToPartitionInReplicatedTable(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & partition_name_)
|
||||
{
|
||||
auto zookeeper = get_zookeeper();
|
||||
|
||||
String path = zookeeper_path + "/repl_tables_partitions/" + escapeForFileName(table_zk_path_);
|
||||
zookeeper->createIfNotExists(path, "");
|
||||
|
||||
path += "/" + escapeForFileName(partition_name_);
|
||||
String new_info = host_id_ + "|" + table_name_.first + "|" + table_name_.second;
|
||||
|
||||
auto code = zookeeper->tryCreate(path, new_info, zkutil::CreateMode::Persistent);
|
||||
if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS))
|
||||
throw zkutil::KeeperException(code, path);
|
||||
|
||||
if (code == Coordination::Error::ZOK)
|
||||
return true;
|
||||
|
||||
return zookeeper->get(path) == new_info;
|
||||
}
|
||||
|
||||
void RestoreCoordinationDistributed::drop()
|
||||
|
@ -2,8 +2,6 @@
|
||||
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Common/ZooKeeper/Common.h>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -16,12 +14,52 @@ public:
|
||||
RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_);
|
||||
~RestoreCoordinationDistributed() override;
|
||||
|
||||
void setOrGetPathInBackupForZkPath(const String & zk_path_, String & path_in_backup_) override;
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
bool startCreatingTableInReplicatedDB(
|
||||
const String & host_id_, const String & database_name_, const String & database_zk_path_, const String & table_name_) override;
|
||||
|
||||
bool acquireZkPathAndName(const String & zk_path_, const String & name_) override;
|
||||
void setResultForZkPathAndName(const String & zk_path_, const String & name_, Result res_) override;
|
||||
bool getResultForZkPathAndName(const String & zk_path_, const String & name_, Result & res_, std::chrono::milliseconds timeout_) const override;
|
||||
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
|
||||
const String & host_id_,
|
||||
const String & database_name_,
|
||||
const String & database_zk_path_,
|
||||
const String & table_name_,
|
||||
const String & error_message_) override;
|
||||
|
||||
/// Wait for another host to create a table in a replicated database.
|
||||
void waitForCreatingTableInReplicatedDB(
|
||||
const String & database_name_, const String & database_zk_path_, const String & table_name_, std::chrono::seconds timeout_) override;
|
||||
|
||||
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
|
||||
/// In the latter case `error_message` should be set.
|
||||
void finishRestoringMetadata(const String & host_id_, const String & error_message_) override;
|
||||
|
||||
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables). Throws an exception if the time is out.
void waitForAllHostsToRestoreMetadata(const Strings & host_ids_, std::chrono::seconds timeout_) const override;
|
||||
|
||||
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void setReplicatedTableDataPath(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & data_path_in_backup_) override;
|
||||
|
||||
/// Gets path in backup used by a replicated table.
|
||||
String getReplicatedTableDataPath(const String & table_zk_path) const override;
|
||||
|
||||
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & partition_name_) override;
|
||||
|
||||
/// Removes remotely stored information.
|
||||
void drop() override;
|
||||
|
||||
private:
|
||||
@ -30,9 +68,7 @@ private:
|
||||
|
||||
const String zookeeper_path;
|
||||
const zkutil::GetZooKeeper get_zookeeper;
|
||||
mutable std::mutex mutex;
|
||||
mutable std::map<std::pair<String, String>, std::optional<Result>> acquired;
|
||||
std::unordered_map<String, String> paths_in_backup_by_zk_path;
|
||||
const Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,9 @@
|
||||
#include <Backups/RestoreCoordinationLocal.h>
|
||||
#include <Backups/formatTableNameOrTemporaryTableName.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/chrono_io.h>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -11,63 +15,94 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
|
||||
RestoreCoordinationLocal::RestoreCoordinationLocal()
|
||||
: log(&Poco::Logger::get("RestoreCoordinationLocal"))
|
||||
{}
|
||||
|
||||
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
|
||||
|
||||
void RestoreCoordinationLocal::setOrGetPathInBackupForZkPath(const String & zk_path_, String & path_in_backup_)
|
||||
bool RestoreCoordinationLocal::startCreatingTableInReplicatedDB(
|
||||
const String & /* host_id_ */,
|
||||
const String & /* database_name_ */,
|
||||
const String & /* database_zk_path_*/,
|
||||
const String & /* table_name_ */)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto [it, inserted] = paths_in_backup_by_zk_path.try_emplace(zk_path_, path_in_backup_);
|
||||
if (!inserted)
|
||||
path_in_backup_ = it->second;
|
||||
}
|
||||
|
||||
bool RestoreCoordinationLocal::acquireZkPathAndName(const String & path_, const String & name_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
acquired.emplace(std::pair{path_, name_}, std::nullopt);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RestoreCoordinationLocal::setResultForZkPathAndName(const String & zk_path_, const String & name_, Result res_)
|
||||
void RestoreCoordinationLocal::finishCreatingTableInReplicatedDB(
|
||||
const String & /* host_id_ */,
|
||||
const String & database_name_,
|
||||
const String & /* database_zk_path_ */,
|
||||
const String & table_name_,
|
||||
const String & error_message_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
getResultRef(zk_path_, name_) = res_;
|
||||
result_changed.notify_all();
|
||||
if (error_message_.empty())
LOG_TRACE(log, "Created table {}.{}", database_name_, table_name_);
else
LOG_TRACE(log, "Failed to create table {}.{}: {}", database_name_, table_name_, error_message_);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationLocal::getResultForZkPathAndName(const String & zk_path_, const String & name_, Result & res_, std::chrono::milliseconds timeout_) const
|
||||
/// Wait for another host to create a table in a replicated database.
|
||||
void RestoreCoordinationLocal::waitForCreatingTableInReplicatedDB(
|
||||
const String & /* database_name_ */,
|
||||
const String & /* database_zk_path_ */,
|
||||
const String & /* table_name_ */,
|
||||
std::chrono::seconds /* timeout_ */)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
auto value = getResultRef(zk_path_, name_);
|
||||
if (value)
|
||||
}
|
||||
|
||||
void RestoreCoordinationLocal::finishRestoringMetadata(const String & /* host_id */, const String & error_message_)
|
||||
{
|
||||
LOG_TRACE(log, "Finished restoring metadata{}", (error_message_.empty() ? "" : (" with error " + error_message_)));
|
||||
}
|
||||
|
||||
void RestoreCoordinationLocal::waitForAllHostsToRestoreMetadata(const Strings & /* host_ids_ */, std::chrono::seconds /* timeout_ */) const
|
||||
{
|
||||
}
|
||||
|
||||
void RestoreCoordinationLocal::setReplicatedTableDataPath(const String & /* host_id_ */,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & data_path_in_backup_)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = replicated_tables_data_paths.find(table_zk_path_);
|
||||
if (it == replicated_tables_data_paths.end())
|
||||
{
|
||||
res_ = *value;
|
||||
return true;
|
||||
ReplicatedTableDataPath new_info;
|
||||
new_info.table_name = table_name_;
|
||||
new_info.data_path_in_backup = data_path_in_backup_;
|
||||
replicated_tables_data_paths.emplace(table_zk_path_, std::move(new_info));
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & cur_info = it->second;
|
||||
if (table_name_ < cur_info.table_name)
|
||||
{
|
||||
cur_info.table_name = table_name_;
|
||||
cur_info.data_path_in_backup = data_path_in_backup_;
|
||||
}
|
||||
}
|
||||
|
||||
bool waited = result_changed.wait_for(lock, timeout_, [this, zk_path_, name_] { return getResultRef(zk_path_, name_).has_value(); });
|
||||
if (!waited)
|
||||
return false;
|
||||
|
||||
res_ = *getResultRef(zk_path_, name_);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<IRestoreCoordination::Result> & RestoreCoordinationLocal::getResultRef(const String & zk_path_, const String & name_)
|
||||
String RestoreCoordinationLocal::getReplicatedTableDataPath(const String & table_zk_path) const
|
||||
{
|
||||
auto it = acquired.find(std::pair{zk_path_, name_});
|
||||
if (it == acquired.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path ({}, {}) is not acquired", zk_path_, name_);
|
||||
return it->second;
|
||||
std::lock_guard lock{mutex};
|
||||
auto it = replicated_tables_data_paths.find(table_zk_path);
|
||||
if (it == replicated_tables_data_paths.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicated data path is not set for zk_path={}", table_zk_path);
|
||||
return it->second.data_path_in_backup;
|
||||
}
|
||||
|
||||
const std::optional<IRestoreCoordination::Result> & RestoreCoordinationLocal::getResultRef(const String & zk_path_, const String & name_) const
|
||||
bool RestoreCoordinationLocal::startInsertingDataToPartitionInReplicatedTable(
|
||||
const String & /* host_id_ */, const DatabaseAndTableName & table_name_, const String & table_zk_path_, const String & partition_name_)
|
||||
{
|
||||
auto it = acquired.find(std::pair{zk_path_, name_});
|
||||
if (it == acquired.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path ({}, {}) is not acquired", zk_path_, name_);
|
||||
return it->second;
|
||||
std::lock_guard lock{mutex};
|
||||
auto key = std::pair{table_zk_path_, partition_name_};
|
||||
auto it = replicated_tables_partitions.try_emplace(std::move(key), table_name_).first;
|
||||
return it->second == table_name_;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,10 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <condition_variable>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace Poco { class Logger; }
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -15,20 +18,64 @@ public:
|
||||
RestoreCoordinationLocal();
|
||||
~RestoreCoordinationLocal() override;
|
||||
|
||||
void setOrGetPathInBackupForZkPath(const String & zk_path_, String & path_in_backup_) override;
|
||||
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
|
||||
bool startCreatingTableInReplicatedDB(
|
||||
const String & host_id_, const String & database_name_, const String & database_zk_path_, const String & table_name_) override;
|
||||
|
||||
bool acquireZkPathAndName(const String & zk_path_, const String & name_) override;
|
||||
void setResultForZkPathAndName(const String & zk_path_, const String & name_, Result res_) override;
|
||||
bool getResultForZkPathAndName(const String & zk_path_, const String & name_, Result & res_, std::chrono::milliseconds timeout_) const override;
|
||||
/// Sets that we have either created a table in a replicated database or failed to do so.
/// In the latter case `error_message` should be set.
/// Calling this function unblocks other hosts waiting for this table to be created (see waitForCreatingTableInReplicatedDB()).
void finishCreatingTableInReplicatedDB(
|
||||
const String & host_id_,
|
||||
const String & database_name_,
|
||||
const String & database_zk_path_,
|
||||
const String & table_name_,
|
||||
const String & error_message_) override;
|
||||
|
||||
/// Wait for another host to create a table in a replicated database.
|
||||
void waitForCreatingTableInReplicatedDB(
|
||||
const String & database_name_, const String & database_zk_path_, const String & table_name_, std::chrono::seconds timeout_) override;
|
||||
|
||||
/// Sets that a specified host has finished restoring metadata, successfully or with an error.
|
||||
/// In the latter case `error_message` should be set.
|
||||
void finishRestoringMetadata(const String & host_id_, const String & error_message_) override;
|
||||
|
||||
/// Waits for all hosts to finish restoring their metadata (i.e. to finish creating databases and tables).
void waitForAllHostsToRestoreMetadata(const Strings & host_ids_, std::chrono::seconds timeout_) const override;
|
||||
|
||||
/// Sets path in backup used by a replicated table.
/// This function can be called multiple times for the same table with different `host_id`, and in that case
/// getReplicatedTableDataPath() will choose `data_path_in_backup` with the lexicographically first `host_id`.
void setReplicatedTableDataPath(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & data_path_in_backup_) override;
|
||||
|
||||
/// Gets path in backup used by a replicated table.
|
||||
String getReplicatedTableDataPath(const String & table_zk_path_) const override;
|
||||
|
||||
/// Sets that this replica is going to restore a partition in a replicated table.
/// The function returns false if this partition is already being restored by another replica.
bool startInsertingDataToPartitionInReplicatedTable(
|
||||
const String & host_id_,
|
||||
const DatabaseAndTableName & table_name_,
|
||||
const String & table_zk_path_,
|
||||
const String & partition_name_) override;
|
||||
|
||||
private:
|
||||
std::optional<Result> & getResultRef(const String & zk_path_, const String & name_);
|
||||
const std::optional<Result> & getResultRef(const String & zk_path_, const String & name_) const;
|
||||
struct ReplicatedTableDataPath
|
||||
{
|
||||
DatabaseAndTableName table_name;
|
||||
String data_path_in_backup;
|
||||
};
|
||||
|
||||
std::unordered_map<String /* table_zk_path */, ReplicatedTableDataPath> replicated_tables_data_paths;
|
||||
|
||||
std::map<std::pair<String /* table_zk_path */, String /* partition_name */>, DatabaseAndTableName> replicated_tables_partitions;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
std::unordered_map<String, String> paths_in_backup_by_zk_path;
|
||||
std::map<std::pair<String, String>, std::optional<Result>> acquired;
|
||||
mutable std::condition_variable result_changed;
|
||||
const Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,10 +1,11 @@
#include <Backups/BackupInfo.h>
#include <Backups/BackupSettings.h>
#include <Backups/RestoreSettings.h>
#include <Common/FieldVisitorConvertToNumber.h>
#include <Core/SettingsFields.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <boost/algorithm/string/predicate.hpp>
#include <Common/FieldVisitorConvertToNumber.h>


namespace DB
@ -21,6 +22,8 @@ namespace
{
RestoreTableCreationMode value;

explicit SettingFieldRestoreTableCreationMode(RestoreTableCreationMode value_) : value(value_) {}

explicit SettingFieldRestoreTableCreationMode(const Field & field)
{
if (field.getType() == Field::Types::String)
@ -59,7 +62,7 @@ namespace
using SettingFieldRestoreDatabaseCreationMode = SettingFieldRestoreTableCreationMode;
}

/// List of restore settings except base_backup_name.
/// List of restore settings except base_backup_name and cluster_host_ids.
#define LIST_OF_RESTORE_SETTINGS(M) \
M(String, password) \
M(Bool, structure_only) \
@ -72,16 +75,15 @@ namespace
M(UInt64, replica_num) \
M(UInt64, shard_num_in_backup) \
M(UInt64, replica_num_in_backup) \
M(Bool, allow_non_empty_tables) \
M(Bool, internal) \
M(String, host_id) \
M(String, coordination_zk_path)

RestoreSettings RestoreSettings::fromRestoreQuery(const ASTBackupQuery & query)
{
RestoreSettings res;

if (query.base_backup_name)
res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);

if (query.settings)
{
const auto & settings = query.settings->as<const ASTSetQuery &>().changes;
@ -97,25 +99,39 @@ RestoreSettings RestoreSettings::fromRestoreQuery(const ASTBackupQuery & query)
}
}

if (query.base_backup_name)
res.base_backup_info = BackupInfo::fromAST(*query.base_backup_name);

if (query.cluster_host_ids)
res.cluster_host_ids = BackupSettings::Util::clusterHostIDsFromAST(*query.cluster_host_ids);

return res;
}

void RestoreSettings::copySettingsToRestoreQuery(ASTBackupQuery & query) const
void RestoreSettings::copySettingsToQuery(ASTBackupQuery & query) const
{
query.base_backup_name = base_backup_info ? base_backup_info->toAST() : nullptr;

auto query_settings = std::make_shared<ASTSetQuery>();
query_settings->is_standalone = false;

static const RestoreSettings default_settings;
bool all_settings_are_default = true;

#define SET_SETTINGS_IN_RESTORE_QUERY_HELPER(TYPE, NAME) \
if ((NAME) != default_settings.NAME) \
query_settings->changes.emplace_back(#NAME, static_cast<Field>(SettingField##TYPE{NAME}));
{ \
query_settings->changes.emplace_back(#NAME, static_cast<Field>(SettingField##TYPE{NAME})); \
all_settings_are_default = false; \
}

LIST_OF_RESTORE_SETTINGS(SET_SETTINGS_IN_RESTORE_QUERY_HELPER)

if (all_settings_are_default)
query_settings = nullptr;

query.settings = query_settings;

query.base_backup_name = base_backup_info ? base_backup_info->toAST() : nullptr;
query.cluster_host_ids = !cluster_host_ids.empty() ? BackupSettings::Util::clusterHostIDsToAST(cluster_host_ids) : nullptr;
}

}
@ -10,6 +10,9 @@ class ASTBackupQuery;

struct StorageRestoreSettings
{
/// Internal, should not be specified by user.
/// The current host's ID in the format 'escaped_host_name:port'.
String host_id;
};

/// How the RESTORE command will handle table/database existence.
@ -79,15 +82,24 @@ struct RestoreSettings : public StorageRestoreSettings
/// else it means the same as `replica`.
size_t replica_num_in_backup = 0;

/// Allows RESTORE TABLE to insert data into non-empty tables.
/// This will mix earlier data in the table with the data extracted from the backup.
/// Setting "allow_non_empty_tables=true" thus can cause data duplication in the table, use with caution.
bool allow_non_empty_tables = false;

/// Internal, should not be specified by user.
bool internal = false;

/// Internal, should not be specified by user.
/// Cluster's hosts' IDs in the format 'escaped_host_name:port' for all shards and replicas in a cluster specified in BACKUP ON CLUSTER.
std::vector<Strings> cluster_host_ids;

/// Internal, should not be specified by user.
/// Path in Zookeeper used to coordinate restoring process while executing by RESTORE ON CLUSTER.
String coordination_zk_path;

static RestoreSettings fromRestoreQuery(const ASTBackupQuery & query);
void copySettingsToRestoreQuery(ASTBackupQuery & query) const;
void copySettingsToQuery(ASTBackupQuery & query) const;
};

}
@ -1,12 +1,13 @@
|
||||
#include <Backups/RestoreUtils.h>
|
||||
#include <Backups/BackupUtils.h>
|
||||
#include <Backups/BackupSettings.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/DDLCompareUtils.h>
|
||||
#include <Backups/DDLRenamingVisitor.h>
|
||||
#include <Backups/IBackup.h>
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <Backups/IRestoreTask.h>
|
||||
#include <Backups/RestoreCoordinationDistributed.h>
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Backups/formatTableNameOrTemporaryTableName.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
@ -20,9 +21,9 @@
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <base/chrono_io.h>
|
||||
#include <base/insertAtEnd.h>
|
||||
#include <base/sleep.h>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include <boost/range/algorithm_ext/erase.hpp>
|
||||
#include <filesystem>
|
||||
@ -189,7 +190,7 @@ namespace
|
||||
return {};
|
||||
}
|
||||
|
||||
bool isSequential() const override { return true; }
|
||||
RestoreKind getRestoreKind() const override { return RestoreKind::METADATA; }
|
||||
|
||||
private:
|
||||
void createDatabase()
|
||||
@ -243,7 +244,53 @@ namespace
|
||||
};
|
||||
|
||||
|
||||
/// Restores a table and fills it with data.
|
||||
class RestoreTableDataTask : public IRestoreTask
|
||||
{
|
||||
public:
|
||||
RestoreTableDataTask(
|
||||
ContextMutablePtr context_,
|
||||
StoragePtr storage_,
|
||||
const ASTs & partitions_,
|
||||
const BackupPtr & backup_,
|
||||
const String & data_path_in_backup_,
|
||||
const RestoreSettingsPtr & restore_settings_,
|
||||
const std::shared_ptr<IRestoreCoordination> & restore_coordination_)
|
||||
: context(context_)
|
||||
, storage(storage_)
|
||||
, partitions(partitions_)
|
||||
, backup(backup_)
|
||||
, data_path_in_backup(data_path_in_backup_)
|
||||
, restore_settings(restore_settings_)
|
||||
, restore_coordination(restore_coordination_)
|
||||
{
|
||||
}
|
||||
|
||||
RestoreTasks run() override
|
||||
{
|
||||
const auto * replicated_table = typeid_cast<const StorageReplicatedMergeTree *>(storage.get());
|
||||
if (replicated_table)
|
||||
{
|
||||
data_path_in_backup = restore_coordination->getReplicatedTableDataPath(
|
||||
replicated_table->getZooKeeperName() + replicated_table->getZooKeeperPath());
|
||||
}
|
||||
|
||||
RestoreTasks tasks;
|
||||
tasks.emplace_back(storage->restoreData(context, partitions, backup, data_path_in_backup, *restore_settings, restore_coordination));
|
||||
return tasks;
|
||||
}
|
||||
|
||||
private:
|
||||
ContextMutablePtr context;
|
||||
StoragePtr storage;
|
||||
ASTs partitions;
|
||||
BackupPtr backup;
|
||||
String data_path_in_backup;
|
||||
RestoreSettingsPtr restore_settings;
|
||||
std::shared_ptr<IRestoreCoordination> restore_coordination;
|
||||
};
|
||||
|
||||
|
||||
/// Restores a table.
|
||||
class RestoreTableTask : public IRestoreTask
|
||||
{
|
||||
public:
|
||||
@ -254,10 +301,16 @@ namespace
|
||||
const BackupPtr & backup_,
|
||||
const DatabaseAndTableName & table_name_in_backup_,
|
||||
const RestoreSettingsPtr & restore_settings_,
|
||||
const std::shared_ptr<IRestoreCoordination> & restore_coordination_)
|
||||
: context(context_), create_query(typeid_cast<std::shared_ptr<ASTCreateQuery>>(create_query_)),
|
||||
partitions(partitions_), backup(backup_), table_name_in_backup(table_name_in_backup_),
|
||||
restore_settings(restore_settings_), restore_coordination(restore_coordination_)
|
||||
const std::shared_ptr<IRestoreCoordination> & restore_coordination_,
|
||||
std::chrono::seconds timeout_for_restoring_metadata_)
|
||||
: context(context_)
|
||||
, create_query(typeid_cast<std::shared_ptr<ASTCreateQuery>>(create_query_))
|
||||
, partitions(partitions_)
|
||||
, backup(backup_)
|
||||
, table_name_in_backup(table_name_in_backup_)
|
||||
, restore_settings(restore_settings_)
|
||||
, restore_coordination(restore_coordination_)
|
||||
, timeout_for_restoring_metadata(timeout_for_restoring_metadata_)
|
||||
{
|
||||
table_name = DatabaseAndTableName{create_query->getDatabase(), create_query->getTable()};
|
||||
if (create_query->temporary)
|
||||
@ -266,93 +319,22 @@ namespace
|
||||
|
||||
RestoreTasks run() override
|
||||
{
|
||||
if (acquireTableCreation())
|
||||
{
|
||||
try
|
||||
{
|
||||
createStorage();
|
||||
getStorage();
|
||||
checkStorageCreateQuery();
|
||||
setTableCreationResult(IRestoreCoordination::Result::SUCCEEDED);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
setTableCreationResult(IRestoreCoordination::Result::FAILED);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
waitForTableCreation();
|
||||
getStorage();
|
||||
checkStorageCreateQuery();
|
||||
}
|
||||
|
||||
RestoreTasks tasks;
|
||||
if (auto task = insertData())
|
||||
tasks.push_back(std::move(task));
|
||||
return tasks;
|
||||
getDatabase();
|
||||
createStorage();
|
||||
getStorage();
|
||||
checkStorageCreateQuery();
|
||||
checkTableIsEmpty();
|
||||
checkTableDataCompatible();
|
||||
return insertData();
|
||||
}
|
||||
|
||||
bool isSequential() const override { return true; }
|
||||
RestoreKind getRestoreKind() const override { return RestoreKind::METADATA; }
|
||||
|
||||
private:
|
||||
bool acquireTableCreation()
|
||||
void getDatabase()
|
||||
{
|
||||
if (restore_settings->create_table == RestoreTableCreationMode::kMustExist)
|
||||
return true;
|
||||
|
||||
auto replicated_db
|
||||
= typeid_cast<std::shared_ptr<const DatabaseReplicated>>(DatabaseCatalog::instance().getDatabase(table_name.first));
|
||||
if (!replicated_db)
|
||||
return true;
|
||||
|
||||
use_coordination_for_table_creation = true;
|
||||
replicated_database_zookeeper_path = replicated_db->getZooKeeperPath();
|
||||
return restore_coordination->acquireZkPathAndName(replicated_database_zookeeper_path, table_name.second);
|
||||
}
|
||||
|
||||
void setTableCreationResult(IRestoreCoordination::Result res)
|
||||
{
|
||||
if (use_coordination_for_table_creation)
|
||||
restore_coordination->setResultForZkPathAndName(replicated_database_zookeeper_path, table_name.second, res);
|
||||
}
|
||||
|
||||
void waitForTableCreation()
|
||||
{
|
||||
if (!use_coordination_for_table_creation)
|
||||
return;
|
||||
|
||||
IRestoreCoordination::Result res;
|
||||
const auto & config = context->getConfigRef();
|
||||
auto timeout = std::chrono::seconds(config.getUInt("backups.create_table_in_replicated_db_timeout", 10));
|
||||
auto start_time = std::chrono::steady_clock::now();
|
||||
|
||||
if (!restore_coordination->getResultForZkPathAndName(replicated_database_zookeeper_path, table_name.second, res, timeout))
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Waited too long ({}) for creating of {} on another replica",
|
||||
to_string(timeout),
|
||||
formatTableNameOrTemporaryTableName(table_name));
|
||||
|
||||
if (res == IRestoreCoordination::Result::FAILED)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Failed creating of {} on another replica",
|
||||
formatTableNameOrTemporaryTableName(table_name));
|
||||
|
||||
while (std::chrono::steady_clock::now() - start_time < timeout)
|
||||
{
|
||||
if (DatabaseCatalog::instance().tryGetDatabaseAndTable({table_name.first, table_name.second}, context).second)
|
||||
return;
|
||||
sleepForMilliseconds(50);
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Waited too long ({}) for creating of {} on another replica",
|
||||
to_string(timeout),
|
||||
formatTableNameOrTemporaryTableName(table_name));
|
||||
database = DatabaseCatalog::instance().getDatabase(table_name.first);
|
||||
replicated_database = typeid_cast<std::shared_ptr<DatabaseReplicated>>(database);
|
||||
}
|
||||
|
||||
void createStorage()
|
||||
@ -362,61 +344,171 @@ namespace
|
||||
|
||||
auto cloned_create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(create_query->clone());
|
||||
cloned_create_query->if_not_exists = (restore_settings->create_table == RestoreTableCreationMode::kCreateIfNotExists);
|
||||
InterpreterCreateQuery create_interpreter{cloned_create_query, context};
|
||||
create_interpreter.setInternal(true);
|
||||
create_interpreter.execute();
|
||||
}
|
||||
|
||||
StoragePtr getStorage()
|
||||
{
|
||||
if (!storage)
|
||||
std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable({table_name.first, table_name.second}, context);
|
||||
return storage;
|
||||
}
|
||||
/// We need special processing for tables in replicated databases.
/// Because of replication, multiple nodes can try to restore the same table and fail with "Table already exists",
/// since the table could already have been restored on another node and then replicated to this node.
/// To solve this problem we use the restore coordination: the first node calls
/// IRestoreCoordination::startCreatingTableInReplicatedDB(), and for other nodes this function returns false, which means
/// this table is already being created by some other node.
|
||||
bool wait_instead_of_creating = false;
|
||||
if (replicated_database)
|
||||
wait_instead_of_creating = !restore_coordination->startCreatingTableInReplicatedDB(
|
||||
restore_settings->host_id, table_name.first, replicated_database->getZooKeeperPath(), table_name.second);
|
||||
|
||||
ASTPtr getStorageCreateQuery()
|
||||
{
|
||||
if (!storage_create_query)
|
||||
if (wait_instead_of_creating)
|
||||
{
|
||||
getStorage();
|
||||
storage_create_query = database->getCreateTableQuery(table_name.second, context);
|
||||
waitForReplicatedDatabaseToSyncTable();
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
InterpreterCreateQuery create_interpreter{cloned_create_query, context};
|
||||
create_interpreter.setInternal(true);
|
||||
create_interpreter.execute();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (replicated_database)
|
||||
{
|
||||
restore_coordination->finishCreatingTableInReplicatedDB(
|
||||
restore_settings->host_id,
|
||||
table_name.first,
|
||||
replicated_database->getZooKeeperPath(),
|
||||
table_name.second,
|
||||
getCurrentExceptionMessage(false));
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
if (replicated_database)
|
||||
restore_coordination->finishCreatingTableInReplicatedDB(
|
||||
restore_settings->host_id, table_name.first, replicated_database->getZooKeeperPath(), table_name.second);
|
||||
}
|
||||
}
|
||||
|
||||
void waitForReplicatedDatabaseToSyncTable()
|
||||
{
|
||||
if (!replicated_database)
|
||||
return;
|
||||
|
||||
restore_coordination->waitForCreatingTableInReplicatedDB(table_name.first, replicated_database->getZooKeeperPath(), table_name.second);
|
||||
|
||||
/// The table `table_name` was created on another host and must be in the replicated database's queue,
/// so we have to wait until the replicated database syncs it.
|
||||
bool replicated_database_synced = false;
|
||||
auto start_time = std::chrono::steady_clock::now();
|
||||
bool use_timeout = (timeout_for_restoring_metadata.count() > 0);
|
||||
while (!database->isTableExist(table_name.second, context))
|
||||
{
|
||||
if (replicated_database_synced || (use_timeout && (std::chrono::steady_clock::now() - start_time) >= timeout_for_restoring_metadata))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Table {}.{} in the replicated database {} was not synced from another node in {}",
|
||||
table_name.first,
|
||||
table_name.second,
|
||||
table_name.first,
|
||||
to_string(timeout_for_restoring_metadata));
|
||||
}
|
||||
replicated_database_synced = replicated_database->waitForReplicaToProcessAllEntries(50);
|
||||
}
|
||||
}
|
||||
|
||||
void getStorage()
|
||||
{
|
||||
storage = database->getTable(table_name.second, context);
|
||||
storage_create_query = database->getCreateTableQuery(table_name.second, context);
|
||||
|
||||
if (!restore_settings->structure_only)
|
||||
{
|
||||
data_path_in_backup = PathsInBackup{*backup}.getDataPath(table_name_in_backup, restore_settings->shard_num_in_backup, restore_settings->replica_num_in_backup);
|
||||
has_data = !backup->listFiles(data_path_in_backup).empty();
|
||||
|
||||
const auto * replicated_table = typeid_cast<const StorageReplicatedMergeTree *>(storage.get());
|
||||
if (replicated_table)
|
||||
{
|
||||
/// We need to be consistent when restoring replicated tables.
/// A backup is allowed to contain multiple replicas of the same replicated table,
/// and when we restore it we need to choose a single data path in the backup to restore this table on each replica.
/// That's why we use the restore coordination here: during the metadata-restoring stage each replica sets its own
/// `data_path_in_backup` for the same zookeeper path, and then the restore coordination chooses one `data_path_in_backup`
/// to use for restoring data.
|
||||
restore_coordination->setReplicatedTableDataPath(
|
||||
restore_settings->host_id,
|
||||
table_name_in_backup,
|
||||
replicated_table->getZooKeeperName() + replicated_table->getZooKeeperPath(),
|
||||
data_path_in_backup);
|
||||
has_data = true;
|
||||
}
|
||||
}
|
||||
return storage_create_query;
|
||||
}
|
||||
|
||||
void checkStorageCreateQuery()
|
||||
{
|
||||
if (restore_settings->allow_different_table_def)
|
||||
return;
|
||||
|
||||
getStorageCreateQuery();
|
||||
if (areTableDefinitionsSame(*create_query, *storage_create_query))
|
||||
return;
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"The {} already exists but has a different definition: {}, "
|
||||
"compare to its definition in the backup: {}",
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
serializeAST(*storage_create_query),
|
||||
serializeAST(*create_query));
|
||||
if (!restore_settings->allow_different_table_def && !areTableDefinitionsSame(*create_query, *storage_create_query))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"The {} already exists but has a different definition: {}, "
|
||||
"compare to its definition in the backup: {}",
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
serializeAST(*storage_create_query),
|
||||
serializeAST(*create_query));
|
||||
}
|
||||
}
|
||||
|
||||
bool hasData()
|
||||
void checkTableIsEmpty()
|
||||
{
|
||||
if (has_data)
|
||||
return *has_data;
|
||||
if (restore_settings->allow_non_empty_tables || restore_settings->structure_only || !has_data)
|
||||
return;
|
||||
|
||||
has_data = false;
|
||||
if (restore_settings->structure_only)
|
||||
return false;
|
||||
bool empty = true;
|
||||
if (auto total_rows = storage->totalRows(context->getSettingsRef()))
|
||||
empty = (*total_rows == 0);
|
||||
else if (auto total_bytes = storage->totalBytes(context->getSettingsRef()))
|
||||
empty = (*total_bytes == 0);
|
||||
|
||||
data_path_in_backup = PathsInBackup{*backup}.getDataPath(table_name_in_backup, restore_settings->shard_num_in_backup, restore_settings->replica_num_in_backup);
|
||||
if (backup->listFiles(data_path_in_backup).empty())
|
||||
return false;
|
||||
if (empty)
|
||||
{
|
||||
/// If this is a replicated table new parts could be in its queue but not fetched yet.
|
||||
/// In that case we consider the table as not empty.
|
||||
if (auto * replicated_table = typeid_cast<StorageReplicatedMergeTree *>(storage.get()))
|
||||
{
|
||||
StorageReplicatedMergeTree::Status status;
|
||||
replicated_table->getStatus(status, /* with_zk_fields = */ false);
|
||||
|
||||
if (status.queue.inserts_in_queue)
|
||||
{
|
||||
empty = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Check total_rows again to be sure.
|
||||
if (auto total_rows = storage->totalRows(context->getSettingsRef()); *total_rows != 0)
|
||||
empty = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
|
||||
"Cannot restore {} because it already contains some data. You can set structure_only=true or "
|
||||
"allow_non_empty_tables=true to overcome that in the way you want",
|
||||
formatTableNameOrTemporaryTableName(table_name));
|
||||
}
|
||||
}
|
||||
|
||||
void checkTableDataCompatible()
|
||||
{
|
||||
if (restore_settings->structure_only || !has_data)
|
||||
return;
|
||||
|
||||
getStorageCreateQuery();
|
||||
if (!areTableDataCompatible(*create_query, *storage_create_query))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_RESTORE_TABLE,
"Cannot attach data of the {} in the backup to the existing {} because they are not compatible. "
|
||||
@ -427,20 +519,17 @@ namespace
|
||||
serializeAST(*create_query),
|
||||
formatTableNameOrTemporaryTableName(table_name),
|
||||
serializeAST(*storage_create_query));
|
||||
|
||||
/// We check for INSERT privilege only if we're going to write into table.
|
||||
context->checkAccess(AccessType::INSERT, table_name.first, table_name.second);
|
||||
|
||||
has_data = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
RestoreTaskPtr insertData()
|
||||
RestoreTasks insertData()
|
||||
{
|
||||
if (!hasData())
|
||||
if (restore_settings->structure_only || !has_data)
|
||||
return {};
|
||||
|
||||
return storage->restoreData(context, partitions, backup, data_path_in_backup, *restore_settings, restore_coordination);
|
||||
RestoreTasks tasks;
|
||||
tasks.emplace_back(std::make_unique<RestoreTableDataTask>(context, storage, partitions, backup, data_path_in_backup, restore_settings, restore_coordination));
|
||||
return tasks;
|
||||
}
|
||||
|
||||
ContextMutablePtr context;
|
||||
@ -451,12 +540,12 @@ namespace
|
||||
DatabaseAndTableName table_name_in_backup;
|
||||
RestoreSettingsPtr restore_settings;
|
||||
std::shared_ptr<IRestoreCoordination> restore_coordination;
|
||||
bool use_coordination_for_table_creation = false;
|
||||
String replicated_database_zookeeper_path;
|
||||
std::chrono::seconds timeout_for_restoring_metadata;
|
||||
DatabasePtr database;
|
||||
std::shared_ptr<DatabaseReplicated> replicated_database;
|
||||
StoragePtr storage;
|
||||
ASTPtr storage_create_query;
|
||||
std::optional<bool> has_data;
|
||||
bool has_data = false;
|
||||
String data_path_in_backup;
|
||||
};
|
||||
|
||||
@ -466,20 +555,25 @@ namespace
|
||||
class RestoreTasksBuilder
|
||||
{
|
||||
public:
|
||||
RestoreTasksBuilder(ContextMutablePtr context_, const BackupPtr & backup_, const RestoreSettings & restore_settings_)
|
||||
: context(context_), backup(backup_), restore_settings(restore_settings_)
|
||||
RestoreTasksBuilder(
|
||||
ContextMutablePtr context_,
|
||||
const BackupPtr & backup_,
|
||||
const RestoreSettings & restore_settings_,
|
||||
const std::shared_ptr<IRestoreCoordination> & restore_coordination_,
|
||||
std::chrono::seconds timeout_for_restoring_metadata_)
|
||||
: context(context_)
|
||||
, backup(backup_)
|
||||
, restore_settings(restore_settings_)
|
||||
, restore_coordination(restore_coordination_)
|
||||
, timeout_for_restoring_metadata(timeout_for_restoring_metadata_)
|
||||
{
|
||||
if (!restore_settings.coordination_zk_path.empty())
|
||||
restore_coordination = std::make_shared<RestoreCoordinationDistributed>(restore_settings.coordination_zk_path, [context=context] { return context->getZooKeeper(); });
|
||||
}
|
||||
|
||||
/// Prepares internal structures for making tasks for restoring.
|
||||
void prepare(const ASTBackupQuery::Elements & elements)
|
||||
{
|
||||
adjustIndicesOfSourceShardAndReplicaInBackup();
|
||||
|
||||
String current_database = context->getCurrentDatabase();
|
||||
renaming_settings.setFromBackupQuery(elements, current_database);
|
||||
calculateShardNumAndReplicaNumInBackup();
|
||||
renaming_settings.setFromBackupQuery(elements);
|
||||
|
||||
for (const auto & element : elements)
|
||||
{
|
||||
@ -487,11 +581,7 @@ namespace
|
||||
{
|
||||
case ElementType::TABLE:
|
||||
{
|
||||
const String & table_name = element.name.second;
|
||||
String database_name = element.name.first;
|
||||
if (database_name.empty())
|
||||
database_name = current_database;
|
||||
prepareToRestoreTable(DatabaseAndTableName{database_name, table_name}, element.partitions);
|
||||
prepareToRestoreTable(element.name, element.partitions);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -522,21 +612,29 @@ namespace
|
||||
|
||||
/// TODO: We need to restore tables according to their dependencies.
|
||||
for (const auto & info : tables | boost::adaptors::map_values)
|
||||
res.push_back(std::make_unique<RestoreTableTask>(context, info.create_query, info.partitions, backup, info.name_in_backup, restore_settings_ptr, restore_coordination));
|
||||
res.push_back(std::make_unique<RestoreTableTask>(context, info.create_query, info.partitions, backup, info.name_in_backup, restore_settings_ptr, restore_coordination, timeout_for_restoring_metadata));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
void adjustIndicesOfSourceShardAndReplicaInBackup()
|
||||
void calculateShardNumAndReplicaNumInBackup()
|
||||
{
|
||||
size_t shard_num = 0;
|
||||
size_t replica_num = 0;
|
||||
if (!restore_settings.host_id.empty())
|
||||
{
|
||||
std::tie(shard_num, replica_num)
|
||||
= BackupSettings::Util::findShardNumAndReplicaNum(restore_settings.cluster_host_ids, restore_settings.host_id);
|
||||
}
|
||||
|
||||
auto shards_in_backup = PathsInBackup{*backup}.getShards();
|
||||
if (!restore_settings.shard_num_in_backup)
|
||||
{
|
||||
if (shards_in_backup.size() == 1)
|
||||
restore_settings.shard_num_in_backup = shards_in_backup[0];
|
||||
else
|
||||
restore_settings.shard_num_in_backup = restore_settings.shard_num;
|
||||
restore_settings.shard_num_in_backup = shard_num;
|
||||
}
|
||||
|
||||
if (std::find(shards_in_backup.begin(), shards_in_backup.end(), restore_settings.shard_num_in_backup) == shards_in_backup.end())
|
||||
@ -548,7 +646,7 @@ namespace
|
||||
if (replicas_in_backup.size() == 1)
|
||||
restore_settings.replica_num_in_backup = replicas_in_backup[0];
|
||||
else
|
||||
restore_settings.replica_num_in_backup = restore_settings.replica_num;
|
||||
restore_settings.replica_num_in_backup = replica_num;
|
||||
}
|
||||
|
||||
if (std::find(replicas_in_backup.begin(), replicas_in_backup.end(), restore_settings.replica_num_in_backup) == replicas_in_backup.end())
|
||||
@ -696,115 +794,109 @@ namespace
|
||||
BackupPtr backup;
|
||||
RestoreSettings restore_settings;
|
||||
std::shared_ptr<IRestoreCoordination> restore_coordination;
|
||||
std::chrono::seconds timeout_for_restoring_metadata;
|
||||
DDLRenamingSettings renaming_settings;
|
||||
std::map<String /* new_db_name */, CreateDatabaseInfo> databases;
|
||||
std::map<DatabaseAndTableName /* new_table_name */, CreateTableInfo> tables;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/// Reverts completed restore tasks (in reversed order).
|
||||
void rollbackRestoreTasks(RestoreTasks && restore_tasks)
|
||||
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const Elements & elements, const RestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination, std::chrono::seconds timeout_for_restoring_metadata)
|
||||
{
|
||||
try
|
||||
{
|
||||
for (auto & restore_task : restore_tasks | boost::adaptors::reversed)
|
||||
{
|
||||
try
|
||||
{
|
||||
std::move(restore_task)->rollback();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("Restore", "Couldn't rollback changes after failed RESTORE");
|
||||
}
|
||||
}
|
||||
RestoreTasksBuilder builder{context, backup, restore_settings, restore_coordination, timeout_for_restoring_metadata};
|
||||
builder.prepare(elements);
|
||||
return builder.makeTasks();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
restore_coordination->finishRestoringMetadata(restore_settings.host_id, getCurrentExceptionMessage(false));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const Elements & elements, const RestoreSettings & restore_settings)
|
||||
void executeRestoreTasks(RestoreTasks && restore_tasks, ThreadPool & thread_pool, const RestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination,
|
||||
std::chrono::seconds timeout_for_restoring_metadata)
|
||||
{
|
||||
RestoreTasksBuilder builder{context, backup, restore_settings};
|
||||
builder.prepare(elements);
|
||||
return builder.makeTasks();
|
||||
}
|
||||
|
||||
|
||||
void executeRestoreTasks(RestoreTasks && restore_tasks, size_t num_threads)
|
||||
{
|
||||
if (!num_threads)
|
||||
num_threads = 1;
|
||||
|
||||
RestoreTasks completed_tasks;
|
||||
bool need_rollback_completed_tasks = true;
|
||||
|
||||
SCOPE_EXIT({
|
||||
if (need_rollback_completed_tasks)
|
||||
rollbackRestoreTasks(std::move(completed_tasks));
|
||||
});
|
||||
|
||||
std::deque<std::unique_ptr<IRestoreTask>> sequential_tasks;
|
||||
std::deque<std::unique_ptr<IRestoreTask>> enqueued_tasks;
|
||||
|
||||
/// There are two kinds of restore tasks: sequential and non-sequential ones.
|
||||
/// Sequential tasks are executed first and always in one thread.
|
||||
for (auto & task : restore_tasks)
|
||||
try
|
||||
{
|
||||
if (task->isSequential())
|
||||
sequential_tasks.push_back(std::move(task));
|
||||
else
|
||||
enqueued_tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
/// Sequential tasks.
|
||||
while (!sequential_tasks.empty())
|
||||
{
|
||||
auto current_task = std::move(sequential_tasks.front());
|
||||
sequential_tasks.pop_front();
|
||||
|
||||
RestoreTasks new_tasks = current_task->run();
|
||||
|
||||
completed_tasks.push_back(std::move(current_task));
|
||||
for (auto & task : new_tasks)
|
||||
/// There are two kinds of restore tasks: sequential and non-sequential ones.
|
||||
/// Sequential tasks are executed first and always in one thread.
|
||||
for (auto & task : restore_tasks)
|
||||
{
|
||||
if (task->isSequential())
|
||||
if (task->getRestoreKind() == IRestoreTask::RestoreKind::METADATA)
|
||||
sequential_tasks.push_back(std::move(task));
|
||||
else
|
||||
enqueued_tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
/// Sequential tasks.
|
||||
while (!sequential_tasks.empty())
|
||||
{
|
||||
auto current_task = std::move(sequential_tasks.front());
|
||||
sequential_tasks.pop_front();
|
||||
|
||||
RestoreTasks new_tasks = current_task->run();
|
||||
|
||||
for (auto & task : new_tasks)
|
||||
{
|
||||
if (task->getRestoreKind() == IRestoreTask::RestoreKind::METADATA)
|
||||
sequential_tasks.push_back(std::move(task));
|
||||
else
|
||||
enqueued_tasks.push_back(std::move(task));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
restore_coordination->finishRestoringMetadata(restore_settings.host_id, getCurrentExceptionMessage(false));
|
||||
throw;
|
||||
}
|
||||
|
||||
/// We've finished restoring metadata; now we will wait for other replicas and shards to finish too.
/// We need this waiting because we're going to call some functions which require data collected from other nodes,
/// see IRestoreCoordination::checkTablesNotExistedInReplicatedDBs(), IRestoreCoordination::getReplicatedTableDataPath().
|
||||
restore_coordination->finishRestoringMetadata(restore_settings.host_id);
|
||||
if (!restore_settings.host_id.empty())
|
||||
{
|
||||
restore_coordination->waitForAllHostsToRestoreMetadata(
|
||||
BackupSettings::Util::filterHostIDs(
|
||||
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num),
|
||||
timeout_for_restoring_metadata);
|
||||
}
|
||||
|
||||
/// Non-sequential tasks.
|
||||
std::unordered_map<IRestoreTask *, std::unique_ptr<IRestoreTask>> running_tasks;
|
||||
std::vector<ThreadFromGlobalPool> threads;
|
||||
size_t num_active_jobs = 0;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cond;
|
||||
std::condition_variable event;
|
||||
std::exception_ptr exception;
|
||||
|
||||
while (true)
|
||||
{
|
||||
IRestoreTask * current_task = nullptr;
|
||||
std::unique_ptr<IRestoreTask> current_task;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
cond.wait(lock, [&]
|
||||
{
|
||||
if (exception)
|
||||
return true;
|
||||
if (enqueued_tasks.empty())
|
||||
return running_tasks.empty();
|
||||
return (running_tasks.size() < num_threads);
|
||||
});
|
||||
|
||||
if (exception || enqueued_tasks.empty())
|
||||
event.wait(lock, [&] { return !enqueued_tasks.empty() || exception || !num_active_jobs; });
|
||||
if ((enqueued_tasks.empty() && !num_active_jobs) || exception)
|
||||
break;
|
||||
|
||||
auto current_task_ptr = std::move(enqueued_tasks.front());
|
||||
current_task = current_task_ptr.get();
|
||||
current_task = std::move(enqueued_tasks.front());
|
||||
enqueued_tasks.pop_front();
|
||||
running_tasks[current_task] = std::move(current_task_ptr);
|
||||
++num_active_jobs;
|
||||
}
|
||||
|
||||
assert(current_task);
|
||||
threads.emplace_back([current_task, &mutex, &cond, &enqueued_tasks, &running_tasks, &completed_tasks, &exception]() mutable
|
||||
auto job = [current_task = std::shared_ptr<IRestoreTask>(std::move(current_task)), &enqueued_tasks, &num_active_jobs, &exception, &mutex, &event]() mutable
|
||||
{
|
||||
SCOPE_EXIT({
|
||||
--num_active_jobs;
|
||||
event.notify_all();
|
||||
});
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (exception)
|
||||
@ -812,56 +904,35 @@ void executeRestoreTasks(RestoreTasks && restore_tasks, size_t num_threads)
|
||||
}
|
||||
|
||||
RestoreTasks new_tasks;
|
||||
std::exception_ptr new_exception;
|
||||
try
|
||||
{
|
||||
new_tasks = current_task->run();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
new_exception = std::current_exception();
|
||||
std::lock_guard lock{mutex};
|
||||
if (!exception)
|
||||
exception = std::current_exception();
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
auto current_task_it = running_tasks.find(current_task);
|
||||
auto current_task_ptr = std::move(current_task_it->second);
|
||||
running_tasks.erase(current_task_it);
|
||||
|
||||
if (!new_exception)
|
||||
{
|
||||
completed_tasks.push_back(std::move(current_task_ptr));
|
||||
enqueued_tasks.insert(
|
||||
enqueued_tasks.end(), std::make_move_iterator(new_tasks.begin()), std::make_move_iterator(new_tasks.end()));
|
||||
}
|
||||
|
||||
if (!exception)
|
||||
exception = new_exception;
|
||||
|
||||
cond.notify_all();
|
||||
enqueued_tasks.insert(
|
||||
enqueued_tasks.end(), std::make_move_iterator(new_tasks.begin()), std::make_move_iterator(new_tasks.end()));
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
if (!thread_pool.trySchedule(job))
|
||||
job();
|
||||
}
|
||||
|
||||
for (auto & thread : threads)
|
||||
thread.join();
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
event.wait(lock, [&] { return !num_active_jobs; });
|
||||
}
|
||||
|
||||
if (exception)
|
||||
std::rethrow_exception(exception);
|
||||
else
|
||||
need_rollback_completed_tasks = false;
|
||||
}
|
||||
|
||||
|
||||
size_t getMinCountOfReplicas(const IBackup & backup)
|
||||
{
|
||||
size_t min_count_of_replicas = static_cast<size_t>(-1);
|
||||
for (size_t shard_index : PathsInBackup(backup).getShards())
|
||||
{
|
||||
size_t count_of_replicas = PathsInBackup(backup).getReplicas(shard_index).size();
|
||||
min_count_of_replicas = std::min(min_count_of_replicas, count_of_replicas);
|
||||
}
|
||||
return min_count_of_replicas;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
#pragma once

#include <Parsers/ASTBackupQuery.h>
#include <Common/ThreadPool.h>


namespace DB
@ -12,17 +13,15 @@ class IRestoreTask;
using RestoreTaskPtr = std::unique_ptr<IRestoreTask>;
using RestoreTasks = std::vector<RestoreTaskPtr>;
struct RestoreSettings;
class IRestoreCoordination;
class Context;
using ContextPtr = std::shared_ptr<const Context>;
using ContextMutablePtr = std::shared_ptr<Context>;

/// Prepares restore tasks.
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const ASTBackupQuery::Elements & elements, const RestoreSettings & restore_settings);
RestoreTasks makeRestoreTasks(ContextMutablePtr context, const BackupPtr & backup, const ASTBackupQuery::Elements & elements, const RestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination, std::chrono::seconds timeout_for_restoring_metadata);

/// Executes restore tasks.
void executeRestoreTasks(RestoreTasks && tasks, size_t num_threads);

/// Returns the minimal count of replicas stored in the backup.
size_t getMinCountOfReplicas(const IBackup & backup);
void executeRestoreTasks(RestoreTasks && tasks, ThreadPool & thread_pool, const RestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination, std::chrono::seconds timeout_for_restoring_metadata);

}
@ -2,7 +2,6 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string_view>
|
||||
#include <filesystem>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
@ -44,6 +43,7 @@
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTCreateFunctionQuery.h>
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTUseQuery.h>
|
||||
@ -391,7 +391,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
|
||||
processed_rows += block.rows();
|
||||
|
||||
/// Even if all blocks are empty, we still need to initialize the output stream to write an empty result set.
|
||||
initBlockOutputStream(block, parsed_query);
|
||||
initOutputFormat(block, parsed_query);
|
||||
|
||||
/// The header block containing zero rows was used to initialize
|
||||
/// output_format, do not output it.
|
||||
@ -438,14 +438,14 @@ void ClientBase::onLogData(Block & block)
|
||||
|
||||
void ClientBase::onTotals(Block & block, ASTPtr parsed_query)
|
||||
{
|
||||
initBlockOutputStream(block, parsed_query);
|
||||
initOutputFormat(block, parsed_query);
|
||||
output_format->setTotals(block);
|
||||
}
|
||||
|
||||
|
||||
void ClientBase::onExtremes(Block & block, ASTPtr parsed_query)
|
||||
{
|
||||
initBlockOutputStream(block, parsed_query);
|
||||
initOutputFormat(block, parsed_query);
|
||||
output_format->setExtremes(block);
|
||||
}
|
||||
|
||||
@ -465,7 +465,7 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
|
||||
}
|
||||
|
||||
|
||||
void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
|
||||
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
|
||||
try
|
||||
{
|
||||
if (!output_format)
|
||||
@ -592,24 +592,33 @@ void ClientBase::initLogsOutputStream()
|
||||
}
|
||||
}
|
||||
|
||||
void ClientBase::updateSuggest(const ASTCreateQuery & ast_create)
|
||||
void ClientBase::updateSuggest(const ASTPtr & ast)
|
||||
{
|
||||
std::vector<std::string> new_words;
|
||||
|
||||
if (ast_create.database)
|
||||
new_words.push_back(ast_create.getDatabase());
|
||||
new_words.push_back(ast_create.getTable());
|
||||
|
||||
if (ast_create.columns_list && ast_create.columns_list->columns)
|
||||
if (auto * create = ast->as<ASTCreateQuery>())
|
||||
{
|
||||
for (const auto & elem : ast_create.columns_list->columns->children)
|
||||
if (create->database)
|
||||
new_words.push_back(create->getDatabase());
|
||||
new_words.push_back(create->getTable());
|
||||
|
||||
if (create->columns_list && create->columns_list->columns)
|
||||
{
|
||||
if (const auto * column = elem->as<ASTColumnDeclaration>())
|
||||
new_words.push_back(column->name);
|
||||
for (const auto & elem : create->columns_list->columns->children)
|
||||
{
|
||||
if (const auto * column = elem->as<ASTColumnDeclaration>())
|
||||
new_words.push_back(column->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
suggest->addWords(std::move(new_words));
|
||||
if (const auto * create_function = ast->as<ASTCreateFunctionQuery>())
|
||||
{
|
||||
new_words.push_back(create_function->getFunctionName());
|
||||
}
|
||||
|
||||
if (!new_words.empty())
|
||||
suggest->addWords(std::move(new_words));
|
||||
}
|
||||
|
||||
bool ClientBase::isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context)
|
||||
@ -640,13 +649,11 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
|
||||
/// always means a problem, i.e. if the table already exists, and it is not a
/// huge problem if the suggestion is added even on error, since this is
/// just a suggestion.
|
||||
if (auto * create = parsed_query->as<ASTCreateQuery>())
|
||||
{
|
||||
/// Do not update suggest, until suggestion will be ready
|
||||
/// (this will avoid extra complexity)
|
||||
if (suggest)
|
||||
updateSuggest(*create);
|
||||
}
|
||||
///
|
||||
/// Do not update suggest, until suggestion will be ready
|
||||
/// (this will avoid extra complexity)
|
||||
if (suggest)
|
||||
updateSuggest(parsed_query);
|
||||
|
||||
/// An INSERT query may have the data that follows query text.
|
||||
/// Send part of the query without data, because data will be sent separately.
|
||||
@ -1138,7 +1145,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
|
||||
ConstraintsDescription{},
|
||||
String{},
|
||||
};
|
||||
StoragePtr storage = StorageFile::create(in_file, global_context->getUserFilesPath(), args);
|
||||
StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
|
||||
storage->startup();
|
||||
SelectQueryInfo query_info;
|
||||
|
||||
@ -1422,15 +1429,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
||||
apply_query_settings(*with_output->settings_ast);
|
||||
|
||||
if (!connection->checkConnected())
|
||||
{
|
||||
auto poco_logs_level = Poco::Logger::parseLevel(config().getString("send_logs_level", "none"));
|
||||
/// Print under WARNING also because it is used by clickhouse-test.
|
||||
if (poco_logs_level >= Poco::Message::PRIO_WARNING)
|
||||
{
|
||||
fmt::print(stderr, "Connection lost. Reconnecting.\n");
|
||||
}
|
||||
connect();
|
||||
}
|
||||
|
||||
ASTPtr input_function;
|
||||
if (insert && insert->select)
|
||||
@ -1487,7 +1486,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
||||
|
||||
if (is_interactive)
|
||||
{
|
||||
std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
|
||||
std::cout << std::endl
|
||||
<< processed_rows << " row" << (processed_rows == 1 ? "" : "s")
|
||||
<< " in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
|
||||
progress_indication.writeFinalProgress();
|
||||
std::cout << std::endl << std::endl;
|
||||
}
|
||||
@ -2057,156 +2058,6 @@ void ClientBase::showClientVersion()
|
||||
}
|
||||
|
||||
|
||||
void ClientBase::readArguments(
|
||||
int argc,
|
||||
char ** argv,
|
||||
Arguments & common_arguments,
|
||||
std::vector<Arguments> & external_tables_arguments,
|
||||
std::vector<Arguments> & hosts_and_ports_arguments)
|
||||
{
|
||||
/** We allow different groups of arguments:
|
||||
* - common arguments;
|
||||
* - arguments for any number of external tables each in form "--external args...",
|
||||
* where possible args are file, name, format, structure, types;
|
||||
* - param arguments for prepared statements.
|
||||
* Split these groups before processing.
|
||||
*/
|
||||
|
||||
bool in_external_group = false;
|
||||
|
||||
std::string prev_host_arg;
|
||||
std::string prev_port_arg;
|
||||
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
{
|
||||
std::string_view arg = argv[arg_num];
|
||||
|
||||
if (arg == "--external")
|
||||
{
|
||||
in_external_group = true;
|
||||
external_tables_arguments.emplace_back(Arguments{""});
|
||||
}
|
||||
/// Options with value after equal sign.
|
||||
else if (
|
||||
in_external_group
|
||||
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|
||||
|| arg.starts_with("--types=")))
|
||||
{
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
}
|
||||
/// Options with value after whitespace.
|
||||
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
|
||||
{
|
||||
if (arg_num + 1 < argc)
|
||||
{
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
++arg_num;
|
||||
arg = argv[arg_num];
|
||||
external_tables_arguments.back().emplace_back(arg);
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
in_external_group = false;
|
||||
|
||||
/// Parameter arg after underline.
|
||||
if (arg.starts_with("--param_"))
|
||||
{
|
||||
auto param_continuation = arg.substr(strlen("--param_"));
|
||||
auto equal_pos = param_continuation.find_first_of('=');
|
||||
|
||||
if (equal_pos == std::string::npos)
|
||||
{
|
||||
/// param_name value
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
query_parameters.emplace(String(param_continuation), String(arg));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (equal_pos == 0)
|
||||
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
/// param_name=value
|
||||
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
|
||||
}
|
||||
}
|
||||
else if (arg.starts_with("--host") || arg.starts_with("-h"))
|
||||
{
|
||||
std::string host_arg;
|
||||
/// --host host
|
||||
if (arg == "--host" || arg == "-h")
|
||||
{
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
host_arg = "--host=";
|
||||
host_arg.append(arg);
|
||||
}
|
||||
else
|
||||
host_arg = arg;
|
||||
|
||||
/// --port port1 --host host1
|
||||
if (!prev_port_arg.empty())
|
||||
{
|
||||
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
|
||||
prev_port_arg.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// --host host1 --host host2
|
||||
if (!prev_host_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||
|
||||
prev_host_arg = host_arg;
|
||||
}
|
||||
}
|
||||
else if (arg.starts_with("--port"))
|
||||
{
|
||||
auto port_arg = String{arg};
|
||||
/// --port port
|
||||
if (arg == "--port")
|
||||
{
|
||||
port_arg.push_back('=');
|
||||
++arg_num;
|
||||
if (arg_num >= argc)
|
||||
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
|
||||
arg = argv[arg_num];
|
||||
port_arg.append(arg);
|
||||
}
|
||||
|
||||
/// --host host1 --port port1
|
||||
if (!prev_host_arg.empty())
|
||||
{
|
||||
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
|
||||
prev_host_arg.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// --port port1 --port port2
|
||||
if (!prev_port_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||
|
||||
prev_port_arg = port_arg;
|
||||
}
|
||||
}
|
||||
else if (arg == "--allow_repeated_settings")
|
||||
allow_repeated_settings = true;
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
}
|
||||
if (!prev_host_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_host_arg});
|
||||
if (!prev_port_arg.empty())
|
||||
hosts_and_ports_arguments.push_back({prev_port_arg});
|
||||
}
|
||||
|
||||
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
|
||||
{
|
||||
if (allow_repeated_settings)
|
||||
|
@ -106,6 +106,14 @@ protected:

bool processQueryText(const String & text);

virtual void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) = 0;


private:
void receiveResult(ASTPtr parsed_query);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -131,22 +139,16 @@ private:
void sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query);
void sendExternalTables(ASTPtr parsed_query);

void initBlockOutputStream(const Block & block, ASTPtr parsed_query);
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();

String prompt() const;

void resetOutput();
void outputQueryInfo(bool echo_query_);
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments);
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);

void updateSuggest(const ASTCreateQuery & ast_create);
void updateSuggest(const ASTPtr & ast);

void initQueryIdFormats();