Merge branch 'master' into hashid

This commit is contained in:
Michael Nutt 2022-05-12 03:12:10 +09:00 committed by GitHub
commit 2ff13c4e5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
889 changed files with 6782 additions and 158261 deletions

View File

@ -1,185 +1,148 @@
Checks: '-*,
misc-misleading-bidirectional,
misc-misleading-identifier,
misc-misplaced-const,
misc-redundant-expression,
misc-static-assert,
misc-throw-by-value-catch-by-reference,
misc-unconventional-assign-operator,
misc-uniqueptr-reset-release,
misc-unused-alias-decls,
misc-unused-parameters,
misc-unused-using-decls,
Checks: '*,
-abseil-*,
modernize-avoid-bind,
modernize-loop-convert,
modernize-macro-to-enum,
modernize-make-shared,
modernize-make-unique,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-random-shuffle,
modernize-shrink-to-fit,
modernize-use-bool-literals,
modernize-use-equals-default,
modernize-use-equals-delete,
modernize-use-nullptr,
modernize-use-transparent-functors,
modernize-use-uncaught-exceptions,
modernize-use-using,
-altera-*,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-move-constructor,
performance-trivially-destructible,
performance-unnecessary-copy-initialization,
-android-*,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-contains,
readability-container-size-empty,
readability-convert-member-functions-to-static,
readability-delete-null-pointer,
readability-deleted-default,
readability-duplicate-include,
readability-identifier-naming,
readability-inconsistent-declaration-parameter-name,
readability-make-member-function-const,
readability-misplaced-array-index,
readability-non-const-parameter,
readability-qualified-auto,
readability-redundant-access-specifiers,
readability-redundant-control-flow,
readability-redundant-function-ptr-dereference,
readability-redundant-member-init,
readability-redundant-preprocessor,
readability-redundant-smartptr-get,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-simplify-boolean-expr,
readability-simplify-subscript-expr,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
-bugprone-assert-side-effect,
-bugprone-branch-clone,
-bugprone-dynamic-static-initializers,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
-bugprone-forwarding-reference-overload,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-lambda-function-name,
-bugprone-misplaced-widening-cast,
-bugprone-narrowing-conversions,
-bugprone-no-escape,
-bugprone-not-null-terminated-result,
-bugprone-signal-handler,
-bugprone-spuriously-wake-up-functions,
-bugprone-suspicious-semicolon,
-bugprone-unhandled-exception-at-new,
-bugprone-unhandled-self-assignment,
bugprone-argument-comment,
bugprone-bad-signal-to-kill-thread,
bugprone-bool-pointer-implicit-conversion,
bugprone-copy-constructor-init,
bugprone-dangling-handle,
bugprone-fold-init-type,
bugprone-forward-declaration-namespace,
bugprone-inaccurate-erase,
bugprone-incorrect-roundings,
bugprone-infinite-loop,
bugprone-integer-division,
bugprone-lambda-function-name,
bugprone-macro-parentheses,
bugprone-macro-repeated-side-effects,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-artithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-move-forwarding-reference,
bugprone-multiple-statement-macro,
bugprone-parent-virtual-call,
bugprone-posix-return,
bugprone-redundant-branch-condition,
bugprone-reserved-identifier,
bugprone-shared-ptr-array-mismatch,
bugprone-signed-char-misuse,
bugprone-sizeof-container,
bugprone-sizeof-expression,
bugprone-string-constructor,
bugprone-string-integer-assignment,
bugprone-string-literal-with-embedded-nul,
bugprone-stringview-nullptr,
bugprone-suspicious-enum-usage,
bugprone-suspicious-include,
bugprone-suspicious-memory-comparison,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-string-compare,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-throw-keyword-missing,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
-cert-dcl16-c,
-cert-dcl37-c,
-cert-dcl51-cpp,
-cert-dcl58-cpp,
-cert-err58-cpp,
-cert-err60-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
-cert-oop54-cpp,
-cert-oop57-cpp,
-cert-oop58-cpp,
cert-dcl21-cpp,
cert-dcl50-cpp,
cert-env33-c,
cert-err34-c,
cert-err52-cpp,
cert-flp30-c,
cert-mem57-cpp,
cert-msc50-cpp,
cert-oop58-cpp,
-clang-analyzer-core.DynamicTypePropagation,
-clang-analyzer-core.uninitialized.CapturedBlockVariable,
google-build-explicit-make-pair,
google-build-namespaces,
google-default-arguments,
google-explicit-constructor,
google-readability-avoid-underscore-in-googletest-name,
google-readability-casting,
google-runtime-int,
google-runtime-operator,
-clang-analyzer-optin.performance.Padding,
-clang-analyzer-optin.portability.UnixAPI,
hicpp-exception-baseclass,
-clang-analyzer-security.insecureAPI.bzero,
-clang-analyzer-security.insecureAPI.strcpy,
clang-analyzer-core.CallAndMessage,
clang-analyzer-core.DivideZero,
clang-analyzer-core.NonNullParamChecker,
clang-analyzer-core.NullDereference,
clang-analyzer-core.StackAddressEscape,
clang-analyzer-core.UndefinedBinaryOperatorResult,
clang-analyzer-core.VLASize,
clang-analyzer-core.uninitialized.ArraySubscript,
clang-analyzer-core.uninitialized.Assign,
clang-analyzer-core.uninitialized.Branch,
clang-analyzer-core.uninitialized.CapturedBlockVariable,
clang-analyzer-core.uninitialized.UndefReturn,
clang-analyzer-cplusplus.InnerPointer,
clang-analyzer-cplusplus.Move,
clang-analyzer-cplusplus.NewDelete,
clang-analyzer-cplusplus.NewDeleteLeaks,
clang-analyzer-cplusplus.PlacementNewChecker,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-optin.cplusplus.UninitializedObject,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,
clang-analyzer-security.insecureAPI.bcopy,
clang-analyzer-security.insecureAPI.bzero,
clang-analyzer-security.insecureAPI.getpw,
clang-analyzer-security.insecureAPI.gets,
clang-analyzer-security.insecureAPI.mkstemp,
clang-analyzer-security.insecureAPI.mktemp,
clang-analyzer-security.insecureAPI.rand,
clang-analyzer-security.insecureAPI.strcpy,
clang-analyzer-unix.Malloc,
clang-analyzer-unix.MallocSizeof,
clang-analyzer-unix.MismatchedDeallocator,
clang-analyzer-unix.Vfork,
clang-analyzer-unix.cstring.BadSizeArg,
clang-analyzer-unix.cstring.NullArg,
-cppcoreguidelines-*,
boost-use-to-string,
-concurrency-mt-unsafe,
alpha.security.cert.env.InvalidPtr,
-darwin-*,
-fuchsia-*,
-google-build-using-namespace,
-google-global-names-in-headers,
-google-readability-braces-around-statements,
-google-readability-function-size,
-google-readability-namespace-comments,
-google-readability-todo,
-google-upgrade-googletest-case,
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-deprecated-headers,
-hicpp-explicit-conversions,
-hicpp-function-size,
-hicpp-invalid-access-moved,
-hicpp-member-init,
-hicpp-move-const-arg,
-hicpp-multiway-paths-covered,
-hicpp-named-parameter,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-signed-bitwise,
-hicpp-special-member-functions,
-hicpp-uppercase-literal-suffix,
-hicpp-use-auto,
-hicpp-use-emplace,
-hicpp-use-equals-default,
-hicpp-use-noexcept,
-hicpp-use-override,
-hicpp-vararg,
-llvm-*,
-llvmlibc-*,
-openmp-*,
-misc-definitions-in-headers,
-misc-new-delete-overloads,
-misc-no-recursion,
-misc-non-copyable-objects,
-misc-non-private-member-variables-in-classes,
-misc-static-assert,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-deprecated-headers,
-modernize-deprecated-ios-base-aliases,
-modernize-pass-by-value,
-modernize-replace-auto-ptr,
-modernize-replace-disallow-copy-and-assign-macro,
-modernize-return-braced-init-list,
-modernize-unary-static-assert,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-emplace,
-modernize-use-equals-default,
-modernize-use-nodiscard,
-modernize-use-noexcept,
-modernize-use-override,
-modernize-use-trailing-return-type,
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
-performance-type-promotion-in-math-fn,
-performance-trivially-destructible,
-performance-unnecessary-value-param,
-portability-simd-intrinsics,
-readability-convert-member-functions-to-static,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-misleading-indentation,
-readability-named-parameter,
-readability-qualified-auto,
-readability-redundant-declaration,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-zirkon-*,
'
WarningsAsErrors: '*'
CheckOptions:

View File

@ -1,7 +1,7 @@
### Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
- Bug Fix (user-visible misbehavior in official stable or prestable release)
- Performance Improvement
- Backward Incompatible Change
- Build/Testing/Packaging Improvement

View File

@ -1,48 +0,0 @@
name: "CodeQL"
"on":
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
env:
CC: clang-14
CXX: clang++-14
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: ['cpp']
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'true'
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
- name: Build
run: |
sudo apt-get install -yq ninja-build
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
mkdir build
cd build
cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
ninja
rm -rf ../contrib
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

View File

@ -156,3 +156,19 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
FinishCheck:
needs:
- StyleCheck
- DockerHubPush
- DocsCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py

View File

@ -362,50 +362,50 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinGCC:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=binary_gcc
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# BuilderBinGCC:
# needs: [DockerHubPush]
# runs-on: [self-hosted, builder]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/build_check
# IMAGES_PATH=${{runner.temp}}/images_path
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
# CACHES_PATH=${{runner.temp}}/../ccaches
# CHECK_NAME=ClickHouse build check (actions)
# BUILD_NAME=binary_gcc
# EOF
# - name: Download changed images
# uses: actions/download-artifact@v2
# with:
# name: changed_images
# path: ${{ env.IMAGES_PATH }}
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
# - name: Upload build URLs to artifacts
# if: ${{ success() || failure() }}
# uses: actions/upload-artifact@v2
# with:
# name: ${{ env.BUILD_URLS }}
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
# - name: Cleanup
# if: always()
# run: |
# docker kill "$(docker ps -q)" ||:
# docker rm -f "$(docker ps -a -q)" ||:
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -1030,7 +1030,7 @@ jobs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderBinFreeBSD
- BuilderBinGCC
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinTidy
- BuilderDebSplitted
@ -2685,40 +2685,40 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
UnitTestsReleaseGCC:
needs: [BuilderBinGCC]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-gcc, actions)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Unit test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 unit_tests_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
# UnitTestsReleaseGCC:
# needs: [BuilderBinGCC]
# runs-on: [self-hosted, fuzzer-unit-tester]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
# uses: actions/download-artifact@v2
# with:
# path: ${{ env.REPORTS_PATH }}
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Unit test
# run: |
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci"
# python3 unit_tests_check.py "$CHECK_NAME"
# - name: Cleanup
# if: always()
# run: |
# docker kill "$(docker ps -q)" ||:
# docker rm -f "$(docker ps -a -q)" ||:
# sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]

View File

@ -375,50 +375,50 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinGCC:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=binary_gcc
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# BuilderBinGCC:
# needs: [DockerHubPush, FastTest]
# runs-on: [self-hosted, builder]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/build_check
# IMAGES_PATH=${{runner.temp}}/images_path
# REPO_COPY=${{runner.temp}}/build_check/ClickHouse
# CACHES_PATH=${{runner.temp}}/../ccaches
# CHECK_NAME=ClickHouse build check (actions)
# BUILD_NAME=binary_gcc
# EOF
# - name: Download changed images
# uses: actions/download-artifact@v2
# with:
# name: changed_images
# path: ${{ runner.temp }}/images_path
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Build
# run: |
# git -C "$GITHUB_WORKSPACE" submodule sync --recursive
# git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
# - name: Upload build URLs to artifacts
# if: ${{ success() || failure() }}
# uses: actions/upload-artifact@v2
# with:
# name: ${{ env.BUILD_URLS }}
# path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
# - name: Cleanup
# if: always()
# run: |
# docker kill "$(docker ps -q)" ||:
# docker rm -f "$(docker ps -a -q)" ||:
# sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
@ -1077,7 +1077,7 @@ jobs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
- BuilderBinFreeBSD
- BuilderBinGCC
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinTidy
- BuilderDebSplitted
@ -2886,40 +2886,40 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
UnitTestsReleaseGCC:
needs: [BuilderBinGCC]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-gcc, actions)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Unit test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 unit_tests_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
# UnitTestsReleaseGCC:
# needs: [BuilderBinGCC]
# runs-on: [self-hosted, fuzzer-unit-tester]
# steps:
# - name: Set envs
# run: |
# cat >> "$GITHUB_ENV" << 'EOF'
# TEMP_PATH=${{runner.temp}}/unit_tests_asan
# REPORTS_PATH=${{runner.temp}}/reports_dir
# CHECK_NAME=Unit tests (release-gcc, actions)
# REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
# EOF
# - name: Download json reports
# uses: actions/download-artifact@v2
# with:
# path: ${{ env.REPORTS_PATH }}
# - name: Clear repository
# run: |
# sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
# - name: Check out repository code
# uses: actions/checkout@v2
# - name: Unit test
# run: |
# sudo rm -fr "$TEMP_PATH"
# mkdir -p "$TEMP_PATH"
# cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
# cd "$REPO_COPY/tests/ci"
# python3 unit_tests_check.py "$CHECK_NAME"
# - name: Cleanup
# if: always()
# run: |
# docker kill "$(docker ps -q)" ||:
# docker rm -f "$(docker ps -a -q)" ||:
# sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]

View File

@ -3,9 +3,14 @@ name: TagsStableWorkflow
# - Sends it to JFROG Artifactory
# - Adds them to the release assets
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
push:
tags:
- 'v*-prestable'
- 'v*-stable'
- 'v*-lts'
@ -15,26 +20,36 @@ jobs:
runs-on: [self-hosted, style-checker]
steps:
- name: Get tag name
run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
run: |
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
- name: Check out repository code
uses: actions/checkout@v2
with:
ref: master
fetch-depth: 0
- name: Generate versions
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git fetch --tags
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
GID=$(id -d "${UID}")
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
/ClickHouse/utils/changelog/changelog.py -vv --gh-user-or-token="$GITHUB_TOKEN" \
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" --jobs=5 "${GITHUB_TAG}"
git add "./docs/changelogs/${GITHUB_TAG}.md"
git diff HEAD
- name: Create Pull Request
uses: peter-evans/create-pull-request@v3
with:
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }}
commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
branch: auto/${{ env.GITHUB_TAG }}
delete-branch: true
title: Update version_date.tsv after ${{ env.GITHUB_TAG }}
title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
body: |
Update version_date.tsv after ${{ env.GITHUB_TAG }}
Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
Changelog category (leave one):
### Changelog category (leave one):
- Not for changelog (changelog entry is not required)

3
.gitmodules vendored
View File

@ -262,6 +262,9 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash.git
[submodule "contrib/hashidsxx"]
path = contrib/hashidsxx
url = https://github.com/schoentoon/hashidsxx.git

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
QUERIES_FILE="queries.sql"
TABLE=$1
TRIES=3
PARAMS="--host ... --secure --password ..."
if [ -x ./clickhouse ]
then
CLICKHOUSE_CLIENT="./clickhouse client"
elif command -v clickhouse-client >/dev/null 2>&1
then
CLICKHOUSE_CLIENT="clickhouse-client"
else
echo "clickhouse-client is not found"
exit 1
fi
QUERY_ID_PREFIX="benchmark_$RANDOM"
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query
do
for i in $(seq 1 $TRIES)
do
QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}"
${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query"
echo -n '.'
done
QUERY_NUM=$((QUERY_NUM + 1))
echo
done
sleep 10
${CLICKHOUSE_CLIENT} ${PARAMS} --query "
WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run
SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ','
FROM clusterAllReplicas(default, system.query_log)
WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%'
GROUP BY num ORDER BY num FORMAT TSV
"

View File

@ -94,6 +94,7 @@ if (ENABLE_FUZZING)
add_contrib (libprotobuf-mutator-cmake libprotobuf-mutator)
endif()
add_contrib (wyhash-cmake wyhash)
add_contrib (cityhash102)
add_contrib (libfarmhash)
add_contrib (icu-cmake icu)

View File

@ -364,10 +364,8 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_snappy.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZLIB)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
if (ARROW_WITH_ZSTD)
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
endif ()
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
add_library(_arrow ${ARROW_SRCS})
@ -383,7 +381,6 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::zstd
)
target_link_libraries(_arrow PUBLIC _orc)

1
contrib/wyhash vendored Submodule

@ -0,0 +1 @@
Subproject commit 991aa3dab624e50b066f7a02ccc9f6935cc740ec

View File

@ -0,0 +1,3 @@
add_library(wyhash INTERFACE)
target_include_directories(wyhash SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/wyhash")
add_library(ch_contrib::wyhash ALIAS wyhash)

View File

@ -1,3 +1,4 @@
# rebuild in #36968
# docker build -t clickhouse/docs-builder .
# nodejs 17 prefers ipv6 and is broken in our environment
FROM node:16.14.2-alpine3.15

View File

@ -176,6 +176,7 @@ function clone_submodules
contrib/NuRaft
contrib/jemalloc
contrib/replxx
contrib/wyhash
)
git submodule sync

View File

@ -28,8 +28,11 @@ done
set -e
# cleanup for retry run if volume is not recreated
docker kill "$(docker ps -aq)" || true
docker rm "$(docker ps -aq)" || true
# shellcheck disable=SC2046
{
docker kill $(docker ps -aq) || true
docker rm $(docker ps -aq) || true
}
echo "Start tests"
export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse

View File

@ -101,7 +101,12 @@ EOL
function stop()
{
clickhouse stop
clickhouse stop --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
sleep 5
gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||:
clickhouse stop --force
}
function start()
@ -198,10 +203,14 @@ clickhouse-client --query "SHOW TABLES FROM test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
# NOTE Disable thread fuzzer before server start with data after stress test.
# In debug build it can take a lot of time.
unset "${!THREAD_@}"
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
@ -387,7 +396,7 @@ for table in query_log trace_log; do
done
# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore)

View File

@ -13,6 +13,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
libxml2-utils \
moreutils \
pylint \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \

View File

@ -18,3 +18,6 @@ echo "Check workflows" | ts
echo "Check shell scripts with shellcheck" | ts
./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
/process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
echo "Check help for changelog generator works" | ts
cd ../changelog || exit 1
./changelog.py -h 2>/dev/null 1>&2

View File

@ -12,7 +12,7 @@ UNKNOWN_SIGN = "[ UNKNOWN "
SKIPPED_SIGN = "[ SKIPPED "
HUNG_SIGN = "Found hung queries in processlist"
NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted"
@ -25,14 +25,14 @@ def process_test_log(log_path):
success = 0
hung = False
retries = False
task_timeout = True
success_finish = False
test_results = []
with open(log_path, "r") as test_file:
for line in test_file:
original_line = line
line = line.strip()
if any(s in line for s in NO_TASK_TIMEOUT_SIGNS):
task_timeout = False
if any(s in line for s in SUCCESS_FINISH_SIGNS):
success_finish = True
if HUNG_SIGN in line:
hung = True
if RETRIES_SIGN in line:
@ -81,7 +81,7 @@ def process_test_log(log_path):
failed,
success,
hung,
task_timeout,
success_finish,
retries,
test_results,
)
@ -108,7 +108,7 @@ def process_result(result_path):
failed,
success,
hung,
task_timeout,
success_finish,
retries,
test_results,
) = process_test_log(result_path)
@ -123,10 +123,10 @@ def process_result(result_path):
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0", ""))
elif task_timeout:
description = "Timeout, "
elif not success_finish:
description = "Tests are not finished, "
state = "failure"
test_results.append(("Timeout", "FAIL", "0", ""))
test_results.append(("Tests are not finished", "FAIL", "0", ""))
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0", ""))

View File

@ -0,0 +1,217 @@
### ClickHouse release v22.1.1.2542-prestable FIXME as compared to v21.12.1.9017-prestable
#### Backward Incompatible Change
* Change ZooKeeper path for zero-copy marks for shared data. Fix for remove marks in ZooKeeper for renamed parts. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)).
* - Account for scalar subqueries. With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)).
* Add `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). The functions `left` and `right` were previously implemented in parser. Upgrade notes: distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)).
* add grouping sets function, like GROUP BY grouping sets (a, b, (a, b)). [#26869](https://github.com/ClickHouse/ClickHouse/pull/26869) ([taylor12805](https://github.com/taylor12805)).
* Added an ability to read from all replicas within a shard during distributed query. To enable this, set `allow_experimental_parallel_reading_from_replicas=true` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Start and stop servers when hosts and ports configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)).
* Create any kind of view with comment. ... [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)).
* Implement hive table engine to access apache hive from clickhouse. Related RFC: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). [#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([李扬](https://github.com/taiyang-li)).
* * Automatic cluster discovery via Zoo/Keeper. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([Vladimir C](https://github.com/vdimir)).
* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. Current implementation allows for all the basic disk operations. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)).
* * Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. [#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)).
* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)).
* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)).
* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).
* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorial values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) ([Maksim Kita](https://github.com/kitaisreal)).
* Add MONTHNAME function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)).
* Auto detect file extension. Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([zhongyuankai](https://github.com/zhongyuankai)).
* Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)).
* Add new h3 miscellaneous functions: `h3DegsToRads`, `h3RadsToDegs`, `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)).
* Detect format by file extension in file/hdfs/s3/url table functions and HDFS/S3/URL table engines. [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)).
#### Performance Improvement
* Non significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip less amount of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)).
* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)).
* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Improvement
* Now date time conversion functions that generates time before 1970-01-01 00:00:00 will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)).
* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds. Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)).
* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)).
* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasfl)).
* - Ignore parse failure of opentelemetry's `traceparent` header. [#32116](https://github.com/ClickHouse/ClickHouse/pull/32116) ([Frank Chen](https://github.com/FrankChen021)).
* Improve keeper writing performance by optimization the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)).
* Allows to connect to mongodb 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483),. [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disable some optimizations for window functions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow empty credentials for mongo engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Improve handling nested structures with missing columns while reading protobuf. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)).
* Events clause support for window view watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)).
* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` for control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)).
* support Date32 for `genarateRandom` engine. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)).
* Support authSource option for storage MongoDB. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to control connection timeouts for mysql (previously was supported only for dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously default connect_timeout was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Flush all In-Memory data parts when WAL is not enabled while shutdown server or detaching table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)).
* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)).
* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)).
* Support hints for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237),. [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)).
* - Do not prepend THREADS_COUNT with -j to avoid additional prepending in subprocesses. [#32844](https://github.com/ClickHouse/ClickHouse/pull/32844) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)).
* Added support for `BIT` data type in `MaterializedMySQL`. Closes [#15182](https://github.com/ClickHouse/ClickHouse/issues/15182), [#32233](https://github.com/ClickHouse/ClickHouse/issues/32233). [#32900](https://github.com/ClickHouse/ClickHouse/pull/32900) ([zzsmdfj](https://github.com/zzsmdfj)).
* More efficient handling of globs for url storage. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). [#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)).
* This only happens in unofficial builds. Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). [#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)).
* Dictionaries added `Date32` date type support. Closes [#32913](https://github.com/ClickHouse/ClickHouse/issues/32913). [#32971](https://github.com/ClickHouse/ClickHouse/pull/32971) ([Maksim Kita](https://github.com/kitaisreal)).
* Short circuit evaluation function `throwIf` support. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve Bool type serialization and deserialization. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)).
* Send profile info in clickhouse-local. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([李扬](https://github.com/taiyang-li)).
* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095)#issuecomment-1000577517. [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add retry for Postgres connect in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Correctly prevent nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)).
* If storage supports SETTINGS allow to pass them as key value or via config. Add this support for mysql. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to cancel formats Arrow / Parquet / ORC which failed to be cancelled it case of big files and setting input_format_allow_seeks as false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* parseDateTimeBestEffort support Unix Timestamp with Milliseconds. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)).
* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)).
* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* support `<secure/>` in cluster configuration. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)).
* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)).
* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* clickhouse-local: track memory under --max_memory_usage_in_client option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)).
* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add some building options in system.build_options. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([李扬](https://github.com/taiyang-li)).
* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add config to enable ipv4 or ipv6. This close [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)).
* add function decodeURLFormComponent. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)).
* Implement Materialized view `getVirtuals` function. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([zhongyuankai](https://github.com/zhongyuankai)).
#### Bug Fix
* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)).
* - Clickhouse Keeper handler should remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)).
* Fix null pointer dereference in low cardinality data when deserializing LowCardinality data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)).
#### Build/Testing/Packaging Improvement
* - Add arm64 packages - Stream python logs in realtime with `PYTHONUNBUFFERED=1` - Fix building docker images in docker/packager/packager script. [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Terminate build when linker path not found. [#32437](https://github.com/ClickHouse/ClickHouse/pull/32437) ([JackyWoo](https://github.com/JackyWoo)).
* - Create a global ENV per job - Clean CCACHE after a build is over. [#32478](https://github.com/ClickHouse/ClickHouse/pull/32478) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* PENDING (Should mention submodule updates and versions). [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)).
* Remove readline support. [#32574](https://github.com/ClickHouse/ClickHouse/pull/32574) ([Azat Khuzhin](https://github.com/azat)).
* Fix build issue related to azure blob storage. [#32788](https://github.com/ClickHouse/ClickHouse/pull/32788) ([Amos Bird](https://github.com/amosbird)).
* - Unify init scripts for every worker runner type - Install pigz in AMI. [#32800](https://github.com/ClickHouse/ClickHouse/pull/32800) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use all available in container processors for PVS studio check and fast tests. Delete coverage image. [#32854](https://github.com/ClickHouse/ClickHouse/pull/32854) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Rename main to pull_request - Add BuilderDebAarch64 to all workflows - Use docker buildkit, save building cache to docker hub and reuse it - Build x86_64 and arm64 docker images separately, then merge them together to a multi-architecture manifest - Tune many docker images to being multi-architecture - Use the images from the current PR/commit in the following dependent builds - Upgrade mysql client in stateless-tests image - Add functional tests for aarch64 for PR actions (forced green for a while) - Add python typing to some scripts - Add docker buildkit to runners' init script - Add func-tester-aarch64 runners - Use `docker login --password-stdin` to not expose password on exception. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)).
* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid strict checking when `ENABLE_AZURE_BLOB_STORAGE = 0`. https://github.com/ClickHouse/ClickHouse/pull/32948#discussion_r773168611 cc @nikitamikhaylov. [#33219](https://github.com/ClickHouse/ClickHouse/pull/33219) ([Amos Bird](https://github.com/amosbird)).
* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)).
* Don't use particular encoding for diff-strings, it may contain multiple different encodings. [#33336](https://github.com/ClickHouse/ClickHouse/pull/33336) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Avoid strict checking when ENABLE_AZURE_BLOB_STORAGE = 0. This is another try on behalf of https://github.com/ClickHouse/ClickHouse/pull/33219 , which was reverted likely due to CI issues. [#33346](https://github.com/ClickHouse/ClickHouse/pull/33346) ([Amos Bird](https://github.com/amosbird)).
* During migration from Yandex to github actions we've lost static links to the latest master ([doc](https://clickhouse.com/docs/en/getting-started/install/#from-single-binary)) It solves issue [#33480](https://github.com/ClickHouse/ClickHouse/issues/33480) partially. [#33559](https://github.com/ClickHouse/ClickHouse/pull/33559) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Restore a lost description checking. [#33591](https://github.com/ClickHouse/ClickHouse/pull/33591) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add `actionlint` for workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove editing /etc/hosts from Dockerfile. [#33635](https://github.com/ClickHouse/ClickHouse/pull/33635) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Properly separate thrift-cmake from arrow-cmake after https://github.com/ClickHouse/ClickHouse/pull/31104 . cc @taiyang-li. [#33661](https://github.com/ClickHouse/ClickHouse/pull/33661) ([Amos Bird](https://github.com/amosbird)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Fixed CAST from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)).
* Fix base64Encode adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)).
* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)).
* Fix 'APPLY lambda' parsing which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)).
* Some replication queue entries might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_<part_name>` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix unix timestamp Millisecond convert to DateTime64, fractional part calc reversed. [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)).
* Fix broken select query when there are more than 2 row policies on same column, begin at second queries on the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)).
* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)).
* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named ALL or DISTINCT. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The Proper handle of the case with apache arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)).
* Fix crash in `JoinCommon::removeColumnNullability`, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([Vladimir C](https://github.com/vdimir)).
* fix groupBitmapAnd function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)).
* Fix async inserts with formats CustomSeparated, Template, Regexp, MsgPack and JSONAsString. Previousely async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix sparse_hashed dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)).
* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible exception at RabbitMQ storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix surprisingly bad code in function `file`. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* close [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([Vladimir C](https://github.com/vdimir)).
* Fix LOGICAL_ERROR when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)).
* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have monotonous function in prefix of sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)).
* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)).
* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is pared as `GROUP BY tuple(...)`) over Kafka/RabbitMQ. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* fix crash when used fuzzBits with multiply same FixedString, Close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)).
* Fix `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query reference the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)).
* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix optimization with lazy seek for async reads from remote fs. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed --echo option is not used by clickhouse-client in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)).
* Fix MV query with multiple chunk result. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Close [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([Vladimir C](https://github.com/vdimir)).
* Fix ORC stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([Ernest Zaslavsky](https://github.com/kreuzerkrieg)).
* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)).
* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix hdfs url check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix wrong tuple output in CSV format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded the the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)).
* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix s3 table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Don't allow to write into S3 if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)).
* fix incorrect metric: StorageBufferBytes. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)).
* Fix MaterializedPostreSQL detach/attach (removing / adding to replication) tables with non-default schema. Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952),. [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix ddl validation. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix WHERE 1=0 for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)).
* Fix ACLMap num, because acl_to_num will erase. [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246) ([小路](https://github.com/nicelulu)).
* Fix ACL with explicit digit hash in clickhouse-keeper: now the behavior consistent with zookeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)).
* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)).
* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not try to read pass EOF (to workaround a bug in a kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible use-after-free for INSERT into MV with concurrent DROP ([#32572](https://github.com/ClickHouse/ClickHouse/issues/32572) significantly reduce the race window, this one should completely eliminate it). [#33386](https://github.com/ClickHouse/ClickHouse/pull/33386) ([Azat Khuzhin](https://github.com/azat)).
* Fix query cancellation in case of allow_experimental_parallel_reading_from_replicas. [#33456](https://github.com/ClickHouse/ClickHouse/pull/33456) ([Azat Khuzhin](https://github.com/azat)).
* Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix dictionary expressions for RangeHashedDictionary range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Close issue: [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289) Fix bug when query view with setting offset and limit. [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)).
* Fix parsing incorrect queries with FROM INFILE statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix segfault in arrowSchemaToCHHeader if schema contains Dictionary type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)).
* session_id_counter poniter to next slot. [#33555](https://github.com/ClickHouse/ClickHouse/pull/33555) ([小路](https://github.com/nicelulu)).
* Fix segfault in Avro that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix wrong database for JOIN w/o explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL ENTRY
* NO CL ENTRY: 'Update CHANGELOG.md'. [#32472](https://github.com/ClickHouse/ClickHouse/pull/32472) ([Rich Raposa](https://github.com/rfraposa)).
* NO CL ENTRY: 'Revert "Split long tests into multiple checks"'. [#32514](https://github.com/ClickHouse/ClickHouse/pull/32514) ([alesapin](https://github.com/alesapin)).
* NO CL ENTRY: 'Revert "Revert "Split long tests into multiple checks""'. [#32515](https://github.com/ClickHouse/ClickHouse/pull/32515) ([alesapin](https://github.com/alesapin)).
* NO CL ENTRY: 'blog post how to enable predictive capabilities in Clickhouse'. [#32768](https://github.com/ClickHouse/ClickHouse/pull/32768) ([Tom Risse](https://github.com/flickerbox-tom)).
* NO CL ENTRY: 'Revert "Fix build issue related to azure blob storage"'. [#32845](https://github.com/ClickHouse/ClickHouse/pull/32845) ([alesapin](https://github.com/alesapin)).
* NO CL ENTRY: 'Revert "Dictionaries added Date32 type support"'. [#33053](https://github.com/ClickHouse/ClickHouse/pull/33053) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Updated Lawrence Berkeley National Lab stats'. [#33066](https://github.com/ClickHouse/ClickHouse/pull/33066) ([Michael Smitasin](https://github.com/michaelsmitasin)).
* NO CL ENTRY: 'fix AggregateFunctionGroupBitmapData function rb_contains rb_remove'. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)).
* NO CL ENTRY: 'Fix for example request with settings'. [#33143](https://github.com/ClickHouse/ClickHouse/pull/33143) ([Vitaly Artemyev](https://github.com/VitalyArt)).
* NO CL ENTRY: 'Revert "Grouping sets dev"'. [#33186](https://github.com/ClickHouse/ClickHouse/pull/33186) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Optimize MergeTreePartsMover'. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([zhongyuankai](https://github.com/zhongyuankai)).
* NO CL ENTRY: 'rm redundant judge in hashmap iter operation'. [#33285](https://github.com/ClickHouse/ClickHouse/pull/33285) ([zbtzbtzbt](https://github.com/zbtzbtzbt)).
* NO CL ENTRY: 'fix hang up with command 'drop table system.query_log sync''. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
* NO CL ENTRY: 'Improve query performance of system tables'. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([zhongyuankai](https://github.com/zhongyuankai)).
* NO CL ENTRY: 'Revert "Better cmake script for azure blob"'. [#33319](https://github.com/ClickHouse/ClickHouse/pull/33319) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* NO CL ENTRY: 'Added Superwall to adopters list'. [#33573](https://github.com/ClickHouse/ClickHouse/pull/33573) ([Justin Hilliard](https://github.com/jahilliard)).
* NO CL ENTRY: 'Revert "Ignore parse failure of opentelemetry header"'. [#33594](https://github.com/ClickHouse/ClickHouse/pull/33594) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release):
* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL CATEGORY
* Fix Regular Expression while key path search. [#33023](https://github.com/ClickHouse/ClickHouse/pull/33023) ([mreddy017](https://github.com/mreddy017)).
* - Allow to split GraphiteMergeTree rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)).
#### Bug Fix (v21.9.4.35-stable)
* Fix [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)).
#### New Feature / New Tool
* Tool for collecting diagnostics data. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)).

View File

@ -0,0 +1,2 @@
### ClickHouse release v22.1.2.2-stable FIXME as compared to v22.1.1.2542-prestable

View File

@ -0,0 +1,2 @@
### ClickHouse release v22.1.3.7-stable FIXME as compared to v22.1.2.2-stable

View File

@ -0,0 +1,19 @@
### ClickHouse release v22.1.4.30-stable FIXME as compared to v22.1.3.7-stable
#### Build/Testing/Packaging Improvement
* Backport CI checks to 22.1 release branch. [#34897](https://github.com/ClickHouse/ClickHouse/pull/34897) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#34119](https://github.com/ClickHouse/ClickHouse/issues/34119): Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#34124](https://github.com/ClickHouse/ClickHouse/issues/34124): Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#34098](https://github.com/ClickHouse/ClickHouse/issues/34098): Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#34038](https://github.com/ClickHouse/ClickHouse/issues/34038): Fix bug which lead to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)).
* Backported in [#34184](https://github.com/ClickHouse/ClickHouse/issues/34184): Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#34467](https://github.com/ClickHouse/ClickHouse/issues/34467): Fix inserts to distributed tables in case of change of native protocol. The last change was in the version version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#34209](https://github.com/ClickHouse/ClickHouse/issues/34209): Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)).
* Backported in [#34266](https://github.com/ClickHouse/ClickHouse/issues/34266): Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#34298](https://github.com/ClickHouse/ClickHouse/issues/34298): Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#34391](https://github.com/ClickHouse/ClickHouse/issues/34391): Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#34430](https://github.com/ClickHouse/ClickHouse/issues/34430): Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,208 @@
### ClickHouse release v22.2.1.2139-prestable FIXME as compared to v22.1.1.2542-prestable
#### New Feature
* (Not ready for production, put into experimental features) Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. In case when hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommited query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)).
* The setting allows a user to provide own deduplication semantic in MergeTree/ReplicatedMergeTree If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)).
* Add support of DEFAULT keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331). [#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/0over)).
* Add confidence intervals to ttests. [#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)).
* Allow to create new files on insert for File/S3/HDFS engines. Allow to owerwrite file in HDFS. Throw an exception in attempt to overwrite a file in S3 by default. Throw an exception in attempt to append data to file in formats that have suffix. Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)).
* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)).
* Merge functions for text classification. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)).
* Implemented meanZTest. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)).
* - Add function bitSlice. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)).
* Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `DEGREES` and `RADIANS` functions. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)).
* Parameter `--host` can accept multiple hosts. In case of unavailability of one of them, the client will try to connect to the next one. [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)).
* Detect format in clickhouse-local by file name. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)).
* Add a new method `expire()` in PoolBase which is used to reallocate an invalid object in the pool. [#34076](https://github.com/ClickHouse/ClickHouse/pull/34076) ([lgbo](https://github.com/lgbo-ustc)).
* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow to create default table engine. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)).
* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### Performance Improvement
* For inserts and merges into S3, write files in parallel whenever possible. [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Improve RangeHashedDictionary performance if for key there are a lot of intervals. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)).
* Add x86 avx512 support for memcmpSmall functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)).
* Reworks and reintroduces the scalar cache to MV execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)).
* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831 . It's beneficial to also apply to tuple columns. [#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)).
* Minor improvement to potential hot-path in `ExecuteScalarSubqueriesMatcher::visit` , where `std::set<String>` was constructed on every function invocation. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)).
* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Optimize quantilesExact{Low,High} to use nth_element instead of sort. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Daniel Kutenin](https://github.com/danlark1)).
* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)).
* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944). [#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)).
* Use a vector to collect useless list iterators when doing a snapshot, and in latter clearOutdatedNodes, we can just traverse the vector, not the list, which is faster. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) ([zhanglistar](https://github.com/zhanglistar)).
* Improve performance of insert into table functions URL, S3, File, HDFS. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)).
* According https://github.com/eBay/NuRaft/issues/209, there is no need to lock the mothod. [#34523](https://github.com/ClickHouse/ClickHouse/pull/34523) ([zhanglistar](https://github.com/zhanglistar)).
#### Improvement
* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)).
* Merge [#15765](https://github.com/ClickHouse/ClickHouse/issues/15765) (Dynamic reload of server TLS certificates on config reload) cc @johnskopis. [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Added `UUID` data type support for functions `hex`, `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)).
* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)).
* Add new keeper setting `min_session_timeout_ms`. Now keeper server will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings. [#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)).
* Improve keeper performance and fix several memory leaks. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)).
* Respect cgroup limits for CPU quota. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)).
* Enable binary arithmetic(plus, minus, multiply, division, least, greates) between Decimal and Float. [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)).
* Replace `_shard_num` via constants (from [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (from [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)).
* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL`. Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)).
* add function addressToLineWithInlines. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)).
* Improvement for `fromUnixTimestamp64` family functions.. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)).
* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)).
* - Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)).
* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)).
* Create parent directories in DiskS3::restoreFileOperations method. [#33730](https://github.com/ClickHouse/ClickHouse/pull/33730) ([ianton-ru](https://github.com/ianton-ru)).
* Add some improvements and fixes for Bool data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)).
* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)).
* Support explain create function query ``` sql :) explain ast create function mycast AS (n) -> cast(n as String); EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String'). [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)).
* Try every resolved ip address while getting S3 proxy. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* `FlatDictionary` improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)).
* fix disk using the same path, close [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)).
* Dictionaries added support for DateTime64. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)).
* `FlatDictionary`, `HashedDictionary`, `HashedArrayDictionary` added support for creating with empty attributes, with support of read all keys, and `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)).
* `RangeHashedDictionary` improvements. Improve performance of load time if there are multiple attributes. Allow to create without attributes. Added option to specify strategy when intervals `start` and `end` have `Nullable` type `convert_null_range_bound_to_open` by default is `true`. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify range lookup type `min`, `max` by default is `min` . Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix getauxval() in glibc-compatibility, this should fix vsyscalls after `setenv` (i.e. timezone is set in config), and LSan (and also fix some leaks that had been found by LSan). [#33957](https://github.com/ClickHouse/ClickHouse/pull/33957) ([Azat Khuzhin](https://github.com/azat)).
* Detect format and schema from stdin in clickhouse-local. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by different number of bytes. Example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)).
* Fix memory accounting for queries that uses < max_untracker_memory. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)).
* Supports all types of SYSTEM query ON CLUSTER clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)).
* Add schema inference for values() table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)).
* Tracing context is now propagated from GRPC client metadata. [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)).
* Add UUID suport in MsgPack input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)).
* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/31123. [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)).
* Maxsplit argument for splitByChar. close [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081). [#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)).
* Allow to parse dictionary `PRIMARY KEY` as `PRIMARY KEY (id, value)`, previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135). [#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add ability to compose PostgreSQL-style cast operator `::` with `ArrayElement` and `TupleElement`. [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)).
* Added `#!` and `# ` as a recognised start of a single line comment. Reference to task [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)).
* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Apply data skipping indexes for queries with FINAL may produce incorrect result. Disable data skipping indexes by default for queries with FINAL (introduce new `use_skip_indexes_if_final` setting and disable it by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)).
* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)).
* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)).
* REGEXP_MATCHES and REGEXP_REPLACE function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)).
* Better handle pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308) . [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)).
* Add options for clickhouse-format. Which close [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528) - max_query_size - max_parser_depth. [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)).
* Default input and output formats that can be overriden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)).
* Allow to skip not found urls for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` uploaded to S3 from a single query `s3_min_upload_part_size` multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)).
* Allow `allow_experimental_projection_optimization` by default. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)).
* Refactor client fault tolerant connection (https://github.com/ClickHouse/ClickHouse/pull/33824#issuecomment-1033690860). The new way to use it: ```bash clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4 ```. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)).
* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)).
* Send ProfileEvents statistics in case of INSERT SELECT query. [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)).
* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)).
* Allow to write `s3(url, access_key_id, secret_access_key)`. [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)).
* Little improvement no need to clone log entry. [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)).
* Slightly improve performance in case of filtering by sparse columns (which can be enabled by setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables). [#34601](https://github.com/ClickHouse/ClickHouse/pull/34601) ([Anton Popov](https://github.com/CurtizJ)).
#### Bug Fix
* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)).
* This PR fixes [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)).
* This PR fixes [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix inserting to temporary tables via gRPC. This PR fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. [#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)).
* add HashMethodSingleLowCardinalityColumn::findKey, avoid crash. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)).
#### Build/Testing/Packaging Improvement
* Add action for published releases. [#32218](https://github.com/ClickHouse/ClickHouse/pull/32218) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove unbundled support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)).
* Some improvement over current build system. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)).
* Removed "Yandex ClickHouse" terms from descriptions. Change to default mirrors for packages. [#33745](https://github.com/ClickHouse/ClickHouse/pull/33745) ([Ilya Yatsishin](https://github.com/qoega)).
* - Fix unconditional `--build-arg FROM_TAG=` docker argument - Add some tests for docker tests/ci. [#33751](https://github.com/ClickHouse/ClickHouse/pull/33751) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable dh_update_autotools_config that updates config.guess/config.sub in sources directory on build. It will be more deterministic in terms of different build hosts used as config.guess is provided with sources. [#33752](https://github.com/ClickHouse/ClickHouse/pull/33752) ([Ilya Yatsishin](https://github.com/qoega)).
* clickhouse-test.deb is not used in stateless CI checks. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)).
* - Add on-demand style-checker-aarch64 hosts - Run dockerpush CI jobs there. [#33954](https://github.com/ClickHouse/ClickHouse/pull/33954) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add back the missing `-Werror` flag globally. This fixes https://github.com/ClickHouse/ClickHouse/pull/33940#issuecomment-1020466537. [#33970](https://github.com/ClickHouse/ClickHouse/pull/33970) ([Amos Bird](https://github.com/amosbird)).
* Separate base parts out of SystemLog. Common code can use system log without fully linking to Interpreters. This helps with errors like https://s3.amazonaws.com/clickhouse-builds/33970/49b229f9c781854861254350d3407f209fb99dfd/binary_splitted/build_log.log. [#33978](https://github.com/ClickHouse/ClickHouse/pull/33978) ([Amos Bird](https://github.com/amosbird)).
* Fix broken dependencies tree building, improve tests. [#33983](https://github.com/ClickHouse/ClickHouse/pull/33983) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Add get_with_retries to a download helper - Use it un PRInfo class - Replace `labels_from_api` by `pr_event_from_api` - Use it in description check to have always actual body and labels. [#34012](https://github.com/ClickHouse/ClickHouse/pull/34012) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Fix style check for tags_stable.yml - Run StyleCheck always in PR - Mark `Run Check` as failed only for some cases. [#34283](https://github.com/ClickHouse/ClickHouse/pull/34283) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix previously wrong OK_SKIP_LABELS in run_check.py. [#34340](https://github.com/ClickHouse/ClickHouse/pull/34340) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Migrate docker images from Moscow timezone to UTC - Update ubuntu version for performance comparison from 18.04 to 20.04. [#34373](https://github.com/ClickHouse/ClickHouse/pull/34373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Ensure that tests don't depend on the result of non-stable sorting of equal elements. Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)).
* - Rebuild docker images on a daily base - Add `--all` flag to rebuild all images - Add `only_amd64` parameter for some images - Revert all workarounds for `only_amd64` images - Fix all broken images. [#34492](https://github.com/ClickHouse/ClickHouse/pull/34492) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix broken Hadoop tests after updated paths in an image. [#34556](https://github.com/ClickHouse/ClickHouse/pull/34556) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* I test compile ClickHouse without HDFS but failed, because the code in DiskHDFS doesn`t judge whether use HDFS. [#34573](https://github.com/ClickHouse/ClickHouse/pull/34573) ([zxealous](https://github.com/zxealous)).
* Set timeout 40 minutes for fast tests. [#34614](https://github.com/ClickHouse/ClickHouse/pull/34614) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reverts ClickHouse/ClickHouse[#34614](https://github.com/ClickHouse/ClickHouse/issues/34614). [#34622](https://github.com/ClickHouse/ClickHouse/pull/34622) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Rework version_helper, make it executable - Reimplement StorageSystemContributors.sh in version_helper - Create a release script. [#34641](https://github.com/ClickHouse/ClickHouse/pull/34641) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Fix path in workflows/release.yml - To be backported to branch 22.1. [#34646](https://github.com/ClickHouse/ClickHouse/pull/34646) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Fix lz4 compression for output. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)).
* Create a function escapeForLDAPFilter and use it to escape characters '(' and ')' in a final_user_dn variable. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)).
* TODO. [#33492](https://github.com/ClickHouse/ClickHouse/pull/33492) ([huzhichengdd](https://github.com/huzhichengdd)).
* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)).
* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order=1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug in zero copy replication which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)).
* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010) . This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275) . [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)).
* Throw exception when storage hdfs list directory failed. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)).
* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)).
* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). [#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix usage of external dictionaries with `Redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)).
* Fix schema inference for JSONEachRow and JSONCompactEachRow. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix KeyCondition with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)).
* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). [#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)).
* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)).
* Fix crash if sql user defined function is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix potential race condition when doing remote disk read. cc @Jokser. [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)).
* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix parsing ZK metadata: now metadata from zookeeper compared with local metadata in canonical form. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)).
* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)).
* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)).
* Fix parsing IPv6 from query parameter and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix bug which lead to inability for server to start when both replicated access storage and keeper are used. Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)).
* - Fixes `parallel_view_processing=0` not working when inserting into a table using `VALUES`. - Fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)).
* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)).
* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible data race in StorageFile that was introduced in https://github.com/ClickHouse/ClickHouse/pull/33960. Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). [#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix inserts to distributed tables in case of change of native protocol. The last change was in the version version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bug which can rarely lead to error "Cannot read all data" while reading LowCardinality columns of MergeTree table engines family which stores data on remote file system like S3. [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)).
* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)).
* Fix schema inference for table runction s3. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix metric `Query`, which shows number of executing queries. In last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)).
* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)).
* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)).
* Fix current_user/current_address for interserver mode (Before this patch current_user/current_address will be preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)).
* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix various issues when projection is enabled by default. Each issue is described in separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678) . This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)).
* Try to fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)).
* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* For SQLUserDefinedFunctions change privilege level from DATABASE to GLOBAL. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix segfault in schema inference from url. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible error 'Cannot convert column Function to mask' in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)).
* Add missing lock for storage. Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible error 'file_size: Operation not supported'. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)).
* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)).
* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order=1` and `distributed_aggregation_memory_efficient=0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)).
* In case of cancelation S3 and HDFS canceled only current reader, but continued to execute the initial query. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)).
* Fix bug of round/roundBankers, close [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)).
* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* - Add Debug workflow to get variables for all actions on demand - Fix lack of pr_info.number for some edge case. [#34644](https://github.com/ClickHouse/ClickHouse/pull/34644) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Additionally check remote_fs_execute_merges_on_single_replica_time_threshold inside ReplicatedMergeTreeQueue"'. [#34201](https://github.com/ClickHouse/ClickHouse/pull/34201) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Add func tests run with s3"'. [#34211](https://github.com/ClickHouse/ClickHouse/pull/34211) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Add pool to WriteBufferFromS3"'. [#34212](https://github.com/ClickHouse/ClickHouse/pull/34212) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Add support agreement page and snippets.'. [#34512](https://github.com/ClickHouse/ClickHouse/pull/34512) ([Tom Risse](https://github.com/flickerbox-tom)).
* NO CL ENTRY: 'Add Gigasheet to adopters'. [#34589](https://github.com/ClickHouse/ClickHouse/pull/34589) ([Brian Hunter](https://github.com/bjhunter)).
#### NO CL CATEGORY
* Reverting to previous docker images, will take a closer look at failing tests from [#34373](https://github.com/ClickHouse/ClickHouse/issues/34373). [#34413](https://github.com/ClickHouse/ClickHouse/pull/34413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,2 @@
### ClickHouse release v22.2.2.1-stable FIXME as compared to v22.2.1.2139-prestable

View File

@ -0,0 +1,6 @@
### ClickHouse release v22.2.3.5-stable FIXME as compared to v22.2.2.1-stable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#34848](https://github.com/ClickHouse/ClickHouse/issues/34848): Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)).

View File

@ -0,0 +1,146 @@
### ClickHouse release v22.3.1.1262-prestable FIXME as compared to v22.2.1.2139-prestable
#### Backward Incompatible Change
* Improvement the toDatetime function overflows. When the date string is very large, it will be converted to 1970. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
* Make arrayCompact behave as other higher-order functions: perform compaction not of lambda function results but on original array. If you using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping arrayCompact arguments into arrayMap. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
#### New Feature
* New data type `Object(<schema_format>)`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. [#23932](https://github.com/ClickHouse/ClickHouse/pull/23932) ([Anton Popov](https://github.com/CurtizJ)).
* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)).
* related to issue: [#30715](https://github.com/ClickHouse/ClickHouse/issues/30715). Add three functions for map data type: 1. mapReplace(map1, map2) - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. mapFilter 3. mapMap mapFilter and mapMap are higher order functions , accept two arguments, first argument is a lambda function with k, v pair , the second argument is a map type column. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)).
* Add local cache for disk s3. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)).
* - Add startsWith & endsWith function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)).
* Implement DateTime64 transform from and to arrow column, which closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)).
* Add cpu/mem metric for clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)).
* Support schema inference for inserting into table functions file/hdfs/s3/url. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)).
* A new settings called <allow_plaintext_password><allow_no_password> is added in server configuration which on/off insecure AUTH_TYPE plaintext-password and no_password. By default the property is set to true which means authType Plaintext_password & NO_password is allowed. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Add new table function `hive`, usage as follow ``` hive('<hive metastore url>', '<hive database>', '<hive table name>', '<columns definition>', '<partition columns>') ``` for example ``` SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') ```. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)).
* - When use clickhouse-client logining, If user and password is not specified in command line or configuration file, get them from `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)).
* Added date_time_input_format = 'best_effort_us'. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)).
* Changed the Play UI to select a theme by the following priority: * 'theme' GET parameter * 'theme' in localStorage * According to OS preference (didn't work before). [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)).
* ``` sql ) explain ast graph = 1 select * from system.parts;. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)).
* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it only allowed to create `Replicated` tables in `Replicated` database. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Performance Improvement
* Calling std::distance on large list will decrease performance, use a version in Node to substitute with more memory 80MB/1000w Nodes. The logic is: Every node is list has a version with type size_t, setting in insert or insertOrReplace method using the class member variable current_version. The version is only increase at enableSnapshot method. When doing a snapshot, call snapshotSizeWithVersion to get snapshot size and version(snapshot_up_to_version). When traversing the list, if node version is less then or equal to snapshot_up_to_version, then protects it from deleting if node version is bigger than snapshot_up_to_version, we can do anything to it. [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)).
* Compaction of log store in Nuraft need acquire an inner lock which also used in normal commit process, so we delete useless logs in `compact` method of Changelog class in a background thread. See details on: https://github.com/ClickHouse-Extras/NuRaft/blob/1707a7572aa66ec5d0a2dbe2bf5effa3352e6b2d/src/handle_commit.cxx#L560. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)).
* Don't hold the latest snapshot in memory, instead, reading the snapshot if needed, sequence reading is fast to 200+MBps even on HDD using mmap system call. Writing snapshot data directly to disk using compression method without holding original data and compressed data in memory. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)).
* MergeTree improve insert performance replacing std::stable_sort with pdqsort. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)).
* Improve the performance of the `ANY` aggregation function by acting over batches. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)).
* Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. Improve performance of `DirectDictionary` if dictionary source is `ClickHouse`. Improve performance of processing queries with large `IN` section. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)).
* Less lock on connection using atomic stat. Notice that it is an approximate stat. [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)).
#### Improvement
* Make the znode ctime and mtime consistent between servers. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)).
* Hold time lock while assigning tasks to clear old temporary directories in StorageMergeTree. [#34025](https://github.com/ClickHouse/ClickHouse/pull/34025) ([Amos Bird](https://github.com/amosbird)).
* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve schema inference with globs in FIle/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)).
* - Improve the opentelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)).
* MaterializedMySQL support materialized_mysql_tables_list(a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated) settings, mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)).
* This PR changes restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add `encodeURLComponent`, 'encodeURLFormComponent' function. Closes [#31092](https://github.com/ClickHouse/ClickHouse/issues/31092). [#34607](https://github.com/ClickHouse/ClickHouse/pull/34607) ([zzsmdfj](https://github.com/zzsmdfj)).
* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)).
* Some refactoring and improvement over async and remote buffer related stuff. Separated in each commit. [#34629](https://github.com/ClickHouse/ClickHouse/pull/34629) ([Amos Bird](https://github.com/amosbird)).
* ExecutableUserDefinedFunctions allow to specify argument names. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)).
* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)).
* Do not reset logging that configured via --log-file/--errorlog-file in case of empty logger.log/logger.errorlog. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)).
* Support `remote()`/`cluster()` for `parallel_distributed_insert_select=2`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)).
* Add name hints for data skipping indices. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)).
* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)).
* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix reading only columns which user asked for. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Implement MemoryStatisticsOS for FreeBSD. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)).
* Allow to open empty sqlite db file if it does not exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow LowCardinality strings for ngrambf_v1/tokenbf_v1 indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)).
* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)).
* Use connection pool for hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)).
* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* remove unnecessary columns for reading parquet/orc files. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)).
* For random access readbuffer in hive, the first time to read the readbuffer would use the original readbuffer instead of local file. When we read a parquet/orc format file, the readbuffer seeks to the end of the file, which will be blocked until the local file finishes download, and make the whold process slow. [#34957](https://github.com/ClickHouse/ClickHouse/pull/34957) ([lgbo](https://github.com/lgbo-ustc)).
* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)).
* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` support implicit key cast and constant arguments. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)).
* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid possible `MEMORY_LIMIT_EXCEEDED` during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)).
* Support `view()` for `parallel_distributed_insert_select`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)).
* Add setting to lower column case when reading parquet/ORC file. [#35145](https://github.com/ClickHouse/ClickHouse/pull/35145) ([shuchaome](https://github.com/shuchaome)).
* Do not retry non-rertiable errors. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Added disk_name to system.part_log. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)).
* Currently,Clickhouse validates hosts defined under <remote_url_allow_hosts> for URL and Remote Table functions. This PR extends the RemoteHostFilter to Mysql and PostgreSQL table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)).
* Sometimes it is not enough for us to distinguish queries hierachy only by is_initial_query in system.query_log and system.processes. So distributed_depth is needed. [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)).
* Support test mode for clickhouse-local. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Return const for function getMacro if not in distributed query. Close [#34727](https://github.com/ClickHouse/ClickHouse/issues/34727). [#35289](https://github.com/ClickHouse/ClickHouse/pull/35289) ([李扬](https://github.com/taiyang-li)).
* Reload `remote_url_allow_hosts` after config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Bug Fix
* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)).
* When the inner readbuffer's buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multi times (>=3)for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)).
#### Build/Testing/Packaging Improvement
* Randomize some settings in functional tests. This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)).
* NA. [#34513](https://github.com/ClickHouse/ClickHouse/pull/34513) ([vzakaznikov](https://github.com/vzakaznikov)).
* Debian package clickhouse-test.deb removed completely. CI use tests from repository and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)).
* Set timeout 40 minutes for fast tests. [#34624](https://github.com/ClickHouse/ClickHouse/pull/34624) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Drop PVS test from CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve CI scripts arguments. [#34792](https://github.com/ClickHouse/ClickHouse/pull/34792) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Separate smaller clickhouse-keeper build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)).
* Clion has the following problems "The breakpoint will not currently be hit. No executable code is associated with this line". [#35179](https://github.com/ClickHouse/ClickHouse/pull/35179) ([小路](https://github.com/nicelulu)).
* Add an ability to build stripped binaries with cmake. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Fix distributed subquery max_query_size limitation inconsistency. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)).
* Fix incorrect trivial count result when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)).
* Stop to select part for mutate when the other replica has already updated the /log for ReplatedMergeTree engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so not it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)).
* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix ENOENT with fsync_part_directory and Vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)).
* Fix bug for h3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)).
* Fix bugs for multiple columns group by in WindowView. [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)).
* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Fix asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible "Part directory doesn't exist" during `INSERT`. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)).
* Fix postgres datetime64 conversion. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Avoid busy polling in keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)).
* Unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)).
* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible rare error `Cannot push block to port which already has data`. Avoid pushing to port with data inside `DelayedSource`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible segfault in filelog. Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix unexpected result when use -state type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)).
* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary`. Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)).
* Fix missing alias after function is optimized to subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)).
* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)).
* Fixed the "update_lag" external dictionary configuration option being unusable with the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)).
* fix issue: [#31469](https://github.com/ClickHouse/ClickHouse/issues/31469). [#35118](https://github.com/ClickHouse/ClickHouse/pull/35118) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)).
* Fixed the incorrect translation YAML config to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)).
* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)).
* Fix materialised postrgesql adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix materialised postgres `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Schema inference didn't work properly on case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)).
* - Fix `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)).
* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)).
* * Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)).
* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
* Wait for IDiskRemote thread pool properly. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)).
* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible Abort while using Brotli compression with a small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible segfault in JSONEachRow schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using lzma compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible segfault while using lz4 compression with a small max_read_buffer_size setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)).
* - Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)).
* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)).
#### NO CL ENTRY
* NO CL ENTRY: '[ImgBot] Optimize images'. [#34590](https://github.com/ClickHouse/ClickHouse/pull/34590) ([imgbot[bot]](https://github.com/apps/imgbot)).
* NO CL ENTRY: 'Revert "Allow restrictive row policies without permissive"'. [#34782](https://github.com/ClickHouse/ClickHouse/pull/34782) ([Vitaly Baranov](https://github.com/vitlibar)).
* NO CL ENTRY: 'Revert "Remove "bugs" that do not exist anymore"'. [#35241](https://github.com/ClickHouse/ClickHouse/pull/35241) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Change timezone in Docker"'. [#35243](https://github.com/ClickHouse/ClickHouse/pull/35243) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix 00900_long_parquet_load"'. [#35301](https://github.com/ClickHouse/ClickHouse/pull/35301) ([Vladimir C](https://github.com/vdimir)).

View File

@ -0,0 +1,6 @@
### ClickHouse release v22.3.2.2-lts FIXME as compared to v22.3.1.1262-prestable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)).

View File

@ -0,0 +1,14 @@
### ClickHouse release v22.3.3.44-lts FIXME as compared to v22.3.2.2-lts
#### Bug Fix
* Backported in [#35928](https://github.com/ClickHouse/ClickHouse/issues/35928): Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#35415](https://github.com/ClickHouse/ClickHouse/issues/35415): Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#35563](https://github.com/ClickHouse/ClickHouse/issues/35563): Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#35783](https://github.com/ClickHouse/ClickHouse/issues/35783): Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#35881](https://github.com/ClickHouse/ClickHouse/issues/35881): Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#35856](https://github.com/ClickHouse/ClickHouse/issues/35856): Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)).
* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)).

View File

@ -0,0 +1,14 @@
### ClickHouse release v22.3.4.20-lts FIXME as compared to v22.3.3.44-lts
#### Build/Testing/Packaging Improvement
* - Add `_le_` method for ClickHouseVersion - Fix auto_version for existing tag - docker_server now support getting version from tags - Add python unit tests to backport workflow. [#36028](https://github.com/ClickHouse/ClickHouse/pull/36028) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36244](https://github.com/ClickHouse/ClickHouse/issues/36244): Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#36240](https://github.com/ClickHouse/ClickHouse/issues/36240): Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#36242](https://github.com/ClickHouse/ClickHouse/issues/36242): Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#35938](https://github.com/ClickHouse/ClickHouse/issues/35938): Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#36147](https://github.com/ClickHouse/ClickHouse/issues/36147): Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)).
* Backported in [#36276](https://github.com/ClickHouse/ClickHouse/issues/36276): Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)).

View File

@ -0,0 +1,7 @@
### ClickHouse release v22.3.5.5-lts FIXME as compared to v22.3.4.20-lts
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#36674](https://github.com/ClickHouse/ClickHouse/issues/36674): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)).

View File

@ -0,0 +1,7 @@
### ClickHouse release v22.3.6.5-lts FIXME as compared to v22.3.5.5-lts
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36525](https://github.com/ClickHouse/ClickHouse/issues/36525): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#36795](https://github.com/ClickHouse/ClickHouse/issues/36795): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)).

View File

@ -0,0 +1,239 @@
### ClickHouse release v22.4.1.2305-prestable FIXME as compared to v22.3.1.1262-prestable
#### Backward Incompatible Change
* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `parser_settings_after_format_compact` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
* Changed hashed path for cache files. [#36079](https://github.com/ClickHouse/ClickHouse/pull/36079) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### New Feature
* Added support for transactions for simple `MergeTree` tables. This feature is highly experimental and not recommended for production. Part of [#22086](https://github.com/ClickHouse/ClickHouse/issues/22086). [#24258](https://github.com/ClickHouse/ClickHouse/pull/24258) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Added load balancing setting for [Zoo]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)).
* New aggregation function groupSortedArray to obtain an array of first N values. [#34055](https://github.com/ClickHouse/ClickHouse/pull/34055) ([palegre-tiny](https://github.com/palegre-tiny)).
* New functions minSampleSizeContinous and minSampleSizeConversion. [#34354](https://github.com/ClickHouse/ClickHouse/pull/34354) ([achimbab](https://github.com/achimbab)).
* Profiling on Processors level (under `log_processors_profiles` setting, ClickHouse will write time that processor spent during execution/waiting for data to `system.processors_profile_log` table). [#34355](https://github.com/ClickHouse/ClickHouse/pull/34355) ([Azat Khuzhin](https://github.com/azat)).
* Add `toEndOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)).
* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)).
* Introduce format `ProtobufList`. Fixes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* A dedicated small package for `clickhouse-keeper`. [#35308](https://github.com/ClickHouse/ClickHouse/pull/35308) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* added INTERPOLATE extension to the ORDER BY ... WITH FILL closes [#34903](https://github.com/ClickHouse/ClickHouse/issues/34903). [#35349](https://github.com/ClickHouse/ClickHouse/pull/35349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Added functions `minSampleSizeContinous` and `minSampleSizeConversion`. Author @achimbab. [#35360](https://github.com/ClickHouse/ClickHouse/pull/35360) ([Maksim Kita](https://github.com/kitaisreal)).
* Added functions `arrayFirstOrNull`, `arrayLastOrNull`. Closes [#35238](https://github.com/ClickHouse/ClickHouse/issues/35238). [#35414](https://github.com/ClickHouse/ClickHouse/pull/35414) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow to write remote fs cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache` query. Add introspection for s3 metadata with `system.remote_data_paths` table. Closes [#34021](https://github.com/ClickHouse/ClickHouse/issues/34021). Add cache option for merges by adding mode `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` (turned on by default for merges and can also be turned on by query setting with the same name). Rename cache related settings (`remote_fs_enable_cache -> enable_filesystem_cache`, etc). [#35475](https://github.com/ClickHouse/ClickHouse/pull/35475) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Added functions makeDate(year, month, day), makeDate32(year, month, day). [#35628](https://github.com/ClickHouse/ClickHouse/pull/35628) ([Alexander Gololobov](https://github.com/davenger)).
* Added function `flattenTuple`. It receives nested named `Tuple` as an argument and returns a flatten `Tuple` which elements are the paths from the original `Tuple`. E.g.: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns. [#35690](https://github.com/ClickHouse/ClickHouse/pull/35690) ([Anton Popov](https://github.com/CurtizJ)).
* Support new type of quota `WRITTEN BYTES` to limit amount of written bytes during insert queries. [#35736](https://github.com/ClickHouse/ClickHouse/pull/35736) ([Anton Popov](https://github.com/CurtizJ)).
* Implementation of makeDateTime() and makeDateTIme64(). [#35934](https://github.com/ClickHouse/ClickHouse/pull/35934) ([Alexander Gololobov](https://github.com/davenger)).
* Support '\G;' at the end of query for FORMAT Vertical. Closes [#36111](https://github.com/ClickHouse/ClickHouse/issues/36111). [#36130](https://github.com/ClickHouse/ClickHouse/pull/36130) ([yuuch](https://github.com/yuuch)).
* Adding random salt and appending to password to generate password hash. [#36172](https://github.com/ClickHouse/ClickHouse/pull/36172) ([Rajkumar Varada](https://github.com/varadarajkumar)).
* Add setting throw_if_no_data_to_insert. Closes [#36336](https://github.com/ClickHouse/ClickHouse/issues/36336). [#36345](https://github.com/ClickHouse/ClickHouse/pull/36345) ([flynn](https://github.com/ucasfl)).
* Implement type inference for INSERT INTO function null(). Closes [#36334](https://github.com/ClickHouse/ClickHouse/issues/36334). [#36353](https://github.com/ClickHouse/ClickHouse/pull/36353) ([flynn](https://github.com/ucasfl)).
* ... [#36436](https://github.com/ClickHouse/ClickHouse/pull/36436) ([Rich Raposa](https://github.com/rfraposa)).
#### Performance Improvement
* Speed up parts loading process of MergeTree to accelerate starting up of clickhouse-server. With this improvement, clickhouse-server was able to decrease starting up time from 75 minutes to 20 seconds, with 700k mergetree parts. [#32928](https://github.com/ClickHouse/ClickHouse/pull/32928) ([李扬](https://github.com/taiyang-li)).
* Sizes of hash tables used during aggregation now collected and used in later queries to avoid hash tables resizes. [#33439](https://github.com/ClickHouse/ClickHouse/pull/33439) ([Nikita Taranov](https://github.com/nickitat)).
* Multiple changes to improve ASOF join performance (1.2 - 1.6x as fast). It also adds support to use big integers. [#34733](https://github.com/ClickHouse/ClickHouse/pull/34733) ([Raúl Marín](https://github.com/Algunenano)).
* URL storage engine now downloads multiple chunks in parallel if the endpoint supports HTTP Range. Two additional settings were added, `max_download_threads` and `max_download_buffer_size`, which control maximum number of threads a single query can use to download the file and the maximum number of bytes each thread can process. [#35150](https://github.com/ClickHouse/ClickHouse/pull/35150) ([Antonio Andelic](https://github.com/antonio2368)).
* parallelization of multipart upload into S3 storage. [#35343](https://github.com/ClickHouse/ClickHouse/pull/35343) ([Sergei Trifonov](https://github.com/serxa)).
* Improve performance of ASOF JOIN if key is native integer. [#35525](https://github.com/ClickHouse/ClickHouse/pull/35525) ([Maksim Kita](https://github.com/kitaisreal)).
* A new query plan optimization. Evaluate functions after `ORDER BY` when possible. As an example, for a query `SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5`, function `sipHash64` would be evaluated after `ORDER BY` and `LIMIT`, which gives ~20x speed up. [#35623](https://github.com/ClickHouse/ClickHouse/pull/35623) ([Nikita Taranov](https://github.com/nickitat)).
* narrow mutex scope when setenv LIBHDFS3_CONF related issue [#35292](https://github.com/ClickHouse/ClickHouse/issues/35292). [#35646](https://github.com/ClickHouse/ClickHouse/pull/35646) ([shuchaome](https://github.com/shuchaome)).
* Improve performance of `hasAll` function using specializations for SSE and AVX2. Author @youennL-cs. [#35723](https://github.com/ClickHouse/ClickHouse/pull/35723) ([Maksim Kita](https://github.com/kitaisreal)).
* - The explain statement of GLOBAL JOIN two distributed tables can speed up 100x: explain plan select ... from t1_dist global join t2_dist on ... explain pipeline select ... from t1_dist global join t2_dist on ... [#36055](https://github.com/ClickHouse/ClickHouse/pull/36055) ([何李夫](https://github.com/helifu)).
* 2 optimizations: - Optimize trivail count hive query - Speed up hive query by caching metadata of hive file. [#36082](https://github.com/ClickHouse/ClickHouse/pull/36082) ([李扬](https://github.com/taiyang-li)).
#### Improvement
* ... [#21474](https://github.com/ClickHouse/ClickHouse/pull/21474) ([nvartolomei](https://github.com/nvartolomei)).
* As talked in [issue 27025](https://github.com/ClickHouse/ClickHouse/issues/27025), there is an improvement of the HasAll function using SIMD instruction (SSE and AVX2). Gtest tests have also been added. [#27653](https://github.com/ClickHouse/ClickHouse/pull/27653) ([youennL-cs](https://github.com/youennL-cs)).
* Proper support of setting `max_rows_to_read` in case of reading in order of sorting key and specified limit. Previously the exception `Limit for rows or bytes to read exceeded` could be thrown even if query actually requires to read less amount of rows. [#33230](https://github.com/ClickHouse/ClickHouse/pull/33230) ([Anton Popov](https://github.com/CurtizJ)).
* INTERVAL improvement - can be used with `[MILLI|MICRO|NANO]SECOND`. Added `toStartOf[Milli|Micro|Nano]second()` functions. Added `[add|subtract][Milli|Micro|Nano]second()`. [#34353](https://github.com/ClickHouse/ClickHouse/pull/34353) ([Andrey Zvonov](https://github.com/zvonand)).
* System log tables allow to specify COMMENT in ENGINE declaration. Closes [#33768](https://github.com/ClickHouse/ClickHouse/issues/33768). [#34536](https://github.com/ClickHouse/ClickHouse/pull/34536) ([Maksim Kita](https://github.com/kitaisreal)).
* added sanity checks on server startup (available memory and disk space, max thread count, etc). [#34566](https://github.com/ClickHouse/ClickHouse/pull/34566) ([Sergei Trifonov](https://github.com/serxa)).
* Use minmax index for orc/parquet file in Hive Engine. Related pr: https://github.com/ClickHouse-Extras/arrow/pull/10. [#34631](https://github.com/ClickHouse/ClickHouse/pull/34631) ([李扬](https://github.com/taiyang-li)).
* If `port` is not specified in cluster configuration, default server port will be used. This closes [#34769](https://github.com/ClickHouse/ClickHouse/issues/34769). [#34772](https://github.com/ClickHouse/ClickHouse/pull/34772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added better error messages in case of connection failed to MySQL. Closes [#35128](https://github.com/ClickHouse/ClickHouse/issues/35128). [#35234](https://github.com/ClickHouse/ClickHouse/pull/35234) ([zzsmdfj](https://github.com/zzsmdfj)).
* Add function `getTypeSerializationStreams`. For a specified type (which is detected from column), it returns an array with all the serialization substream paths. This function is useful mainly for developers. [#35290](https://github.com/ClickHouse/ClickHouse/pull/35290) ([李扬](https://github.com/taiyang-li)).
* - wchc operation is expensive and should not be in the four_letter_word_white_list defaults. [#35320](https://github.com/ClickHouse/ClickHouse/pull/35320) ([zhangyuli1](https://github.com/zhangyuli1)).
* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Add a new kind of row policies named `simple`. Before this PR we had two kinds or row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)).
* Remove testmode option, enable it unconditionally. [#35354](https://github.com/ClickHouse/ClickHouse/pull/35354) ([Kseniia Sumarokova](https://github.com/kssenii)).
* For table function `s3cluster` or `HDFSCluster` or `hive`, we can't get right `AccessType` by `StorageFactory::instance().getSourceAccessType(getStorageTypeName())`. This pr fix it. [#35365](https://github.com/ClickHouse/ClickHouse/pull/35365) ([李扬](https://github.com/taiyang-li)).
* For lts releases packages will be pushed to both lts and stable repos. [#35382](https://github.com/ClickHouse/ClickHouse/pull/35382) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Support uuid for postgres engines. Closes [#35384](https://github.com/ClickHouse/ClickHouse/issues/35384). [#35403](https://github.com/ClickHouse/ClickHouse/pull/35403) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add arguments `--user`, `--password`, `--host`, `--port` for clickhouse-diagnostics. [#35422](https://github.com/ClickHouse/ClickHouse/pull/35422) ([李扬](https://github.com/taiyang-li)).
* fix INSERT INTO table FROM INFILE does not display progress bar. [#35429](https://github.com/ClickHouse/ClickHouse/pull/35429) ([xiedeyantu](https://github.com/xiedeyantu)).
* Allow server to bind to low-numbered ports (e.g. 443). ClickHouse installation script will set `cap_net_bind_service` to the binary file. [#35451](https://github.com/ClickHouse/ClickHouse/pull/35451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add settings `input_format_orc_case_insensitive_column_matching`, `input_format_arrow_case_insensitive_column_matching`, and `input_format_parquet_case_insensitive_column_matching` which allows ClickHouse to use case insensitive matching of columns while reading data from ORC, Arrow or Parquet files. [#35459](https://github.com/ClickHouse/ClickHouse/pull/35459) ([Antonio Andelic](https://github.com/antonio2368)).
* - Add explicit table info to the scan node of query plan and pipeline. [#35460](https://github.com/ClickHouse/ClickHouse/pull/35460) ([何李夫](https://github.com/helifu)).
* Propagate query and session settings for distributed DDL queries. Setting `distributed_ddl_entry_format_version` is set to 2 by default now. [#35463](https://github.com/ClickHouse/ClickHouse/pull/35463) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add sizes of subcolumns to `system.parts_columns` table. [#35488](https://github.com/ClickHouse/ClickHouse/pull/35488) ([Anton Popov](https://github.com/CurtizJ)).
* It was possible to get stack overflow in distributed queries if one of the settings `async_socket_for_remote` and `use_hedged_requests` is enabled while parsing very deeply nested data type (at least in debug build). Closes [#35509](https://github.com/ClickHouse/ClickHouse/issues/35509). [#35524](https://github.com/ClickHouse/ClickHouse/pull/35524) ([Kruglov Pavel](https://github.com/Avogar)).
* Improve pasting performance and compatibility of clickhouse-client. This helps [#35501](https://github.com/ClickHouse/ClickHouse/issues/35501). [#35541](https://github.com/ClickHouse/ClickHouse/pull/35541) ([Amos Bird](https://github.com/amosbird)).
* Added a support for automatic schema inference to `s3Cluster` table function. Synced the signatures of `s3 ` and `s3Cluster`. [#35544](https://github.com/ClickHouse/ClickHouse/pull/35544) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Use multiple threads to download objects from S3. Downloading is controllable using `max_download_threads` and `max_download_buffer_size` settings. [#35571](https://github.com/ClickHouse/ClickHouse/pull/35571) ([Antonio Andelic](https://github.com/antonio2368)).
* Deduce absolute hdfs config path. [#35572](https://github.com/ClickHouse/ClickHouse/pull/35572) ([李扬](https://github.com/taiyang-li)).
* - Use some tweaks and heuristics to determine numbers, strings, arrays, tuples and maps in CSV, TSV and TSVRaw data formats. Add setting `input_format_csv_use_best_effort_in_schema_inference` for CSV format that enables/disables using these heuristics, if it's disabled, we treat everything as string. Add similar setting `input_format_tsv_use_best_effort_in_schema_inference` for TSV/TSVRaw format. These settings are enabled by default. - Add Maps support for schema inference in Values format. - Fix possible segfault in schema inference in Values format. - Allow to skip columns with unsupported types in Arrow/ORC/Parquet formats. Add corresponding settings for it: `input_format_{parquet|orc|arrow}_skip_columns_with_unsupported_types_in_schema_inference`. These settings are disabled by default. - Allow to convert a column with type Null to a Nullable column with all NULL values in Arrow/Parquet formats. - Allow to specify column names in schema inference via setting `column_names_for_schema_inference` for formats that don't contain column names (like CSV, TSV, JSONCompactEachRow, etc) - Fix schema inference in ORC/Arrow/Parquet formats in terms of working with Nullable columns. Previously all inferred types were not Nullable and it blocked reading Nullable columns from data, now it's fixed and all inferred types are always Nullable (because we cannot understand that column is Nullable or not by reading the schema). - Fix schema inference in Template format with CSV escaping rules. [#35582](https://github.com/ClickHouse/ClickHouse/pull/35582) ([Kruglov Pavel](https://github.com/Avogar)).
* Add parallel parsing and schema inference for format `JSONAsObject`. [#35592](https://github.com/ClickHouse/ClickHouse/pull/35592) ([Anton Popov](https://github.com/CurtizJ)).
* Added support for schema inference for `hdfsCluster`. [#35602](https://github.com/ClickHouse/ClickHouse/pull/35602) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* - Improve the pipeline description for JOIN. [#35612](https://github.com/ClickHouse/ClickHouse/pull/35612) ([何李夫](https://github.com/helifu)).
* Support schema inference for type `Object` in format `JSONEachRow`. Allow to convert columns of type `Map` to columns of type `Object`. [#35629](https://github.com/ClickHouse/ClickHouse/pull/35629) ([Anton Popov](https://github.com/CurtizJ)).
* Add profile event counter `AsyncInsertBytes` about size of async INSERTs. [#35644](https://github.com/ClickHouse/ClickHouse/pull/35644) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added `is_secure` column to `system.query_log` which denotes if the client is using a secure connection over TCP or HTTP. [#35705](https://github.com/ClickHouse/ClickHouse/pull/35705) ([Antonio Andelic](https://github.com/antonio2368)).
* closes [#35641](https://github.com/ClickHouse/ClickHouse/issues/35641) Allow EPHEMERAL without explicit default expression. [#35706](https://github.com/ClickHouse/ClickHouse/pull/35706) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix send_logs_level for clickhouse local. Closes [#35653](https://github.com/ClickHouse/ClickHouse/issues/35653). [#35716](https://github.com/ClickHouse/ClickHouse/pull/35716) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Improve columns ordering in schema inference for formats TSKV and JSONEachRow, closes [#35640](https://github.com/ClickHouse/ClickHouse/issues/35640). Don't stop schema inference when reading empty row in schema inference for formats TSKV and JSONEachRow. [#35724](https://github.com/ClickHouse/ClickHouse/pull/35724) ([Kruglov Pavel](https://github.com/Avogar)).
* Add new setting `input_format_json_read_bools_as_numbers` that allows to infer and parse bools as numbers in JSON input formats. It's enabled by default. Suggested by @alexey-milovidov. [#35735](https://github.com/ClickHouse/ClickHouse/pull/35735) ([Kruglov Pavel](https://github.com/Avogar)).
* Respect remote_url_allow_hosts for hive. [#35743](https://github.com/ClickHouse/ClickHouse/pull/35743) ([李扬](https://github.com/taiyang-li)).
* Support schema inference for insert select with using `input` table function. Get schema from insertion table instead of inferring it from the data in case of insert select from table functions that support schema inference. Closes [#35639](https://github.com/ClickHouse/ClickHouse/issues/35639). [#35760](https://github.com/ClickHouse/ClickHouse/pull/35760) ([Kruglov Pavel](https://github.com/Avogar)).
* Improve projection analysis to optimize trivial queries such as `count()`. [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) ([Amos Bird](https://github.com/amosbird)).
* support ALTER TABLE t DETACH PARTITION (ALL). [#35794](https://github.com/ClickHouse/ClickHouse/pull/35794) ([awakeljw](https://github.com/awakeljw)).
* Added an animation to the hourglass icon to indicate to the user that a query is running. [#35860](https://github.com/ClickHouse/ClickHouse/pull/35860) ([peledni](https://github.com/peledni)).
* Now some `ALTER MODIFY COLUMN` queries for `Arrays` and `Nullable` types can be done at metadata level without mutations. For example, alter from `Array(Enum8('Option1'=1))` to `Array(Enum8('Option1'=1, 'Option2'=2))`. [#35882](https://github.com/ClickHouse/ClickHouse/pull/35882) ([alesapin](https://github.com/alesapin)).
* Now it's not allowed to `ALTER TABLE ... RESET SETTING` for non-existing settings for MergeTree engines family. Fixes [#35816](https://github.com/ClickHouse/ClickHouse/issues/35816). [#35884](https://github.com/ClickHouse/ClickHouse/pull/35884) ([alesapin](https://github.com/alesapin)).
* Improve settings configuration for s3 storage / table function. [#35915](https://github.com/ClickHouse/ClickHouse/pull/35915) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add some basic metrics to monitor engine=Kafka tables. [#35916](https://github.com/ClickHouse/ClickHouse/pull/35916) ([filimonov](https://github.com/filimonov)).
* Now `kafka_num_consumers` can be bigger than amount of physical cores in case of low resource machine (less than 16 cores). [#35926](https://github.com/ClickHouse/ClickHouse/pull/35926) ([alesapin](https://github.com/alesapin)).
* Update unixodbc to mitigate CVE-2018-7485. [#35943](https://github.com/ClickHouse/ClickHouse/pull/35943) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Require mutations for per-table TTL only when it had been changed. [#35953](https://github.com/ClickHouse/ClickHouse/pull/35953) ([Azat Khuzhin](https://github.com/azat)).
* - Add `dns_max_consecutive_failures` setting to stop re-resolving cached DNS entries after a number of consecutive failures (5 by default). [#35956](https://github.com/ClickHouse/ClickHouse/pull/35956) ([Raúl Marín](https://github.com/Algunenano)).
* ASTPartition::formatImpl should output ALL while executing ALTER TABLE t DETACH PARTITION ALL. [#35987](https://github.com/ClickHouse/ClickHouse/pull/35987) ([awakeljw](https://github.com/awakeljw)).
* `clickhouse-keeper` starts answering 4-letter commands before getting the quorum. [#35992](https://github.com/ClickHouse/ClickHouse/pull/35992) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix wrong assertion in replxx which happens when navigating back the history when the first line of input is a newline. Mark as improvement because it only affects debug build. This fixes [#34511](https://github.com/ClickHouse/ClickHouse/issues/34511). [#36007](https://github.com/ClickHouse/ClickHouse/pull/36007) ([Amos Bird](https://github.com/amosbird)).
* If someone writes DEFAULT NULL in table definition, make data type Nullable. [#35887](https://github.com/ClickHouse/ClickHouse/issues/35887). [#36058](https://github.com/ClickHouse/ClickHouse/pull/36058) ([xiedeyantu](https://github.com/xiedeyantu)).
* Added `thread_id` and `query_id` columns to `system.zookeeper_log` table. [#36074](https://github.com/ClickHouse/ClickHouse/pull/36074) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Auto assign numbers for Enum elements. [#36101](https://github.com/ClickHouse/ClickHouse/pull/36101) ([awakeljw](https://github.com/awakeljw)).
* Reset thread name in `ThreadPool` to `ThreadPoolIdle` after job is done. This is to avoid displaying the old thread name for idle threads. This closes [#36114](https://github.com/ClickHouse/ClickHouse/issues/36114). [#36115](https://github.com/ClickHouse/ClickHouse/pull/36115) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support `UNSIGNED` modifier with unused parameters of `INT`. [#36126](https://github.com/ClickHouse/ClickHouse/pull/36126) ([awakeljw](https://github.com/awakeljw)).
* Add support for atomic exchange in OSX. [#36133](https://github.com/ClickHouse/ClickHouse/pull/36133) ([Raúl Marín](https://github.com/Algunenano)).
* Update the progress bar after receiving every ProfileEvents packet. This change must fix the showing of outdated profiling data in client. [#36202](https://github.com/ClickHouse/ClickHouse/pull/36202) ([Dmitry Novik](https://github.com/novikd)).
* Check ORC/Parquet/Arrow format magic bytes before loading file in memory to prevent high memory usage in case of wrong file format. [#36209](https://github.com/ClickHouse/ClickHouse/pull/36209) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow queries `insert into function file(...) select from` for files with formats that don't support schema inference. For example: `insert into function file(data.json) select 42` - such query didn't work previously. [#36211](https://github.com/ClickHouse/ClickHouse/pull/36211) ([Kruglov Pavel](https://github.com/Avogar)).
* Send both stdin data and data from query/data from infile in client. Previously client ignored stdin data in case of both sources were present. Closes [#36100](https://github.com/ClickHouse/ClickHouse/issues/36100). [#36254](https://github.com/ClickHouse/ClickHouse/pull/36254) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow missing columns for mongo storage. Closes [#36119](https://github.com/ClickHouse/ClickHouse/issues/36119). Closes [#26490](https://github.com/ClickHouse/ClickHouse/issues/26490). [#36272](https://github.com/ClickHouse/ClickHouse/pull/36272) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Input format parsers can synchronize after wrong value of `Bool` or `Map` data types (see the `input_format_allow_errors_*` settings). [#36333](https://github.com/ClickHouse/ClickHouse/pull/36333) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Check for harmful environment variables like `LD_PRELOAD` at startup. It makes sense in [Google Collab](https://colab.research.google.com/drive/1wzYn59PA9EDyra6356a8rUpwd3zUX0Zt?usp=sharing). This closes [#36340](https://github.com/ClickHouse/ClickHouse/issues/36340). [#36342](https://github.com/ClickHouse/ClickHouse/pull/36342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix [#36307](https://github.com/ClickHouse/ClickHouse/issues/36307) [#35891](https://github.com/ClickHouse/ClickHouse/issues/35891) possible range issues in automatic assigned enums, also fix error message. [#36352](https://github.com/ClickHouse/ClickHouse/pull/36352) ([awakeljw](https://github.com/awakeljw)).
* hex function support for Int128/Int256/UInt128/UInt256. [#36386](https://github.com/ClickHouse/ClickHouse/pull/36386) ([Memo](https://github.com/Joeywzr)).
#### Bug Fix
* Add type checking when create materialized view. Try to close: [#23684](https://github.com/ClickHouse/ClickHouse/issues/23684). [#24896](https://github.com/ClickHouse/ClickHouse/pull/24896) ([hexiaoting](https://github.com/hexiaoting)).
* Avoid erasing columns from a block if it doesn't exist while reading data from Hive. [#35393](https://github.com/ClickHouse/ClickHouse/pull/35393) ([lgbo](https://github.com/lgbo-ustc)).
* Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)).
* In FileSegmentsHolder::~FileSegmentsHolder(), when a segment is set to detach, it will assert its state is empty. However, in FileSegment::completeImpl(), when detach is set to true, its state may be PARTIALLY_DOWNLOADED_NO_CONTINUATION or SKIP_CACHE or PARTIALLY_DOWNLOADED, thus cause error in FileSegmentsHolder::~FileSegmentsHolder(). ``` if (file_segment->detached) { /// This file segment is not owned by cache, so it will be destructed /// at this point, therefore no completion required. assert(file_segment->state() == FileSegment::State::EMPTY); file_segment_it = file_segments.erase(current_file_segment_it); continue; } ```. [#36452](https://github.com/ClickHouse/ClickHouse/pull/36452) ([Han Shukai](https://github.com/KinderRiven)).
#### Build/Testing/Packaging Improvement
* Add backward compatibility check in stress test. Closes [#25088](https://github.com/ClickHouse/ClickHouse/issues/25088). [#27928](https://github.com/ClickHouse/ClickHouse/pull/27928) ([Kruglov Pavel](https://github.com/Avogar)).
* - Migrate package building to nfpm - Deprecate `release` script in favor of `packages/build` - Build everything in clickhouse/binary-builder image (cleanup: clickhouse/deb-builder) - Add symbol stripping to cmake (todo: use $prefix/lib/$bin_dir/clickhouse/$binary.debug) - Fix issue with DWARF symbols - Add Alpine APK packages - Rename `alien` to `additional_pkgs`. [#33664](https://github.com/ClickHouse/ClickHouse/pull/33664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a night scan and upload for coverity. [#34895](https://github.com/ClickHouse/ClickHouse/pull/34895) ([Boris Kuschel](https://github.com/bkuschel)).
* - Switch to libcxx / libcxxabi from LLVM 14. [#34906](https://github.com/ClickHouse/ClickHouse/pull/34906) ([Raúl Marín](https://github.com/Algunenano)).
* Add next batch of random settings in functional tests. [#35047](https://github.com/ClickHouse/ClickHouse/pull/35047) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix stress-test report in CI, now we upload the runlog with information about started stress tests only once. [#35093](https://github.com/ClickHouse/ClickHouse/pull/35093) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - [x] redo alpine image to use clean Dockerfile - [x] Create a script in tests/ci to build both ubuntu and alpine images - [x] Add clickhouse-keeper image (cc @nikitamikhaylov) - [x] Add build check to PullRequestCI - [x] Add a job to a ReleaseCI - [x] Add a job to MasterCI to build and push `clickhouse/clickhouse-server:head` and `clickhouse/clickhouse-keeper:head` images for each merged PR. [#35211](https://github.com/ClickHouse/ClickHouse/pull/35211) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reverse `--no-prestable` key to match the logic. [#35372](https://github.com/ClickHouse/ClickHouse/pull/35372) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* All disabled tests run longer than 30 seconds. [#35413](https://github.com/ClickHouse/ClickHouse/pull/35413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix copypaste error for clickhouse-keeper test. [#35428](https://github.com/ClickHouse/ClickHouse/pull/35428) ([zhangyuli1](https://github.com/zhangyuli1)).
* Fix failed tests in: https://s3.amazonaws.com/clickhouse-test-reports/35422/32348779fd0bac5276727cfc01e75c625ecc69b9/fuzzer_astfuzzerubsan,actions//report.html. [#35439](https://github.com/ClickHouse/ClickHouse/pull/35439) ([李扬](https://github.com/taiyang-li)).
* Apply black formatter to python code and add a per-commit check. [#35466](https://github.com/ClickHouse/ClickHouse/pull/35466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a label to recognize a building task for every image. [#35583](https://github.com/ClickHouse/ClickHouse/pull/35583) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Clean-up after functional test 02167 ... [#35681](https://github.com/ClickHouse/ClickHouse/pull/35681) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Minor improvement in contrib/krb5 build configuration. [#35832](https://github.com/ClickHouse/ClickHouse/pull/35832) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Update URL in test visualizer from `play-ci` to `play` (it was moved). [#35872](https://github.com/ClickHouse/ClickHouse/pull/35872) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Highlight headers in the PR template. Improve description checking logging. [#35947](https://github.com/ClickHouse/ClickHouse/pull/35947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Running with podman is failing: it complains about specifying the same volume twice. [#35978](https://github.com/ClickHouse/ClickHouse/pull/35978) ([Roman Nikonov](https://github.com/nic11)).
* Add argument for total number of desired builds. [#35999](https://github.com/ClickHouse/ClickHouse/pull/35999) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* - Add `_le_` method for ClickHouseVersion - Fix auto_version for an existing tag - docker_server now supports getting the version from tags - Add python unit tests to backport workflow - Move version_arg to version_helper, add tests. [#36029](https://github.com/ClickHouse/ClickHouse/pull/36029) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Significant improvement in docker build-cache system. [#36041](https://github.com/ClickHouse/ClickHouse/pull/36041) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* After CVE-2022-24765 git needs additional config parameter when directory is owned by another user. [#36193](https://github.com/ClickHouse/ClickHouse/pull/36193) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add an option for build profiling (`-ftime-trace`). [#36318](https://github.com/ClickHouse/ClickHouse/pull/36318) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Delete old packaging infrastructure. [#36330](https://github.com/ClickHouse/ClickHouse/pull/36330) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fallback to a default event in case of broken API. [#36412](https://github.com/ClickHouse/ClickHouse/pull/36412) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Check a number of required reports in BuilderSpecialReport. [#36413](https://github.com/ClickHouse/ClickHouse/pull/36413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a labeling for `Revert` PRs. [#36422](https://github.com/ClickHouse/ClickHouse/pull/36422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Disallow ALTER TTL for engines that does not support it, to avoid breaking ATTACH TABLE (closes [#33344](https://github.com/ClickHouse/ClickHouse/issues/33344)). [#33391](https://github.com/ClickHouse/ClickHouse/pull/33391) ([zhongyuankai](https://github.com/zhongyuankai)).
* Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)).
* fix issueinput_format_null_as_default does not work for DEFAULT expressions Closes [#34890](https://github.com/ClickHouse/ClickHouse/issues/34890). [#35039](https://github.com/ClickHouse/ClickHouse/pull/35039) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)).
* Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)).
* Fix bug in S3 zero-copy replication which can lead to errors like `Found parts with the same min block and with the same max block as the missing part` after concurrent fetch/drop table. [#35348](https://github.com/ClickHouse/ClickHouse/pull/35348) ([alesapin](https://github.com/alesapin)).
* Fix issue with non-existing directory https://github.com/ClickHouse/ClickHouse/runs/5588046879?check_suite_focus=true. [#35376](https://github.com/ClickHouse/ClickHouse/pull/35376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix wrong assets path in release workflow. [#35379](https://github.com/ClickHouse/ClickHouse/pull/35379) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Cache fixes for high concurrency on corner cases. [#35381](https://github.com/ClickHouse/ClickHouse/pull/35381) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix working with columns that are not needed in query in Arrow/Parquet/ORC formats, it prevents possible errors like `Unsupported <format> type <type> of an input column <column_name>` when file contains column with unsupported type and we don't use it in query. [#35406](https://github.com/ClickHouse/ClickHouse/pull/35406) ([Kruglov Pavel](https://github.com/Avogar)).
* Skip empty chunks in GroupingAggregatedTransform. [#35417](https://github.com/ClickHouse/ClickHouse/pull/35417) ([Nikita Taranov](https://github.com/nickitat)).
* Now merges executed with zero copy replication will not spam logs with message `Found parts with the same min block and with the same max block as the missing part _ on replica _. Hoping that it will eventually appear as a result of a merge.`. [#35430](https://github.com/ClickHouse/ClickHouse/pull/35430) ([alesapin](https://github.com/alesapin)).
* Fix excessive logging when using S3 as backend for MergeTree or as separate table engine/function. Fixes [#30559](https://github.com/ClickHouse/ClickHouse/issues/30559). [#35434](https://github.com/ClickHouse/ClickHouse/pull/35434) ([alesapin](https://github.com/alesapin)).
* Fix wrong result of datetime64 when negative. Close [#34831](https://github.com/ClickHouse/ClickHouse/issues/34831). [#35440](https://github.com/ClickHouse/ClickHouse/pull/35440) ([李扬](https://github.com/taiyang-li)).
* Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector<int> to DB::ColumnVector<long>'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix bug in Keeper which can lead to unstable client connections. Introduced in [#35031](https://github.com/ClickHouse/ClickHouse/issues/35031). [#35498](https://github.com/ClickHouse/ClickHouse/pull/35498) ([alesapin](https://github.com/alesapin)).
* Fix crash for function `throwIf` with constant arguments. [#35500](https://github.com/ClickHouse/ClickHouse/pull/35500) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix crash during short circuit function evaluation when one of arguments is nullable constant. Closes [#35497](https://github.com/ClickHouse/ClickHouse/issues/35497). Closes [#35496](https://github.com/ClickHouse/ClickHouse/issues/35496). [#35502](https://github.com/ClickHouse/ClickHouse/pull/35502) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix parsing of IPv6 addresses longer than 39 characters. Closes [#34022](https://github.com/ClickHouse/ClickHouse/issues/34022). [#35539](https://github.com/ClickHouse/ClickHouse/pull/35539) ([Maksim Kita](https://github.com/kitaisreal)).
* Fixed return type deduction for `caseWithExpression`. The type of the ELSE branch is now correctly taken into account. [#35576](https://github.com/ClickHouse/ClickHouse/pull/35576) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix s3 engine getting virtual columns. Closes [#35411](https://github.com/ClickHouse/ClickHouse/issues/35411). [#35586](https://github.com/ClickHouse/ClickHouse/pull/35586) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix version string setting in version_helper.py. [#35589](https://github.com/ClickHouse/ClickHouse/pull/35589) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix headers with named collections, add compression_method. Closes [#35273](https://github.com/ClickHouse/ClickHouse/issues/35273). Closes [#35269](https://github.com/ClickHouse/ClickHouse/issues/35269). [#35593](https://github.com/ClickHouse/ClickHouse/pull/35593) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Setting `database_atomic_wait_for_drop_and_detach_synchronously` worked incorrectly for `ATTACH TABLE` query when previously detached table is still in use, It's fixed. [#35594](https://github.com/ClickHouse/ClickHouse/pull/35594) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible segfault in materialised postgresql which happened if exception occurred when data, collected in memory, was synced into underlying tables. Closes [#35611](https://github.com/ClickHouse/ClickHouse/issues/35611). [#35614](https://github.com/ClickHouse/ClickHouse/pull/35614) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix `HashJoin` when columns with `LowCardinality` type are used. This closes [#35548](https://github.com/ClickHouse/ClickHouse/issues/35548). [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616) ([Antonio Andelic](https://github.com/antonio2368)).
* Check remote_url_allow_hosts before schema inference in URL engine Closes [#35064](https://github.com/ClickHouse/ClickHouse/issues/35064). [#35619](https://github.com/ClickHouse/ClickHouse/pull/35619) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix positional arguments with aliases. Closes [#35600](https://github.com/ClickHouse/ClickHouse/issues/35600). [#35620](https://github.com/ClickHouse/ClickHouse/pull/35620) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)).
* Fix usage of quota with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)).
* Fix server crash when large number of arguments are passed into `format` function. Please refer to the test file and see how to reproduce the crash. [#35651](https://github.com/ClickHouse/ClickHouse/pull/35651) ([Amos Bird](https://github.com/amosbird)).
* Fix part checking logic for parts with projections. Error happened when projection and main part had different types. This is similar to https://github.com/ClickHouse/ClickHouse/pull/33774 . The bug is addressed by @caoyang10. [#35667](https://github.com/ClickHouse/ClickHouse/pull/35667) ([Amos Bird](https://github.com/amosbird)).
* Fix check asof join key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)).
* Fix possible loss of subcolumns in type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)).
* Enable build with JIT compilation by default. [#35683](https://github.com/ClickHouse/ClickHouse/pull/35683) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix possible `Can't adjust last granule` exception while reading subcolumns of type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)).
* Fix bug in creating materialized view with subquery after server restart. Materialized view was not getting updated after inserts into underlying table after server restart. Closes [#35511](https://github.com/ClickHouse/ClickHouse/issues/35511). [#35691](https://github.com/ClickHouse/ClickHouse/pull/35691) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix dropping non-empty database in clickhouse local. Closes [#35692](https://github.com/ClickHouse/ClickHouse/issues/35692). [#35711](https://github.com/ClickHouse/ClickHouse/pull/35711) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix any/all(subquery) implementation. Closes [#35489](https://github.com/ClickHouse/ClickHouse/issues/35489). [#35727](https://github.com/ClickHouse/ClickHouse/pull/35727) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)).
* Now metadata for broken parts will be removed from metadata cache (introduced in [#32928](https://github.com/ClickHouse/ClickHouse/issues/32928)) on server start. [#35759](https://github.com/ClickHouse/ClickHouse/pull/35759) ([chen9t](https://github.com/chen9t)).
* fix filebuffer pos in RemoteReadBuffer When RemoteReadBuffer is consumed, its pos will increase, for example in HadoopSnappyReadBuffer::nextImpl. ![image](https://user-images.githubusercontent.com/80669699/160880640-8535a701-63bd-42e9-a2c2-e5f215bcd96b.png). [#35771](https://github.com/ClickHouse/ClickHouse/pull/35771) ([shuchaome](https://github.com/shuchaome)).
* Fixes parsing of the arguments of the functions `extract`. Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix inserts to columns of type `Object` in case when there is data related to several partitions in insert query. [#35806](https://github.com/ClickHouse/ClickHouse/pull/35806) ([Anton Popov](https://github.com/CurtizJ)).
* Respect only quota & period from groups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)).
* Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)).
* fix issue: [#34966](https://github.com/ClickHouse/ClickHouse/issues/34966). [#35840](https://github.com/ClickHouse/ClickHouse/pull/35840) ([zzsmdfj](https://github.com/zzsmdfj)).
* Disable `session_log` because memory safety issue has been found by fuzzing. See [#35714](https://github.com/ClickHouse/ClickHouse/issues/35714). [#35873](https://github.com/ClickHouse/ClickHouse/pull/35873) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix formatting of INSERT INFILE queries (missing quotes). [#35886](https://github.com/ClickHouse/ClickHouse/pull/35886) ([Azat Khuzhin](https://github.com/azat)).
* Fixed GA not reporting events. [#35935](https://github.com/ClickHouse/ClickHouse/pull/35935) ([peledni](https://github.com/peledni)).
* Fix reading from `Kafka` tables when `kafka_num_consumers > 1` and `kafka_thread_per_consumer = 0`. Returns parallel & multithreaded reading, accidentally broken in 21.11. Closes [#35153](https://github.com/ClickHouse/ClickHouse/issues/35153). [#35973](https://github.com/ClickHouse/ClickHouse/pull/35973) ([filimonov](https://github.com/filimonov)).
* Fix performance regression of scalar query optimization. [#35986](https://github.com/ClickHouse/ClickHouse/pull/35986) ([Amos Bird](https://github.com/amosbird)).
* Fix error while moving table with `JOIN` engine from `Ordinary` database to `Atomic`, close [#35686](https://github.com/ClickHouse/ClickHouse/issues/35686). [#35995](https://github.com/ClickHouse/ClickHouse/pull/35995) ([Vladimir C](https://github.com/vdimir)).
* Fix error `Empty list of columns in SELECT query` in CROSS JOIN close [#35672](https://github.com/ClickHouse/ClickHouse/issues/35672). [#36033](https://github.com/ClickHouse/ClickHouse/pull/36033) ([Vladimir C](https://github.com/vdimir)).
* Fix possible incorrect result of `WINDOW` functions in queries with `LIMIT` which was caused by wrong limit-push-down query plan optimization. Fixes [#36071](https://github.com/ClickHouse/ClickHouse/issues/36071) and [#23125](https://github.com/ClickHouse/ClickHouse/issues/23125). [#36075](https://github.com/ClickHouse/ClickHouse/pull/36075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Throw an exception when CH cannot execute a file instead of displaying success and silently failing. [#36088](https://github.com/ClickHouse/ClickHouse/pull/36088) ([Julian Gilyadov](https://github.com/israelg99)).
* Fix window view when is proc time and window kind larger than day, see code comment. [#36109](https://github.com/ClickHouse/ClickHouse/pull/36109) ([flynn](https://github.com/ucasfl)).
* Fix bug of read buffer from hdfs. ReadBufferFromHDFSImpl::offset was misused as offset of working_buffer, but it is file offset. cc @kssenii. [#36153](https://github.com/ClickHouse/ClickHouse/pull/36153) ([李扬](https://github.com/taiyang-li)).
* Fix crash in ParallelReadBuffer. [#36169](https://github.com/ClickHouse/ClickHouse/pull/36169) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow to convert empty strings to empty values of type `Objects`. [#36179](https://github.com/ClickHouse/ClickHouse/pull/36179) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible segfault in schema inference for JSON formats. [#36195](https://github.com/ClickHouse/ClickHouse/pull/36195) ([Kruglov Pavel](https://github.com/Avogar)).
* `CREATE TABLE ... AS` might fail with `Replica ... already exists` even if `ReplicatedMergeTree` table was created with default arguments. It's fixed. Now `{uuid}` macro is not unfolded when saving table metadata. Therefore, it's not allowed to move `ReplicatedMergeTree` table from `Atomic` to `Ordinary` database if `zookeeper_path` contains `{uuid}` macro (or table was created with default engine arguments). Fixes [#35577](https://github.com/ClickHouse/ClickHouse/issues/35577). [#36200](https://github.com/ClickHouse/ClickHouse/pull/36200) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix reading of empty arrays in reverse order (in queries with descending sorting by prefix of primary key). [#36215](https://github.com/ClickHouse/ClickHouse/pull/36215) ([Anton Popov](https://github.com/CurtizJ)).
* Play UI was not able to display some resultsets, for example `SELECT * FROM dish`. [#36283](https://github.com/ClickHouse/ClickHouse/pull/36283) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in ReadBufferFromHDFS in debug mode. <img width="1285" alt="image" src="https://user-images.githubusercontent.com/985418/163528606-6b2116de-a66b-4f71-b642-ed9afc7d7f0c.png">. [#36287](https://github.com/ClickHouse/ClickHouse/pull/36287) ([zhanglistar](https://github.com/zhanglistar)).
* Fix "Cannot find column" error for distributed queries with LIMIT BY. [#36454](https://github.com/ClickHouse/ClickHouse/pull/36454) ([Azat Khuzhin](https://github.com/azat)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "[WIP] New row policies"'. [#35454](https://github.com/ClickHouse/ClickHouse/pull/35454) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'remove ATOMIC_FLAG_INIT: depreciated in C++20 and warns in clang-14'. [#35785](https://github.com/ClickHouse/ClickHouse/pull/35785) ([Brendan Cox](https://github.com/justnoise)).
* NO CL ENTRY: 'Revert "Added support for schema inference for `hdfsCluster`"'. [#35802](https://github.com/ClickHouse/ClickHouse/pull/35802) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* NO CL ENTRY: '[Snyk] Security upgrade mkdocs from 1.1.2 to 1.3.0'. [#35864](https://github.com/ClickHouse/ClickHouse/pull/35864) ([Snyk bot](https://github.com/snyk-bot)).
* NO CL ENTRY: 'Revert "Format changes for new docs"'. [#35894](https://github.com/ClickHouse/ClickHouse/pull/35894) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "clang-tidy report issues with Medium priority"'. [#35941](https://github.com/ClickHouse/ClickHouse/pull/35941) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Fix crash in ParallelReadBuffer"'. [#36210](https://github.com/ClickHouse/ClickHouse/pull/36210) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### Bug Fix (prestable release)
* call RemoteQueryExecutor with original_query instead of an rewritten query, elimate the AMBIGUOUS_COLUMN_NAME exception. [#35748](https://github.com/ClickHouse/ClickHouse/pull/35748) ([lgbo](https://github.com/lgbo-ustc)).

View File

@ -0,0 +1,6 @@
### ClickHouse release v22.4.2.1-stable FIXME as compared to v22.4.1.2305-prestable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Fix projection analysis which might lead to wrong query result when IN subquery is used. This fixes [#35336](https://github.com/ClickHouse/ClickHouse/issues/35336). [#35631](https://github.com/ClickHouse/ClickHouse/pull/35631) ([Amos Bird](https://github.com/amosbird)).

View File

@ -0,0 +1,6 @@
### ClickHouse release v22.4.3.3-stable FIXME as compared to v22.4.2.1-stable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)).

View File

@ -0,0 +1,8 @@
### ClickHouse release v22.4.4.7-stable FIXME as compared to v22.4.3.3-stable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#36582](https://github.com/ClickHouse/ClickHouse/issues/36582): Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416) . This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#36673](https://github.com/ClickHouse/ClickHouse/issues/36673): Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)).

View File

@ -0,0 +1,9 @@
### ClickHouse release v22.4.5.9-stable FIXME as compared to v22.4.4.7-stable
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#36524](https://github.com/ClickHouse/ClickHouse/issues/36524): Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#36635](https://github.com/ClickHouse/ClickHouse/issues/36635): Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#36794](https://github.com/ClickHouse/ClickHouse/issues/36794): Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#36926](https://github.com/ClickHouse/ClickHouse/issues/36926): Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)).

View File

@ -694,6 +694,49 @@ auto s = std::string{"Hello"};
**2.** Exception specifiers from C++03 are not used.
**3.** Constructs which have convenient syntactic sugar in modern C++, e.g.
```
// Traditional way without syntactic sugar
template <typename G, typename = std::enable_if_t<std::is_same<G, F>::value, void>> // SFINAE via std::enable_if, usage of ::value
std::pair<int, int> func(const E<G> & e) // explicitly specified return type
{
if (elements.count(e)) // .count() membership test
{
// ...
}
elements.erase(
std::remove_if(
elements.begin(), elements.end(),
[&](const auto x){
return x == 1;
}),
elements.end()); // remove-erase idiom
return std::make_pair(1, 2); // create pair via make_pair()
}
// With syntactic sugar (C++14/17/20)
template <typename G>
requires std::same_v<G, F> // SFINAE via C++20 concept, usage of C++14 template alias
auto func(const E<G> & e) // auto return type (C++14)
{
if (elements.contains(e)) // C++20 .contains membership test
{
// ...
}
elements.erase_if(
elements,
[&](const auto x){
return x == 1;
}); // C++20 std::erase_if
return {1, 2}; // or: return std::pair(1, 2); // create pair via initialization list or value initialization (C++17)
}
```
## Platform {#platform}
**1.** We write code for a specific platform.

View File

@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser
Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from CLickHouse and it is available to general public.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
### Running a Test Locally {#functional-test-locally}
@ -30,7 +30,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes
### Adding a New Test
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.

View File

@ -13,7 +13,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_
Engine parameters:
- `database` Database name. Instead of the database name, you can use a constant expression that returns a string.
- `database` Database name. You can use `currentDatabase()` or another constant expression that returns a string.
- `table` Table to flush data to.
- `num_layers` Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16.
- `min_time`, `max_time`, `min_rows`, `max_rows`, `min_bytes`, and `max_bytes` Conditions for flushing data from the buffer.

View File

@ -45,7 +45,7 @@ clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
# for hits_v1
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# for hits_100m_obfuscated
clickhouse-client --query="CREATE TABLE hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# import data
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000

View File

@ -127,22 +127,22 @@ After that downloaded archives should be unpacked and installed with installatio
LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
export LATEST_VERSION
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION-amd64.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION-amd64.tgz"
curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION-amd64.tgz"
tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
tar -xzvf "clickhouse-common-static-$LATEST_VERSION-amd64.tgz"
sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz"
sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
tar -xzvf "clickhouse-server-$LATEST_VERSION-amd64.tgz"
sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh"
sudo /etc/init.d/clickhouse-server start
tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
tar -xzvf "clickhouse-client-$LATEST_VERSION-amd64.tgz"
sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"
```

View File

@ -9,66 +9,67 @@ ClickHouse can accept and return data in various formats. A format supported for
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
The supported formats are:
| Input | Output |
|-------------------------------------------------------------------------------------------|-------|-------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Prometheus](#prometheus) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
| Format | Input | Output |
|-------------------------------------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Prometheus](#prometheus) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
| [MySQLDump](#mysqldump) | ✔ | ✗ |
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.

View File

@ -426,7 +426,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — use with `static` type, response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration sends to client.

View File

@ -0,0 +1,72 @@
---
sidebar_position: 20
sidebar_label: PostgreSQL Interface
---
# PostgreSQL Interface
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directy supported by ClickHouse (for example, Amazon Redshift).
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
```xml
<clickhouse>
<postgresql_port>9005</postgresql_port>
</clickhouse>
```
Startup your ClickHouse server and look for a log message similar to the following that mentions **Listening for PostgreSQL compatibility protocol**:
```response
{} <Information> Application: Listening for PostgreSQL compatibility protocol: 127.0.0.1:9005
```
## Connect psql to ClickHouse
The following command demonstrates how to connect the PostgreSQL client `psql` to ClickHouse:
```bash
psql -p [port] -h [hostname] -U [username] [database_name]
```
For example:
```bash
psql -p 9005 -h 127.0.0.1 -U alice default
```
:::note
The `psql` client requires a login with a password, so you will not be able connect using the `default` user with no password. Either assign a password to the `default` user, or login as a different user.
:::
The `psql` client prompts for the password:
```response
Password for user alice:
psql (14.2, server 22.3.1.1)
WARNING: psql major version 14, server major version 22.
Some psql features might not work.
Type "help" for help.
default=>
```
And that's it! You now have a PostgreSQL client connected to ClickHouse, and all commands and queries are executed on ClickHouse.
:::caution
The PostgreSQL protocol currently only supports plain-text passwords.
:::
## Using SSL
If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and unsecure clients).
Each client has their own method of how to connect using SSL. The following command demonstrates how to pass in the certificates and key to securely connect `psql` to ClickHouse:
```bash
psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certificate.pem sslkey=/path/to/key.pem sslrootcert=/path/to/rootcert.pem sslmode=verify-ca"
```
View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings.
[Original article](https://clickhouse.com/docs/en/interfaces/postgresql)

View File

@ -1,48 +0,0 @@
---
sidebar_position: 108
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value 10 is used.
**Arguments**
- `column` The value.
- `expr` — Optional. The field or expresion to sort by. If not set values are sorted by themselves.
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Or the last 10:
``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90] │
└──────────────────────────────────────────────┘
```

View File

@ -620,9 +620,9 @@ arraySlice(array, offset[, length])
**Arguments**
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
- `array` Array of data.
- `offset` Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1.
- `length` The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length]`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`.
**Example**

View File

@ -130,13 +130,9 @@ bitSlice(s, offset[, length])
**Arguments**
- `s` — s is [String](../../sql-reference/data-types/string.md)
or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an
indent on the right. Numbering of the bits begins with 1.
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [
offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string].
If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1.
- `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right.
**Returned value**

View File

@ -478,3 +478,17 @@ Result:
│ 0 │
└──────────────────────────────────────────────┘
```
Query:
``` sql
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
```
Result:
``` text
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```

View File

@ -480,7 +480,7 @@ Result:
## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring}
Returns a substring starting with the byte from the offset index that is length bytes long. Character indexing starts from one (as in standard SQL). The offset and length arguments must be constants.
Returns a substring starting with the byte from the offset index that is length bytes long. Character indexing starts from one (as in standard SQL).
## substringUTF8(s, offset, length) {#substringutf8}

View File

@ -11,7 +11,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| --------| ----------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
| `ROWS` frame | supported |
| `RANGE` frame | supported, the default |
| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |

View File

@ -410,7 +410,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
- `status` — используется с типом `static`, возвращает код состояния ответа.
- `content_type` — используется с типом `static`, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `content_type` — используется со всеми типами, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — используется с типом`static`, содержимое ответа, отправленное клиенту, при использовании префикса file:// or config://, находит содержимое из файла или конфигурации, отправленного клиенту.

View File

@ -31,7 +31,5 @@ sidebar_label: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
- `UInt16` — \[0 : 65535\]
- `UInt32` — \[0 : 4294967295\]
- `UInt64` — \[0 : 18446744073709551615\]
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
`UInt128` пока не реализован.

View File

@ -21,7 +21,7 @@ LowCardinality(data_type)
`LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) в столбцы типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений.
Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.

View File

@ -574,9 +574,9 @@ arraySlice(array, offset[, length])
**Аргументы**
- `array` массив данных.
- `offset` отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчет элементов массива начинается с 1.
- `length` длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length)`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
- `array` массив данных.
- `offset` отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчёт элементов массива начинается с 1.
- `length` длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length]`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`.
**Пример**

View File

@ -446,3 +446,17 @@ SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16');
│ 0 │
└──────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128');
```
Результат:
``` text
┌─isIPAddressInRange('::ffff:192.168.0.1', '::ffff:192.168.0.4/128')─┐
│ 0 │
└────────────────────────────────────────────────────────────────────┘
```

View File

@ -157,6 +157,7 @@ def build(args):
if not args.skip_website:
website.process_benchmark_results(args)
website.minify_website(args)
redirects.build_static_redirects(args)

View File

@ -1,7 +1,10 @@
import hashlib
import json
import logging
import os
import shutil
import subprocess
import bs4
import util
@ -178,6 +181,59 @@ def build_website(args):
f.write(content.encode("utf-8"))
def get_css_in(args):
return [
f"'{args.website_dir}/css/bootstrap.css'",
f"'{args.website_dir}/css/docsearch.css'",
f"'{args.website_dir}/css/base.css'",
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'",
]
def get_js_in(args):
return [
f"'{args.website_dir}/js/jquery.js'",
f"'{args.website_dir}/js/popper.js'",
f"'{args.website_dir}/js/bootstrap.js'",
f"'{args.website_dir}/js/sentry.js'",
f"'{args.website_dir}/js/base.js'",
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'",
]
def minify_website(args):
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {

View File

@ -397,9 +397,9 @@ SELECT arrayPushFront(['b'], 'a') AS res
**参数**
- `array` 数组。
- `offset` 数组的偏移。正值表示左侧的偏移量负值表示右侧的缩进值。数组下标从1开始。
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offsetarray_length - length`。如果省略该值,则该函数返回`[offsetthe_end_of_array]`。
- `array` 数组。
- `offset` 数组的偏移。正值表示左侧的偏移量负值表示右侧的缩进值。数组下标从1开始。
- `length` - 子数组的长度。如果指定负值,则该函数返回`[offsetarray_length - length]`。如果省略该值,则该函数返回`[offsetthe_end_of_array]`。
**示例**

View File

@ -4,6 +4,7 @@
#include <iostream>
#include <iomanip>
#include <optional>
#include <string_view>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
@ -48,6 +49,7 @@
#endif
namespace fs = std::filesystem;
using namespace std::literals;
namespace DB
@ -1038,6 +1040,158 @@ void Client::processConfig()
client_info.quota_key = config().getString("quota_key", "");
}
void Client::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg);
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
}

View File

@ -36,6 +36,13 @@ protected:
void processConfig() override;
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) override;
private:
void printChangedSettings() const;
std::vector<String> loadWarningMessages();

View File

@ -68,6 +68,7 @@ namespace ErrorCodes
extern const int NOT_ENOUGH_SPACE;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_KILL;
extern const int BAD_ARGUMENTS;
}
}
@ -1062,8 +1063,11 @@ namespace
return pid;
}
int stop(const fs::path & pid_file, bool force)
int stop(const fs::path & pid_file, bool force, bool do_not_kill)
{
if (force && do_not_kill)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible");
UInt64 pid = isRunning(pid_file);
if (!pid)
@ -1092,9 +1096,15 @@ namespace
if (try_num == num_tries)
{
fmt::print("Will terminate forcefully.\n", pid);
if (do_not_kill)
{
fmt::print("Process (pid = {}) is still running. Will not try to kill it.\n", pid);
return 1;
}
fmt::print("Will terminate forcefully (pid = {}).\n", pid);
if (0 == kill(pid, 9))
fmt::print("Sent kill signal.\n", pid);
fmt::print("Sent kill signal (pid = {}).\n", pid);
else
throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR);
@ -1175,6 +1185,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
("prefix", po::value<std::string>()->default_value("/"), "prefix for all paths")
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("force", po::bool_switch(), "Stop with KILL signal instead of TERM")
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
;
po::variables_map options;
@ -1189,7 +1200,9 @@ int mainEntryClickHouseStop(int argc, char ** argv)
fs::path prefix = options["prefix"].as<std::string>();
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
return stop(pid_file, options["force"].as<bool>());
bool force = options["force"].as<bool>();
bool do_not_kill = options["do-not-kill"].as<bool>();
return stop(pid_file, force, do_not_kill);
}
catch (...)
{
@ -1247,6 +1260,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
("pid-path", po::value<std::string>()->default_value("var/run/clickhouse-server"), "directory for pid file")
("user", po::value<std::string>()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user")
("force", po::value<bool>()->default_value(false), "Stop with KILL signal instead of TERM")
("do-not-kill", po::bool_switch(), "Do not send KILL even if TERM did not help")
;
po::variables_map options;
@ -1265,7 +1279,9 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
fs::path config = prefix / options["config-path"].as<std::string>() / "config.xml";
fs::path pid_file = prefix / options["pid-path"].as<std::string>() / "clickhouse-server.pid";
if (int res = stop(pid_file, options["force"].as<bool>()))
bool force = options["force"].as<bool>();
bool do_not_kill = options["do-not-kill"].as<bool>();
if (int res = stop(pid_file, force, do_not_kill))
return res;
return start(user, executable, config, pid_file);

View File

@ -28,6 +28,7 @@
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/UseSSL.h>
#include <IO/IOThreadPool.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/ErrorHandlers.h>
@ -105,6 +106,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
);
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
}
@ -726,6 +738,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
{
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
const char * arg = argv[arg_num];
common_arguments.emplace_back(arg);
}
}
}
#pragma GCC diagnostic ignored "-Wunused-function"

View File

@ -45,6 +45,8 @@ protected:
const std::vector<Arguments> &, const std::vector<Arguments> &) override;
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;

View File

@ -540,7 +540,7 @@ static void sanityChecks(Server & server)
try
{
if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
server.context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
}
catch (...)
{
@ -558,7 +558,7 @@ static void sanityChecks(Server & server)
try
{
if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
server.context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
}
catch (...)
{
@ -1088,11 +1088,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
if (config->has("global_memory_usage_overcommit_max_wait_microseconds"))
{
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 0);
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
}
UInt64 max_overcommit_wait_time = config->getUInt64("global_memory_usage_overcommit_max_wait_microseconds", 200);
global_overcommit_tracker->setMaxWaitTime(max_overcommit_wait_time);
total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
@ -1294,17 +1291,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Listening for {}", server.getDescription());
}
auto & access_control = global_context->getAccessControl();
if (config().has("custom_settings_prefixes"))
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
/// Initialize access storages.
auto & access_control = global_context->getAccessControl();
try
{
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
}
catch (...)
{

View File

@ -545,6 +545,14 @@
-->
</user_directories>
<access_control_improvements>
<!-- Enables logic that users without permissive row policies can still read rows using a SELECT query.
For example, if there two users A, B and a row policy is defined only for A, then
if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
By default this setting is false for compatibility with earlier access configurations. -->
<users_without_row_policies_can_read_rows>false</users_without_row_policies_can_read_rows>
</access_control_improvements>
<!-- Default profile of settings. -->
<default_profile>default</default_profile>
@ -1297,8 +1305,8 @@
-->
<!-- Uncomment if enable merge tree metadata cache -->
<merge_tree_metadata_cache>
<!--merge_tree_metadata_cache>
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache>
</merge_tree_metadata_cache-->
</clickhouse>

View File

@ -129,8 +129,8 @@
#query_div
{
/* Make enough space for even huge queries. */
height: 20%;
/* Make enough space for medium/large queries but allowing query textarea to grow. */
min-height: 20%;
}
#query
@ -748,7 +748,7 @@
const max_rows = 10000 / response.meta.length;
let row_num = 0;
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(U?Int|Decimal|Float)/));
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
const column_maximums = column_is_number.map((elem, idx) => elem ? Math.max(...response.data.map(row => row[idx])) : 0);
const column_minimums = column_is_number.map((elem, idx) => elem ? Math.min(...response.data.map(row => Math.max(0, row[idx]))) : 0);
const column_need_render_bars = column_is_number.map((elem, idx) => column_maximums[idx] > 0 && column_maximums[idx] > column_minimums[idx]);

View File

@ -6,9 +6,6 @@
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<!-- How to choose between replicas during distributed query processing.
random - choose random replica from set of replicas with minimum number of errors
nearest_hostname - from set of replicas with minimum number of errors, choose replica

View File

@ -149,6 +149,24 @@ AccessControl::AccessControl()
AccessControl::~AccessControl() = default;
void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
if (config_.has("custom_settings_prefixes"))
setCustomSettingsPrefixes(config_.getString("custom_settings_prefixes"));
setNoPasswordAllowed(config_.getBool("allow_no_password", true));
setPlaintextPasswordAllowed(config_.getBool("allow_plaintext_password", true));
setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool(
"access_control_improvements.users_without_row_policies_can_read_rows",
false /* false because we need to be compatible with earlier access configurations */));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
}
void AccessControl::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_)
{
auto storages = getStoragesPtr();
@ -170,11 +188,7 @@ void AccessControl::addUsersConfigStorage(const Poco::Util::AbstractConfiguratio
void AccessControl::addUsersConfigStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & users_config_)
{
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
@ -207,11 +221,7 @@ void AccessControl::addUsersConfigStorage(
return;
}
}
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto is_no_password_allowed_function = [this]() -> bool { return isNoPasswordAllowed(); };
auto is_plaintext_password_allowed_function = [this]() -> bool { return isPlaintextPasswordAllowed(); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function,
is_no_password_allowed_function, is_plaintext_password_allowed_function);
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, *this);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());

View File

@ -50,6 +50,9 @@ public:
AccessControl();
~AccessControl() override;
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_);
/// Parses access entities from a configuration loaded from users.xml.
/// This function add UsersConfigAccessStorage if it wasn't added before.
void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config_);
@ -122,6 +125,12 @@ public:
void setPlaintextPasswordAllowed(const bool allow_plaintext_password_);
bool isPlaintextPasswordAllowed() const;
/// Enables logic that users without permissive row policies can still read rows using a SELECT query.
/// For example, if there two users A, B and a row policy is defined only for A, then
/// if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }
UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const;
void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config);
@ -178,6 +187,7 @@ private:
std::unique_ptr<CustomSettingsPrefixes> custom_settings_prefixes;
std::atomic_bool allow_plaintext_password = true;
std::atomic_bool allow_no_password = true;
std::atomic_bool users_without_row_policies_can_read_rows = false;
};
}

View File

@ -28,17 +28,25 @@ namespace
permissions.push_back(filter);
}
ASTPtr getResult() &&
ASTPtr getResult(bool users_without_row_policies_can_read_rows) &&
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
if (!permissions.empty() || !users_without_row_policies_can_read_rows)
{
/// Process permissive filters.
restrictions.push_back(makeASTForLogicalOr(std::move(permissions)));
}
/// Process restrictive filters.
auto result = makeASTForLogicalAnd(std::move(restrictions));
ASTPtr result;
if (!restrictions.empty())
result = makeASTForLogicalAnd(std::move(restrictions));
bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
if (result)
{
bool value;
if (tryGetLiteralBool(result.get(), value) && value)
result = nullptr; /// The condition is always true, no need to check it.
}
return result;
}
@ -234,7 +242,7 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
{
auto & mixed_filter = (*mixed_filters)[key];
mixed_filter.database_and_table_name = mixer.database_and_table_name;
mixed_filter.ast = std::move(mixer.mixer).getResult();
mixed_filter.ast = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
}
enabled.mixed_filters.store(mixed_filters);

View File

@ -3,6 +3,7 @@
#include <Access/RowPolicy.h>
#include <Access/User.h>
#include <Access/SettingsProfile.h>
#include <Access/AccessControl.h>
#include <Dictionaries/IDictionary.h>
#include <Common/Config/ConfigReloader.h>
#include <Common/StringUtils/StringUtils.h>
@ -339,7 +340,7 @@ namespace
}
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config)
std::vector<AccessEntityPtr> parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows)
{
std::map<std::pair<String /* database */, String /* table */>, std::unordered_map<String /* user */, String /* filter */>> all_filters_map;
@ -395,8 +396,19 @@ namespace
const auto & [database, table_name] = database_and_table_name;
for (const String & user_name : user_names)
{
String filter;
auto it = user_to_filters.find(user_name);
String filter = (it != user_to_filters.end()) ? it->second : "1";
if (it != user_to_filters.end())
{
filter = it->second;
}
else
{
if (users_without_row_policies_can_read_rows)
continue;
else
filter = "1";
}
auto policy = std::make_shared<RowPolicy>();
policy->setFullName(user_name, database, table_name);
@ -411,7 +423,7 @@ namespace
SettingsProfileElements parseSettingsConstraints(const Poco::Util::AbstractConfiguration & config,
const String & path_to_constraints,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
SettingsProfileElements profile_elements;
Poco::Util::AbstractConfiguration::Keys keys;
@ -419,8 +431,7 @@ namespace
for (const String & setting_name : keys)
{
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);
SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -448,7 +459,7 @@ namespace
std::shared_ptr<SettingsProfile> parseSettingsProfile(
const Poco::Util::AbstractConfiguration & config,
const String & profile_name,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
auto profile = std::make_shared<SettingsProfile>();
profile->setName(profile_name);
@ -470,13 +481,12 @@ namespace
if (key == "constraints" || key.starts_with("constraints["))
{
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, check_setting_name_function));
profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control));
continue;
}
const auto & setting_name = key;
if (check_setting_name_function)
check_setting_name_function(setting_name);
access_control.checkSettingNameIsAllowed(setting_name);
SettingsProfileElement profile_element;
profile_element.setting_name = setting_name;
@ -490,7 +500,7 @@ namespace
std::vector<AccessEntityPtr> parseSettingsProfiles(
const Poco::Util::AbstractConfiguration & config,
Fn<void(std::string_view)> auto && check_setting_name_function)
const AccessControl & access_control)
{
Poco::Util::AbstractConfiguration::Keys profile_names;
config.keys("profiles", profile_names);
@ -502,7 +512,7 @@ namespace
{
try
{
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
profiles.push_back(parseSettingsProfile(config, profile_name, access_control));
}
catch (Exception & e)
{
@ -515,13 +525,8 @@ namespace
}
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: UsersConfigAccessStorage(STORAGE_TYPE, check_setting_name_function_, is_no_password_allowed_function_, is_plaintext_password_allowed_function_)
{
}
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_)
: IAccessStorage(storage_name_), check_setting_name_function(check_setting_name_function_),is_no_password_allowed_function(is_no_password_allowed_function_), is_plaintext_password_allowed_function(is_plaintext_password_allowed_function_)
UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_)
: IAccessStorage(storage_name_), access_control(access_control_)
{
}
@ -563,16 +568,16 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu
{
try
{
bool no_password_allowed = is_no_password_allowed_function();
bool plaintext_password_allowed = is_plaintext_password_allowed_function();
bool no_password_allowed = access_control.isNoPasswordAllowed();
bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed();
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config))
for (const auto & entity : parseRowPolicies(config, access_control.isEnabledUsersWithoutRowPoliciesCanReadRows()))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
for (const auto & entity : parseSettingsProfiles(config, access_control))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
}

View File

@ -12,6 +12,7 @@ namespace Poco::Util
namespace DB
{
class AccessControl;
class ConfigReloader;
/// Implementation of IAccessStorage which loads all from users.xml periodically.
@ -20,13 +21,8 @@ class UsersConfigAccessStorage : public IAccessStorage
public:
static constexpr char STORAGE_TYPE[] = "users.xml";
using CheckSettingNameFunction = std::function<void(const std::string_view &)>;
using IsNoPasswordFunction = std::function<bool()>;
using IsPlaintextPasswordFunction = std::function<bool()>;
UsersConfigAccessStorage(const String & storage_name_ = STORAGE_TYPE, const CheckSettingNameFunction & check_setting_name_function_ = {}, const IsNoPasswordFunction & is_no_password_allowed_function_ ={}, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_ = {}); /// NOLINT
UsersConfigAccessStorage(const CheckSettingNameFunction & check_setting_name_function_, const IsNoPasswordFunction & is_no_password_allowed_function_, const IsPlaintextPasswordFunction & is_plaintext_password_allowed_function_); /// NOLINT
UsersConfigAccessStorage(const String & storage_name_, const AccessControl & access_control_);
~UsersConfigAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@ -58,10 +54,8 @@ private:
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override;
const AccessControl & access_control;
MemoryAccessStorage memory_storage;
CheckSettingNameFunction check_setting_name_function;
IsNoPasswordFunction is_no_password_allowed_function;
IsPlaintextPasswordFunction is_plaintext_password_allowed_function;
String path;
std::unique_ptr<ConfigReloader> config_reloader;
mutable std::mutex load_mutex;

View File

@ -1,147 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
AggregateFunctionGroupArraySorted;
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
WhichDataType which(argument_types[0]);
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
else
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
}
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
if (params.size() == 1)
{
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
throw Exception(
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
threshold = k;
}
else if (!params.empty())
{
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (argument_types.size() == 2)
{
if (isNumber(argument_types[1]))
{
#define DISPATCH2(A, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
WhichDataType which(argument_types[1]);
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH2(Enum8, Int8)
DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
}
else
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
}
}
else if (argument_types.size() == 1)
{
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
}
else
{
throw Exception(
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
}
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}

View File

@ -1,315 +0,0 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
if constexpr (std::is_same_v<TColumn, StringRef>)
{
if constexpr (is_plain)
{
StringRef str = column->getDataAt(row);
auto ptr = arena->alloc(str.size);
std::copy(str.data, str.data + str.size, ptr);
return StringRef(ptr, str.size);
}
else
{
const char * begin = nullptr;
return column->serializeValueIntoArena(row, *arena, begin);
}
}
else
{
if constexpr (std::is_same_v<TColumn, UInt64>)
return column->getUInt(row);
else
return column->getInt(row);
}
}
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
for (size_t i = 0; i < threshold; i++)
{
results[i] = 0;
}
threshold = std::min(row_end - row_begin, threshold);
size_t current_max = 0;
size_t cur;
size_t z;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
//Starting from the highest values and we look for the immediately lower than the given one
for (cur = current_max; cur > 0; cur--)
{
if (data[i] > data[results[cur - 1]])
break;
}
if (cur < threshold)
{
//Move all the higher values 1 position to the right
for (z = std::min(threshold - 1, current_max); z > cur; z--)
results[z] = results[z - 1];
if (current_max < threshold)
++current_max;
//insert element into the given position
results[cur] = i;
}
}
return current_max;
}
template <typename T>
struct SortableItem
{
T a;
size_t b;
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};
template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
std::vector<SortableItem<TColumn>> dataIndexed(row_end);
size_t num_elements_filtered = 0;
for (size_t i = row_begin; i < row_end; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
dataIndexed.data()[num_elements_filtered].a = data[i];
dataIndexed.data()[num_elements_filtered].b = i;
num_elements_filtered++;
}
threshold = std::min(num_elements_filtered, threshold);
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
for (size_t i = 0; i < threshold; i++)
{
results[i] = dataIndexed[i].b;
}
return threshold;
}
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t row_begin, size_t row_end, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
{
if (filter != nullptr)
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_low_threshold(data, row_begin, row_end, threshold, results);
}
else
{
if (filter != nullptr)
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results, filter);
else
return getFirstNElements_high_threshold(data, row_begin, row_end, threshold, results);
}
}
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
using Base = IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>;
UInt64 threshold;
DataTypePtr & input_data_type;
mutable std::mutex mutex;
static void deserializeAndInsert(StringRef str, IColumn & data_to);
public:
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>(argument_types_, params)
, threshold(threshold_)
, input_data_type(this->argument_types[0])
{
}
void create(AggregateDataPtr place) const override
{
Base::create(place);
this->data(place).threshold = threshold;
}
String getName() const override { return "groupArraySorted"; }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
bool allocatesMemoryInArena() const override
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
return true;
else
return false;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
data.add(
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
}
else
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
}
}
template <typename TColumn, bool is_plain, typename TFunc>
void
forFirstRows(size_t row_begin, size_t row_end, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
{
const TColumn * values = nullptr;
std::unique_ptr<std::vector<TColumn>> values_vector;
std::vector<size_t> best_rows(threshold);
if constexpr (std::is_same_v<TColumn, StringRef>)
{
values_vector.reset(new std::vector<TColumn>(row_end));
for (size_t i = row_begin; i < row_end; i++)
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
values = (*values_vector).data();
}
else
{
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
values = column.getData().data();
}
const UInt8 * filter = nullptr;
StringRef refFilter;
if (if_argument_pos >= 0)
{
refFilter = columns[if_argument_pos]->getRawData();
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
}
size_t num_elements = getFirstNElements(values, row_begin, row_end, threshold, best_rows.data(), filter);
for (size_t i = 0; i < num_elements; i++)
{
func(best_rows[i], values);
}
}
void addBatchSinglePlace(
size_t row_begin,
size_t row_end,
AggregateDataPtr place,
const IColumn ** columns,
Arena * arena,
ssize_t if_argument_pos) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
forFirstRows<TColumnB, is_plain_b>(
row_begin, row_end, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
});
}
else
{
forFirstRows<TColumnA, is_plain_a>(
row_begin, row_end, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
{
data.add(values[row]);
});
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).deserialize(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
auto & values = this->data(place).values;
offsets_to.push_back(offsets_to.back() + values.size());
IColumn & data_to = arr_to.getData();
for (auto value : values)
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
{
auto str = State::itemValue(value);
if constexpr (is_plain_a)
{
data_to.insertData(str.data, str.size);
}
else
{
data_to.deserializeAndInsertFromArena(str.data);
}
}
else
{
data_to.insert(State::itemValue(value));
}
}
}
};
}

View File

@ -1,162 +0,0 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
writeVarInt(item, buf);
}
else
{
writeVarUInt(item, buf);
}
}
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
writeBinary(item, buf);
}
template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
DB::Int64 val;
readVarT(val, buf);
item = val;
}
else
{
DB::UInt64 val;
readVarT(val, buf);
item = val;
}
}
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
item = readStringBinaryInto(*arena, buf);
}
template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
typedef typename Storage::value_type ValueType;
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
inline void narrowDown()
{
while (values.size() > threshold)
values.erase(--values.end());
}
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
{
values.merge(Storage(other.values));
narrowDown();
}
void serialize(WriteBuffer & buf) const
{
writeOneItem(buf, UInt64(values.size()));
for (auto value : values)
{
serializeItem(buf, value);
}
}
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
void deserialize(ReadBuffer & buf, Arena * arena)
{
values.clear();
UInt64 length;
readOneItem(buf, nullptr, length);
while (length--)
{
values.insert(deserializeItem(buf, arena));
}
narrowDown();
}
UInt64 threshold;
Storage values;
};
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
using Base::Base;
void add(T item, TIndex weight)
{
Base::values.insert({weight, item});
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
{
writeOneItem(buf, value.first);
writeOneItem(buf, value.second);
}
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
TIndex first;
T second;
readOneItem(buf, arena, first);
readOneItem(buf, arena, second);
return {first, second};
}
static T itemValue(typename Base::ValueType & value) { return value.second; }
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
using Base::Base;
void add(T item)
{
Base::values.insert(item);
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
T value;
readOneItem(buf, arena, value);
return value;
}
static T itemValue(typename Base::ValueType & value) { return value; }
};
}

View File

@ -59,7 +59,6 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -131,7 +130,6 @@ void registerAggregateFunctions()
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerWindowFunctions(factory);
}

View File

@ -2,7 +2,6 @@
#include <iostream>
#include <iomanip>
#include <string_view>
#include <filesystem>
#include <map>
#include <unordered_map>
@ -392,7 +391,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
/// The header block containing zero rows was used to initialize
/// output_format, do not output it.
@ -439,14 +438,14 @@ void ClientBase::onLogData(Block & block)
void ClientBase::onTotals(Block & block, ASTPtr parsed_query)
{
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
output_format->setTotals(block);
}
void ClientBase::onExtremes(Block & block, ASTPtr parsed_query)
{
initBlockOutputStream(block, parsed_query);
initOutputFormat(block, parsed_query);
output_format->setExtremes(block);
}
@ -466,7 +465,7 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
}
void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
if (!output_format)
@ -1430,15 +1429,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
apply_query_settings(*with_output->settings_ast);
if (!connection->checkConnected())
{
auto poco_logs_level = Poco::Logger::parseLevel(config().getString("send_logs_level", "none"));
/// Print under WARNING also because it is used by clickhouse-test.
if (poco_logs_level >= Poco::Message::PRIO_WARNING)
{
fmt::print(stderr, "Connection lost. Reconnecting.\n");
}
connect();
}
ASTPtr input_function;
if (insert && insert->select)
@ -1495,7 +1486,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
std::cout << std::endl
<< processed_rows << " row" << (processed_rows == 1 ? "" : "s")
<< " in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
progress_indication.writeFinalProgress();
std::cout << std::endl << std::endl;
}
@ -2065,156 +2058,6 @@ void ClientBase::showClientVersion()
}
void ClientBase::readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
* where possible args are file, name, format, structure, types;
* - param arguments for prepared statements.
* Split these groups before processing.
*/
bool in_external_group = false;
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (arg == "--external")
{
in_external_group = true;
external_tables_arguments.emplace_back(Arguments{""});
}
/// Options with value after equal sign.
else if (
in_external_group
&& (arg.starts_with("--file=") || arg.starts_with("--name=") || arg.starts_with("--format=") || arg.starts_with("--structure=")
|| arg.starts_with("--types=")))
{
external_tables_arguments.back().emplace_back(arg);
}
/// Options with value after whitespace.
else if (in_external_group && (arg == "--file" || arg == "--name" || arg == "--format" || arg == "--structure" || arg == "--types"))
{
if (arg_num + 1 < argc)
{
external_tables_arguments.back().emplace_back(arg);
++arg_num;
arg = argv[arg_num];
external_tables_arguments.back().emplace_back(arg);
}
else
break;
}
else
{
in_external_group = false;
/// Parameter arg after underline.
if (arg.starts_with("--param_"))
{
auto param_continuation = arg.substr(strlen("--param_"));
auto equal_pos = param_continuation.find_first_of('=');
if (equal_pos == std::string::npos)
{
/// param_name value
++arg_num;
if (arg_num >= argc)
throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
query_parameters.emplace(String(param_continuation), String(arg));
}
else
{
if (equal_pos == 0)
throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
/// param_name=value
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg.starts_with("--host") || arg.starts_with("-h"))
{
std::string host_arg;
/// --host host
if (arg == "--host" || arg == "-h")
{
++arg_num;
if (arg_num >= argc)
throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
host_arg = "--host=";
host_arg.append(arg);
}
else
host_arg = arg;
/// --port port1 --host host1
if (!prev_port_arg.empty())
{
hosts_and_ports_arguments.push_back({host_arg, prev_port_arg});
prev_port_arg.clear();
}
else
{
/// --host host1 --host host2
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
prev_host_arg = host_arg;
}
}
else if (arg.starts_with("--port"))
{
auto port_arg = String{arg};
/// --port port
if (arg == "--port")
{
port_arg.push_back('=');
++arg_num;
if (arg_num >= argc)
throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS);
arg = argv[arg_num];
port_arg.append(arg);
}
/// --host host1 --port port1
if (!prev_host_arg.empty())
{
hosts_and_ports_arguments.push_back({port_arg, prev_host_arg});
prev_host_arg.clear();
}
else
{
/// --port port1 --port port2
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
prev_port_arg = port_arg;
}
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else
common_arguments.emplace_back(arg);
}
}
if (!prev_host_arg.empty())
hosts_and_ports_arguments.push_back({prev_host_arg});
if (!prev_port_arg.empty())
hosts_and_ports_arguments.push_back({prev_port_arg});
}
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)

View File

@ -106,6 +106,14 @@ protected:
bool processQueryText(const String & text);
virtual void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments) = 0;
private:
void receiveResult(ASTPtr parsed_query);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -131,19 +139,13 @@ private:
void sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query);
void sendExternalTables(ASTPtr parsed_query);
void initBlockOutputStream(const Block & block, ASTPtr parsed_query);
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();
String prompt() const;
void resetOutput();
void outputQueryInfo(bool echo_query_);
void readArguments(
int argc,
char ** argv,
Arguments & common_arguments,
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments);
void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);
void updateSuggest(const ASTPtr & ast);

View File

@ -410,7 +410,7 @@ Packet LocalConnection::receivePacket()
{
if (state->profile_info)
{
packet.profile_info = std::move(*state->profile_info);
packet.profile_info = *state->profile_info;
state->profile_info.reset();
}
next_packet_type.reset();

View File

@ -306,7 +306,6 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
{
assert(src.isFinalized());
const auto & src_column = src.data.back();
const auto & src_type = src.least_common_type.get();
@ -618,9 +617,17 @@ void ColumnObject::get(size_t n, Field & res) const
}
}
void ColumnObject::insertFrom(const IColumn & src, size_t n)
{
insert(src[n]);
finalize();
}
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const auto & src_object = assert_cast<const ColumnObject &>(src);
if (!src_object.isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject");
for (auto & entry : subcolumns)
{
@ -630,6 +637,33 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len
entry->data.insertManyDefaults(length);
}
for (const auto & entry : src_object.subcolumns)
{
if (!hasSubcolumn(entry->path))
{
if (entry->path.hasNested())
{
const auto & base_type = entry->data.getLeastCommonTypeBase();
FieldInfo field_info
{
.scalar_type = base_type,
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = entry->data.getNumberOfDimensions(),
};
addNestedSubcolumn(entry->path, field_info, num_rows);
}
else
{
addSubcolumn(entry->path, num_rows);
}
auto & subcolumn = getSubcolumn(entry->path);
subcolumn.insertRangeFrom(entry->data, start, length);
}
}
num_rows += length;
finalize();
}
@ -657,6 +691,36 @@ void ColumnObject::popBack(size_t length)
num_rows -= length;
}
template <typename Func>
ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_name) const
{
if (!isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot {} non-finalized ColumnObject", func_name);
auto res = ColumnObject::create(is_nullable);
for (const auto & subcolumn : subcolumns)
{
auto new_subcolumn = func(subcolumn->data.getFinalizedColumn());
res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable());
}
return res;
}
ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }, "permute");
}
ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }, "filter");
}
ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const
{
return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }, "index");
}
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
{
if (const auto * node = subcolumns.findLeaf(key))

View File

@ -68,6 +68,8 @@ public:
bool isFinalized() const;
const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); }
const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); }
size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); }
/// Checks the consistency of column's parts stored in @data.
void checkTypes() const;
@ -193,15 +195,18 @@ public:
void forEachSubcolumn(ColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr replicate(const Offsets & offsets) const override;
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
/// All other methods throw exception.
ColumnPtr decompress() const override { throwMustBeConcrete(); }
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
@ -211,10 +216,7 @@ public:
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeConcrete(); }
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeConcrete(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeConcrete(); }
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); }
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override { throwMustBeConcrete(); }
bool hasEqualValues() const override { throwMustBeConcrete(); }
@ -232,6 +234,9 @@ private:
{
throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR);
}
template <typename Func>
ColumnPtr applyForSubcolumns(Func && func, std::string_view func_name) const;
};
}

View File

@ -59,12 +59,6 @@ Exception::Exception(const std::string & msg, int code, bool remote_)
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(const std::string & msg, const Exception & nested, int code)
: Poco::Exception(msg, nested, code)
{
handle_error_code(msg, code, remote, getStackFramePointers());
}
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
: Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
{

View File

@ -29,7 +29,6 @@ public:
Exception() = default;
Exception(const std::string & msg, int code, bool remote_ = false);
Exception(const std::string & msg, const Exception & nested, int code);
Exception(int code, const std::string & message)
: Exception(message, code)

View File

@ -90,7 +90,7 @@ void LRUFileCache::initialize()
}
void LRUFileCache::useCell(
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & /* cache_lock */)
const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock)
{
auto file_segment = cell.file_segment;
@ -109,7 +109,7 @@ void LRUFileCache::useCell(
if (cell.queue_iterator)
{
/// Move to the end of the queue. The iterator remains valid.
queue.splice(queue.end(), queue, *cell.queue_iterator);
queue.moveToEnd(*cell.queue_iterator, cache_lock);
}
}
@ -237,7 +237,11 @@ FileSegments LRUFileCache::splitRangeIntoCells(
}
void LRUFileCache::fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
FileSegments & file_segments,
const Key & key,
const FileSegment::Range & range,
bool fill_with_detached_file_segments,
std::lock_guard<std::mutex> & cache_lock)
{
/// There are segments [segment1, ..., segmentN]
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
@ -319,7 +323,8 @@ void LRUFileCache::fillHolesWithEmptyFileSegments(
}
else
{
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
file_segments.splice(
file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
}
}
}
@ -397,10 +402,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock));
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
FileSegmentCell cell(std::move(file_segment), queue);
FileSegmentCell cell(std::move(file_segment), this, cache_lock);
auto & offsets = files[key];
@ -425,6 +430,10 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
{
std::lock_guard cache_lock(mutex);
#ifndef NDEBUG
assertCacheCorrectness(key, cache_lock);
#endif
auto * cell = getCell(key, offset, cache_lock);
if (cell)
throw Exception(
@ -437,15 +446,15 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
}
bool LRUFileCache::tryReserve(
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
auto removed_size = 0;
size_t queue_size = queue.size();
size_t queue_size = queue.getElementsNum(cache_lock);
assert(queue_size <= max_element_size);
/// Since space reservation is incremental, cache cell already exists if it's state is EMPTY.
/// And it cache cell does not exist on startup -- as we first check for space and then add a cell.
auto * cell_for_reserve = getCell(key_, offset_, cache_lock);
auto * cell_for_reserve = getCell(key, offset, cache_lock);
/// A cell acquires a LRUQueue iterator on first successful space reservation attempt.
/// cell_for_reserve can be nullptr here when we call tryReserve() from loadCacheInfoIntoMemory().
@ -455,24 +464,27 @@ bool LRUFileCache::tryReserve(
auto is_overflow = [&]
{
/// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements.
return (max_size != 0 && current_size + size - removed_size > max_size)
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size);
};
std::vector<FileSegmentCell *> to_evict;
std::vector<FileSegmentCell *> trash;
auto key_it = queue.begin();
while (is_overflow() && key_it != queue.end())
for (const auto & [entry_key, entry_offset, entry_size] : queue)
{
const auto [key, offset] = *key_it;
++key_it;
if (!is_overflow())
break;
auto * cell = getCell(key, offset, cache_lock);
auto * cell = getCell(entry_key, entry_offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}",
keyToStr(key), offset);
size_t cell_size = cell->size();
assert(entry_size == cell_size);
/// It is guaranteed that cell is not removed from cache as long as
/// pointer to corresponding file segment is hold by any other thread.
@ -495,7 +507,7 @@ bool LRUFileCache::tryReserve(
}
default:
{
remove(key, offset, cache_lock, segment_lock);
trash.push_back(cell);
break;
}
}
@ -505,11 +517,35 @@ bool LRUFileCache::tryReserve(
}
}
/// This case is very unlikely, can happen in case of exception from
/// file_segment->complete(), which would be a logical error.
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
}
if (is_overflow())
return false;
if (cell_for_reserve && !cell_for_reserve->queue_iterator)
cell_for_reserve->queue_iterator = queue.insert(queue.end(), std::make_pair(key_, offset_));
/// cache cell is nullptr on server startup because we first check for space and then add a cell.
if (cell_for_reserve)
{
/// queue_iteratir is std::nullopt here if no space has been reserved yet, a cache cell
/// acquires queue iterator on first successful space reservation attempt.
/// If queue iterator already exists, we need to update the size after each space reservation.
auto queue_iterator = cell_for_reserve->queue_iterator;
if (queue_iterator)
queue.incrementSize(*queue_iterator, size, cache_lock);
else
cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
}
for (auto & cell : to_evict)
{
@ -521,8 +557,7 @@ bool LRUFileCache::tryReserve(
}
}
current_size += size - removed_size;
if (current_size > (1ull << 63))
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
return true;
@ -549,7 +584,10 @@ void LRUFileCache::remove(const Key & key)
for (auto & cell : to_remove)
{
if (!cell->releasable())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove file from cache because someone reads from it. File segment info: {}", cell->file_segment->getInfoForLog());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot remove file from cache because someone reads from it. File segment info: {}",
cell->file_segment->getInfoForLog());
auto file_segment = cell->file_segment;
if (file_segment)
@ -565,6 +603,10 @@ void LRUFileCache::remove(const Key & key)
if (fs::exists(key_path))
fs::remove(key_path);
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::remove(bool force_remove_unreleasable)
@ -574,20 +616,22 @@ void LRUFileCache::remove(bool force_remove_unreleasable)
std::lock_guard cache_lock(mutex);
std::vector<FileSegment *> to_remove;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset] = *it++;
const auto & [key, offset, size] = *it++;
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell");
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell");
if (cell->releasable() || force_remove_unreleasable)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
std::lock_guard segment_lock(file_segment->mutex);
file_segment->detach(cache_lock, segment_lock);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
@ -606,7 +650,9 @@ void LRUFileCache::remove(
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset);
if (cell->queue_iterator)
queue.erase(*cell->queue_iterator);
{
queue.remove(*cell->queue_iterator, cache_lock);
}
auto & offsets = files[key];
offsets.erase(offset);
@ -642,7 +688,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
Key key;
UInt64 offset = 0;
size_t size = 0;
std::vector<std::pair<LRUQueueIterator, std::weak_ptr<FileSegment>>> queue_entries;
std::vector<std::pair<LRUQueue::Iterator, std::weak_ptr<FileSegment>>> queue_entries;
/// cache_base_path / key_prefix / key / offset
@ -681,7 +727,7 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
{
LOG_WARNING(log,
"Cache capacity changed (max size: {}, available: {}), cached file `{}` does not fit in cache anymore (size: {})",
max_size, availableSize(), key_it->path().string(), size);
max_size, getAvailableCacheSizeUnlocked(cache_lock), key_it->path().string(), size);
fs::remove(offset_it->path());
}
}
@ -699,47 +745,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_l
if (file_segment.expired())
continue;
queue.splice(queue.end(), queue, it);
queue.moveToEnd(it, cache_lock);
}
}
LRUFileCache::Stat LRUFileCache::getStat()
{
std::lock_guard cache_lock(mutex);
Stat stat
{
.size = queue.size(),
.available = availableSize(),
.downloaded_size = 0,
.downloading_size = 0,
};
for (const auto & [key, offset] : queue)
{
const auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache became inconsistent. Key: {}, offset: {}", keyToStr(key), offset);
switch (cell->file_segment->download_state)
{
case FileSegment::State::DOWNLOADED:
{
++stat.downloaded_size;
break;
}
case FileSegment::State::DOWNLOADING:
{
++stat.downloading_size;
break;
}
default:
break;
}
}
return stat;
#ifndef NDEBUG
assertCacheCorrectness(cache_lock);
#endif
}
void LRUFileCache::reduceSizeToDownloaded(
@ -754,14 +764,23 @@ void LRUFileCache::reduceSizeToDownloaded(
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cell found for key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"No cell found for key: {}, offset: {}",
keyToStr(key), offset);
}
const auto & file_segment = cell->file_segment;
size_t downloaded_size = file_segment->downloaded_size;
if (downloaded_size == file_segment->range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}", keyToStr(key), offset);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Nothing to reduce, file segment fully downloaded, key: {}, offset: {}",
keyToStr(key), offset);
}
cell->file_segment = std::make_shared<FileSegment>(offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED);
}
@ -811,7 +830,43 @@ std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
return cache_paths;
}
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
size_t LRUFileCache::getUsedCacheSize() const
{
std::lock_guard cache_lock(mutex);
return getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getTotalWeight(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSize() const
{
std::lock_guard cache_lock(mutex);
return getAvailableCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return max_size - getUsedCacheSizeUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNum() const
{
std::lock_guard cache_lock(mutex);
return getFileSegmentsNumUnlocked(cache_lock);
}
size_t LRUFileCache::getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const
{
return queue.getElementsNum(cache_lock);
}
LRUFileCache::FileSegmentCell::FileSegmentCell(
FileSegmentPtr file_segment_,
LRUFileCache * cache,
std::lock_guard<std::mutex> & cache_lock)
: file_segment(file_segment_)
{
/**
@ -824,7 +879,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
{
case FileSegment::State::DOWNLOADED:
{
queue_iterator = queue_.insert(queue_.end(), getKeyAndOffset());
queue_iterator = cache->queue.add(file_segment->key(), file_segment->offset(), file_segment->range().size(), cache_lock);
break;
}
case FileSegment::State::EMPTY:
@ -839,13 +894,97 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
}
}
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
{
#ifndef NDEBUG
for (const auto & [entry_key, entry_offset, _] : queue)
{
if (entry_key == key && entry_offset == offset)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
keyToStr(key), offset, size);
}
#endif
cache_size += size;
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
}
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size -= queue_it->size;
queue.erase(queue_it);
}
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
queue.splice(queue.end(), queue, queue_it);
}
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size += size_increment;
queue_it->size += size_increment;
}
bool LRUFileCache::LRUQueue::contains(
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
{
/// This method is used for assertions in debug mode.
/// So we do not care about complexity here.
for (const auto & [entry_key, entry_offset, size] : queue)
{
if (key == entry_key && offset == entry_offset)
return true;
}
return false;
}
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
{
[[maybe_unused]] size_t total_size = 0;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset, size] = *it++;
auto * cell = cache->getCell(key, offset, cache_lock);
if (!cell)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
}
assert(cell->size() == size);
total_size += size;
}
assert(total_size == cache_size);
assert(cache_size <= cache->max_size);
assert(queue.size() <= cache->max_element_size);
}
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
{
String result;
for (const auto & [key, offset, size] : queue)
{
if (!result.empty())
result += ", ";
result += fmt::format("{}: [{}, {}]", keyToStr(key), offset, offset + size - 1);
}
return result;
}
String LRUFileCache::dumpStructure(const Key & key)
{
std::lock_guard cache_lock(mutex);
return dumpStructureImpl(key, cache_lock);
return dumpStructureUnlocked(key, cache_lock);
}
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
String LRUFileCache::dumpStructureUnlocked(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
WriteBufferFromOwnString result;
const auto & cells_by_offset = files[key];
@ -853,18 +992,37 @@ String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mut
for (const auto & [offset, cell] : cells_by_offset)
result << cell.file_segment->getInfoForLog() << "\n";
result << "\n\nQueue: " << queue.toString(cache_lock);
return result.str();
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
void LRUFileCache::assertCacheCellsCorrectness(
const FileSegmentsByOffset & cells_by_offset, [[maybe_unused]] std::lock_guard<std::mutex> & cache_lock)
{
const auto & cells_by_offset = files[key];
for (const auto & [_, cell] : cells_by_offset)
{
const auto & file_segment = cell.file_segment;
file_segment->assertCorrectness();
if (file_segment->reserved_size != 0)
{
assert(cell.queue_iterator);
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
}
}
}
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
{
for (const auto & [key, cells_by_offset] : files)
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
}
}

View File

@ -90,6 +90,10 @@ public:
/// For debug.
virtual String dumpStructure(const Key & key) = 0;
virtual size_t getUsedCacheSize() const = 0;
virtual size_t getFileSegmentsNum() const = 0;
protected:
String cache_base_path;
size_t max_size;
@ -149,17 +153,59 @@ public:
std::vector<String> tryGetCachePaths(const Key & key) override;
size_t getUsedCacheSize() const override;
size_t getFileSegmentsNum() const override;
private:
using FileKeyAndOffset = std::pair<Key, size_t>;
using LRUQueue = std::list<FileKeyAndOffset>;
using LRUQueueIterator = typename LRUQueue::iterator;
class LRUQueue
{
public:
struct FileKeyAndOffset
{
Key key;
size_t offset;
size_t size;
FileKeyAndOffset(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) {}
};
using Iterator = typename std::list<FileKeyAndOffset>::iterator;
size_t getTotalWeight(std::lock_guard<std::mutex> & /* cache_lock */) const { return cache_size; }
size_t getElementsNum(std::lock_guard<std::mutex> & /* cache_lock */) const { return queue.size(); }
Iterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
void remove(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
void moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & cache_lock);
/// Space reservation for a file segment is incremental, so we need to be able to increment size of the queue entry.
void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);
void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
String toString(std::lock_guard<std::mutex> & cache_lock) const;
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;
Iterator begin() { return queue.begin(); }
Iterator end() { return queue.end(); }
private:
std::list<FileKeyAndOffset> queue;
size_t cache_size = 0;
};
struct FileSegmentCell : private boost::noncopyable
{
FileSegmentPtr file_segment;
/// Iterator is put here on first reservation attempt, if successful.
std::optional<LRUQueueIterator> queue_iterator;
std::optional<LRUQueue::Iterator> queue_iterator;
/// Pointer to file segment is always hold by the cache itself.
/// Apart from pointer in cache, it can be hold by cache users, when they call
@ -168,13 +214,11 @@ private:
size_t size() const { return file_segment->reserved_size; }
FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_);
FileSegmentCell(FileSegmentPtr file_segment_, LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
FileSegmentCell(FileSegmentCell && other)
FileSegmentCell(FileSegmentCell && other) noexcept
: file_segment(std::move(other.file_segment))
, queue_iterator(std::move(other.queue_iterator)) {}
std::pair<Key, size_t> getKeyAndOffset() const { return std::make_pair(file_segment->key(), file_segment->range().left); }
, queue_iterator(other.queue_iterator) {}
};
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
@ -182,7 +226,6 @@ private:
CachedFiles files;
LRUQueue queue;
size_t current_size = 0;
Poco::Logger * log;
FileSegments getImpl(
@ -217,31 +260,32 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) override;
size_t availableSize() const { return max_size - current_size; }
size_t getAvailableCacheSize() const;
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
FileSegments splitRangeIntoCells(
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
void fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
public:
struct Stat
{
size_t size;
size_t available;
size_t downloaded_size;
size_t downloading_size;
};
Stat getStat();
String dumpStructure(const Key & key_) override;
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
};
}

View File

@ -94,11 +94,6 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
}
String FileSegment::getCallerId()
{
return getCallerIdImpl();
}
String FileSegment::getCallerIdImpl()
{
if (!CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
@ -400,7 +395,10 @@ bool FileSegment::reserve(size_t size)
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
if (reserved)
{
std::lock_guard segment_lock(mutex);
reserved_size += size;
}
return reserved;
}
@ -606,6 +604,7 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
info << "File segment: " << range().toString() << ", ";
info << "state: " << download_state << ", ";
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
info << "reserved size: " << reserved_size << ", ";
info << "downloader id: " << downloader_id << ", ";
info << "caller id: " << getCallerId();

View File

@ -184,8 +184,6 @@ private:
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock);
static String getCallerIdImpl();
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
const Range segment_range;
@ -229,7 +227,7 @@ private:
struct FileSegmentsHolder : private boost::noncopyable
{
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {}
~FileSegmentsHolder();

View File

@ -14,7 +14,7 @@
* make a persistent copy of the key in each of the following cases:
* 1) the aggregation method doesn't use temporary keys, so they're persistent
* from the start;
* 1) the key is already present in the hash table;
* 2) the key is already present in the hash table;
* 3) that particular key is stored by value, e.g. a short StringRef key in
* StringHashMap.
*

View File

@ -83,7 +83,7 @@ public:
current_word = 0;
}
void update(const char * data, UInt64 size)
ALWAYS_INLINE void update(const char * data, UInt64 size)
{
const char * end = data + size;
@ -137,12 +137,12 @@ public:
}
template <typename T>
void update(const T & x)
ALWAYS_INLINE void update(const T & x)
{
update(reinterpret_cast<const char *>(&x), sizeof(x)); /// NOLINT
}
void update(const std::string & x)
ALWAYS_INLINE void update(const std::string & x)
{
update(x.data(), x.length());
}

View File

@ -181,7 +181,7 @@ ThreadStatus::~ThreadStatus()
deleter();
/// Only change current_thread if it's currently being used by this ThreadStatus
/// For example, PushingToViewsBlockOutputStream creates and deletes ThreadStatus instances while running in the main query thread
/// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread
if (current_thread == this)
current_thread = nullptr;
}

View File

@ -34,6 +34,12 @@ struct ACL
int32_t permissions;
String scheme;
String id;
bool operator<(const ACL & other) const
{
return std::tuple(permissions, scheme, id)
< std::tuple(other.permissions, other.scheme, other.id);
}
};
using ACLs = std::vector<ACL>;

View File

@ -135,6 +135,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________]
/// ^ ^
/// 0 9
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
ASSERT_EQ(cache.getUsedCacheSize(), 10);
{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
@ -154,6 +156,8 @@ TEST(LRUFileCache, get)
/// Current cache: [__________][_____]
/// ^ ^^ ^
/// 0 910 14
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
{
auto holder = cache.getOrSet(key, 9, 1); /// Get [9, 9]
@ -179,12 +183,15 @@ TEST(LRUFileCache, get)
complete(cache.getOrSet(key, 17, 4)); /// Get [17, 20]
complete(cache.getOrSet(key, 24, 3)); /// Get [24, 26]
complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
// complete(cache.getOrSet(key, 27, 1)); /// Get [27, 27]
/// Current cache: [__________][_____] [____] [___][]
/// ^ ^^ ^ ^ ^ ^ ^^^
/// 0 910 14 17 20 24 2627
///
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
ASSERT_EQ(cache.getUsedCacheSize(), 22);
{
auto holder = cache.getOrSet(key, 0, 26); /// Get [0, 25]
@ -249,7 +256,7 @@ TEST(LRUFileCache, get)
/// ^ ^ ^ ^ ^
/// 10 17 21 24 26
ASSERT_EQ(cache.getStat().size, 5);
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
{
auto holder = cache.getOrSet(key, 23, 5); /// Get [23, 28]
@ -479,8 +486,6 @@ TEST(LRUFileCache, get)
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
cache2.initialize();
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
auto holder1 = cache2.getOrSet(key, 2, 28); /// Get [2, 29]
auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 5);

View File

@ -149,9 +149,12 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
serializeSnapshotMetadata(snapshot.snapshot_meta, out);
writeBinary(snapshot.session_id, out);
/// Serialize ACLs MAP
writeBinary(snapshot.acl_map.size(), out);
for (const auto & [acl_id, acls] : snapshot.acl_map)
/// Better to sort before serialization, otherwise snapshots can be different on different replicas
std::vector<std::pair<int64_t, Coordination::ACLs>> sorted_acl_map(snapshot.acl_map.begin(), snapshot.acl_map.end());
std::sort(sorted_acl_map.begin(), sorted_acl_map.end());
/// Serialize ACLs map
writeBinary(sorted_acl_map.size(), out);
for (const auto & [acl_id, acls] : sorted_acl_map)
{
writeBinary(acl_id, out);
writeBinary(acls.size(), out);
@ -187,10 +190,16 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
++it;
}
/// Session must be saved in a sorted order,
/// otherwise snapshots will be different
std::vector<std::pair<int64_t, int64_t>> sorted_session_and_timeout(snapshot.session_and_timeout.begin(), snapshot.session_and_timeout.end());
std::sort(sorted_session_and_timeout.begin(), sorted_session_and_timeout.end());
/// Serialize sessions
size_t size = snapshot.session_and_timeout.size();
size_t size = sorted_session_and_timeout.size();
writeBinary(size, out);
for (const auto & [session_id, timeout] : snapshot.session_and_timeout)
for (const auto & [session_id, timeout] : sorted_session_and_timeout)
{
writeBinary(session_id, out);
writeBinary(timeout, out);

View File

@ -24,6 +24,7 @@
#include <Coordination/KeeperLogStore.h>
#include <Coordination/Changelog.h>
#include <filesystem>
#include <Common/SipHash.h>
#include <Coordination/SnapshotableHashTable.h>
@ -1709,6 +1710,46 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard)
EXPECT_EQ(changelog4.next_slot(), 5);
}
TEST_P(CoordinationTest, TestStorageSnapshotEqual)
{
auto params = GetParam();
ChangelogDirTest test("./snapshots");
std::optional<UInt128> snapshot_hash;
for (size_t i = 0; i < 15; ++i)
{
DB::KeeperSnapshotManager manager("./snapshots", 3, params.enable_compression);
DB::KeeperStorage storage(500, "");
for (size_t j = 0; j < 5000; ++j)
{
addNode(storage, "/hello_" + std::to_string(j), "world", 1);
addNode(storage, "/hello/somepath_" + std::to_string(j), "somedata", 3);
}
storage.session_id_counter = 5;
storage.ephemerals[3] = {"/hello"};
storage.ephemerals[1] = {"/hello/somepath"};
for (size_t j = 0; j < 3333; ++j)
storage.getSessionID(130 * j);
DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid);
auto buf = manager.serializeSnapshotToBuffer(snapshot);
auto new_hash = sipHash128(reinterpret_cast<char *>(buf->data()), buf->size());
if (!snapshot_hash.has_value())
{
snapshot_hash = new_hash;
}
else
{
EXPECT_EQ(*snapshot_hash, new_hash);
}
}
}
TEST_P(CoordinationTest, TestLogGap)
{

View File

@ -46,7 +46,7 @@ struct BlockInfo
void read(ReadBuffer & in);
};
/// Block extension to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
/// Block extension to support delayed defaults. AddingDefaultsTransform uses it to replace missing values with column defaults.
class BlockMissingValues
{
public:

View File

@ -22,6 +22,10 @@ namespace DB
{
class IColumn;
static constexpr UInt64 operator""_Gb(unsigned long long value)
{
return value * 1024 * 1024 * 1024;
}
/** List of settings: type, name, default value, description, flags
*
@ -340,7 +344,7 @@ class IColumn;
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \
M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \
M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \
M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \
@ -356,9 +360,9 @@ class IColumn;
M(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
\
M(UInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage, 0, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage, 10_Gb, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage_for_user, 0, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_guaranteed_memory_usage_for_user, 10_Gb, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \
M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \
M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \
M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
@ -722,7 +726,7 @@ class IColumn;
M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
\
M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \
M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if possible. Possible values: 0 - no rewrite, 1 - apply if possible, 2 - force rewrite all cross joins", 0) \
\
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
\

View File

@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN,
{{"auto", JoinAlgorithm::AUTO},
{"hash", JoinAlgorithm::HASH},
{"partial_merge", JoinAlgorithm::PARTIAL_MERGE},
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}})
{"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE},
{"parallel_hash", JoinAlgorithm::PARALLEL_HASH}})
IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE,

View File

@ -42,6 +42,7 @@ enum class JoinAlgorithm
HASH,
PARTIAL_MERGE,
PREFER_PARTIAL_MERGE,
PARALLEL_HASH,
};
DECLARE_SETTING_ENUM(JoinAlgorithm)

Some files were not shown because too many files have changed in this diff Show More