mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into xdbc-constraints
This commit is contained in:
commit
01700078d9
285
.clang-tidy
285
.clang-tidy
@ -1,170 +1,138 @@
|
||||
Checks: '-*,
|
||||
misc-throw-by-value-catch-by-reference,
|
||||
misc-misplaced-const,
|
||||
misc-unconventional-assign-operator,
|
||||
misc-redundant-expression,
|
||||
misc-static-assert,
|
||||
misc-unconventional-assign-operator,
|
||||
misc-uniqueptr-reset-release,
|
||||
misc-unused-alias-decls,
|
||||
misc-unused-parameters,
|
||||
misc-unused-using-decls,
|
||||
# Enable all checks + disale selected checks. Feel free to remove disabled checks from below list if
|
||||
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
|
||||
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
|
||||
Checks: '*,
|
||||
-abseil-*,
|
||||
|
||||
modernize-avoid-bind,
|
||||
modernize-loop-convert,
|
||||
modernize-make-shared,
|
||||
modernize-make-unique,
|
||||
modernize-raw-string-literal,
|
||||
modernize-redundant-void-arg,
|
||||
modernize-replace-auto-ptr,
|
||||
modernize-replace-random-shuffle,
|
||||
modernize-use-bool-literals,
|
||||
modernize-use-nullptr,
|
||||
modernize-use-using,
|
||||
modernize-use-equals-default,
|
||||
modernize-use-equals-delete,
|
||||
-altera-*,
|
||||
|
||||
performance-faster-string-find,
|
||||
performance-for-range-copy,
|
||||
performance-implicit-conversion-in-loop,
|
||||
performance-inefficient-algorithm,
|
||||
performance-inefficient-vector-operation,
|
||||
performance-move-constructor-init,
|
||||
performance-no-automatic-move,
|
||||
performance-trivially-destructible,
|
||||
performance-unnecessary-copy-initialization,
|
||||
performance-noexcept-move-constructor,
|
||||
performance-move-const-arg,
|
||||
-android-*,
|
||||
|
||||
readability-avoid-const-params-in-decls,
|
||||
readability-const-return-type,
|
||||
readability-container-size-empty,
|
||||
readability-convert-member-functions-to-static,
|
||||
readability-delete-null-pointer,
|
||||
readability-deleted-default,
|
||||
readability-make-member-function-const,
|
||||
readability-misplaced-array-index,
|
||||
readability-non-const-parameter,
|
||||
readability-qualified-auto,
|
||||
readability-redundant-access-specifiers,
|
||||
readability-redundant-control-flow,
|
||||
readability-redundant-function-ptr-dereference,
|
||||
readability-redundant-smartptr-get,
|
||||
readability-redundant-string-cstr,
|
||||
readability-redundant-string-init,
|
||||
readability-static-definition-in-anonymous-namespace,
|
||||
readability-string-compare,
|
||||
readability-uniqueptr-delete-release,
|
||||
readability-redundant-member-init,
|
||||
readability-simplify-subscript-expr,
|
||||
readability-simplify-boolean-expr,
|
||||
readability-inconsistent-declaration-parameter-name,
|
||||
readability-identifier-naming,
|
||||
-bugprone-branch-clone,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-exception-escape,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-not-null-terminated-result,
|
||||
|
||||
bugprone-undelegated-constructor,
|
||||
bugprone-argument-comment,
|
||||
bugprone-bad-signal-to-kill-thread,
|
||||
bugprone-bool-pointer-implicit-conversion,
|
||||
bugprone-copy-constructor-init,
|
||||
bugprone-dangling-handle,
|
||||
bugprone-forward-declaration-namespace,
|
||||
bugprone-fold-init-type,
|
||||
bugprone-inaccurate-erase,
|
||||
bugprone-incorrect-roundings,
|
||||
bugprone-infinite-loop,
|
||||
bugprone-integer-division,
|
||||
bugprone-macro-parentheses,
|
||||
bugprone-macro-repeated-side-effects,
|
||||
bugprone-misplaced-operator-in-strlen-in-alloc,
|
||||
bugprone-misplaced-pointer-artithmetic-in-alloc,
|
||||
bugprone-misplaced-widening-cast,
|
||||
bugprone-move-forwarding-reference,
|
||||
bugprone-multiple-statement-macro,
|
||||
bugprone-parent-virtual-call,
|
||||
bugprone-posix-return,
|
||||
bugprone-reserved-identifier,
|
||||
bugprone-signed-char-misuse,
|
||||
bugprone-sizeof-container,
|
||||
bugprone-sizeof-expression,
|
||||
bugprone-string-constructor,
|
||||
bugprone-string-integer-assignment,
|
||||
bugprone-string-literal-with-embedded-nul,
|
||||
bugprone-suspicious-enum-usage,
|
||||
bugprone-suspicious-include,
|
||||
bugprone-suspicious-memset-usage,
|
||||
bugprone-suspicious-missing-comma,
|
||||
bugprone-suspicious-string-compare,
|
||||
bugprone-swapped-arguments,
|
||||
bugprone-terminating-continue,
|
||||
bugprone-throw-keyword-missing,
|
||||
bugprone-too-small-loop-variable,
|
||||
bugprone-undefined-memory-manipulation,
|
||||
bugprone-unhandled-self-assignment,
|
||||
bugprone-unused-raii,
|
||||
bugprone-unused-return-value,
|
||||
bugprone-use-after-move,
|
||||
bugprone-virtual-near-miss,
|
||||
-cert-dcl16-c,
|
||||
-cert-err58-cpp,
|
||||
-cert-msc32-c,
|
||||
-cert-msc51-cpp,
|
||||
-cert-oop54-cpp,
|
||||
-cert-oop57-cpp,
|
||||
|
||||
cert-dcl21-cpp,
|
||||
cert-dcl50-cpp,
|
||||
cert-env33-c,
|
||||
cert-err34-c,
|
||||
cert-err52-cpp,
|
||||
cert-flp30-c,
|
||||
cert-mem57-cpp,
|
||||
cert-msc50-cpp,
|
||||
cert-oop58-cpp,
|
||||
-clang-analyzer-optin.performance.Padding,
|
||||
-clang-analyzer-optin.portability.UnixAPI,
|
||||
|
||||
google-build-explicit-make-pair,
|
||||
google-build-namespaces,
|
||||
google-default-arguments,
|
||||
google-explicit-constructor,
|
||||
google-readability-casting,
|
||||
google-readability-avoid-underscore-in-googletest-name,
|
||||
google-runtime-int,
|
||||
google-runtime-operator,
|
||||
-clang-analyzer-security.insecureAPI.bzero,
|
||||
-clang-analyzer-security.insecureAPI.strcpy,
|
||||
|
||||
hicpp-exception-baseclass,
|
||||
-cppcoreguidelines-avoid-c-arrays,
|
||||
-cppcoreguidelines-avoid-goto,
|
||||
-cppcoreguidelines-avoid-magic-numbers,
|
||||
-cppcoreguidelines-avoid-non-const-global-variables,
|
||||
-cppcoreguidelines-explicit-virtual-functions,
|
||||
-cppcoreguidelines-init-variables,
|
||||
-cppcoreguidelines-interfaces-global-init,
|
||||
-cppcoreguidelines-macro-usage,
|
||||
-cppcoreguidelines-narrowing-conversions,
|
||||
-cppcoreguidelines-no-malloc,
|
||||
-cppcoreguidelines-non-private-member-variables-in-classes,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-cppcoreguidelines-prefer-member-initializer,
|
||||
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
|
||||
-cppcoreguidelines-pro-bounds-constant-array-index,
|
||||
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
|
||||
-cppcoreguidelines-pro-type-const-cast,
|
||||
-cppcoreguidelines-pro-type-cstyle-cast,
|
||||
-cppcoreguidelines-pro-type-member-init,
|
||||
-cppcoreguidelines-pro-type-reinterpret-cast,
|
||||
-cppcoreguidelines-pro-type-static-cast-downcast,
|
||||
-cppcoreguidelines-pro-type-union-access,
|
||||
-cppcoreguidelines-pro-type-vararg,
|
||||
-cppcoreguidelines-slicing,
|
||||
-cppcoreguidelines-special-member-functions,
|
||||
|
||||
clang-analyzer-core.CallAndMessage,
|
||||
clang-analyzer-core.DivideZero,
|
||||
clang-analyzer-core.NonNullParamChecker,
|
||||
clang-analyzer-core.NullDereference,
|
||||
clang-analyzer-core.StackAddressEscape,
|
||||
clang-analyzer-core.UndefinedBinaryOperatorResult,
|
||||
clang-analyzer-core.VLASize,
|
||||
clang-analyzer-core.uninitialized.ArraySubscript,
|
||||
clang-analyzer-core.uninitialized.Assign,
|
||||
clang-analyzer-core.uninitialized.Branch,
|
||||
clang-analyzer-core.uninitialized.CapturedBlockVariable,
|
||||
clang-analyzer-core.uninitialized.UndefReturn,
|
||||
clang-analyzer-cplusplus.InnerPointer,
|
||||
clang-analyzer-cplusplus.NewDelete,
|
||||
clang-analyzer-cplusplus.NewDeleteLeaks,
|
||||
clang-analyzer-cplusplus.PlacementNewChecker,
|
||||
clang-analyzer-cplusplus.SelfAssignment,
|
||||
clang-analyzer-deadcode.DeadStores,
|
||||
clang-analyzer-cplusplus.Move,
|
||||
clang-analyzer-optin.cplusplus.VirtualCall,
|
||||
clang-analyzer-security.insecureAPI.UncheckedReturn,
|
||||
clang-analyzer-security.insecureAPI.bcmp,
|
||||
clang-analyzer-security.insecureAPI.bcopy,
|
||||
clang-analyzer-security.insecureAPI.bzero,
|
||||
clang-analyzer-security.insecureAPI.getpw,
|
||||
clang-analyzer-security.insecureAPI.gets,
|
||||
clang-analyzer-security.insecureAPI.mkstemp,
|
||||
clang-analyzer-security.insecureAPI.mktemp,
|
||||
clang-analyzer-security.insecureAPI.rand,
|
||||
clang-analyzer-security.insecureAPI.strcpy,
|
||||
clang-analyzer-unix.Malloc,
|
||||
clang-analyzer-unix.MallocSizeof,
|
||||
clang-analyzer-unix.MismatchedDeallocator,
|
||||
clang-analyzer-unix.Vfork,
|
||||
clang-analyzer-unix.cstring.BadSizeArg,
|
||||
clang-analyzer-unix.cstring.NullArg,
|
||||
-concurrency-mt-unsafe,
|
||||
|
||||
boost-use-to-string,
|
||||
-darwin-*,
|
||||
|
||||
-fuchsia-*,
|
||||
|
||||
-google-build-using-namespace,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-casting,
|
||||
-google-readability-function-size,
|
||||
-google-readability-namespace-comments,
|
||||
-google-readability-todo,
|
||||
-google-upgrade-googletest-case,
|
||||
|
||||
-hicpp-avoid-c-arrays,
|
||||
-hicpp-avoid-goto,
|
||||
-hicpp-braces-around-statements,
|
||||
-hicpp-explicit-conversions,
|
||||
-hicpp-function-size,
|
||||
-hicpp-member-init,
|
||||
-hicpp-move-const-arg,
|
||||
-hicpp-multiway-paths-covered,
|
||||
-hicpp-named-parameter,
|
||||
-hicpp-no-array-decay,
|
||||
-hicpp-no-assembler,
|
||||
-hicpp-no-malloc,
|
||||
-hicpp-signed-bitwise,
|
||||
-hicpp-special-member-functions,
|
||||
-hicpp-uppercase-literal-suffix,
|
||||
-hicpp-use-auto,
|
||||
-hicpp-use-emplace,
|
||||
-hicpp-vararg,
|
||||
|
||||
-linuxkernel-*,
|
||||
|
||||
-llvm-*,
|
||||
|
||||
-llvmlibc-*,
|
||||
|
||||
-openmp-*,
|
||||
|
||||
-misc-no-recursion,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
-modernize-pass-by-value,
|
||||
-modernize-return-braced-init-list,
|
||||
-modernize-use-auto,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-emplace,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-override,
|
||||
-modernize-use-trailing-return-type,
|
||||
|
||||
-performance-inefficient-string-concatenation,
|
||||
-performance-no-int-to-ptr,
|
||||
-performance-unnecessary-value-param,
|
||||
|
||||
-portability-simd-intrinsics,
|
||||
|
||||
-readability-braces-around-statements,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-function-size,
|
||||
-readability-identifier-length,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-named-parameter,
|
||||
-readability-redundant-declaration,
|
||||
-readability-static-accessed-through-instance,
|
||||
-readability-suspicious-call-argument,
|
||||
-readability-uppercase-literal-suffix,
|
||||
-readability-use-anyofallof,
|
||||
|
||||
-zirkon-*,
|
||||
'
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
||||
CheckOptions:
|
||||
@ -210,3 +178,6 @@ CheckOptions:
|
||||
value: false
|
||||
- key: performance-move-const-arg.CheckTriviallyCopyableMove
|
||||
value: false
|
||||
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
|
||||
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
|
||||
value: expr-type
|
||||
|
2
.github/ISSUE_TEMPLATE/10_question.md
vendored
2
.github/ISSUE_TEMPLATE/10_question.md
vendored
@ -7,6 +7,6 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
> Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||
|
||||
> If you still prefer GitHub issues, remove all this text and ask your question here.
|
||||
|
2
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
2
.github/ISSUE_TEMPLATE/50_build-issue.md
vendored
@ -7,7 +7,7 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
|
||||
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/
|
||||
|
||||
**Operating system**
|
||||
|
||||
|
2
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
2
.github/ISSUE_TEMPLATE/85_bug-report.md
vendored
@ -1,6 +1,6 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Wrong behaviour (visible to users) in official ClickHouse release.
|
||||
about: Wrong behavior (visible to users) in the official ClickHouse release.
|
||||
title: ''
|
||||
labels: 'potential bug'
|
||||
assignees: ''
|
||||
|
14
.github/PULL_REQUEST_TEMPLATE.md
vendored
14
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -1,7 +1,13 @@
|
||||
Changelog category (leave one):
|
||||
<!---
|
||||
A technical comment, you are free to remove or leave it as it is when PR is created
|
||||
The following categories are used in the next scripts, update them accordingly
|
||||
utils/changelog/changelog.py
|
||||
tests/ci/run_check.py
|
||||
-->
|
||||
### Changelog category (leave one):
|
||||
- New Feature
|
||||
- Improvement
|
||||
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
|
||||
- Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
- Performance Improvement
|
||||
- Backward Incompatible Change
|
||||
- Build/Testing/Packaging Improvement
|
||||
@ -9,8 +15,8 @@ Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
|
||||
|
||||
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
|
||||
...
|
||||
|
||||
|
||||
> Information about CI checks: https://clickhouse.tech/docs/en/development/continuous-integration/
|
||||
> Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/
|
||||
|
43
.github/workflows/backport.yml
vendored
43
.github/workflows/backport.yml
vendored
@ -1,43 +0,0 @@
|
||||
name: CherryPick
|
||||
|
||||
env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
concurrency:
|
||||
group: cherry-pick
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: '0 */3 * * *'
|
||||
jobs:
|
||||
CherryPick:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/cherry_pick
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
REPO_OWNER=ClickHouse
|
||||
REPO_NAME=ClickHouse
|
||||
REPO_TEAM=core
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
fetch-depth: 0
|
||||
- name: Cherry pick
|
||||
run: |
|
||||
sudo pip install GitPython
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 cherry_pick.py
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
191
.github/workflows/backport_branches.yml
vendored
191
.github/workflows/backport_branches.yml
vendored
@ -9,6 +9,18 @@ on: # yamllint disable-line rule:truthy
|
||||
branches:
|
||||
- 'backport/**'
|
||||
jobs:
|
||||
PythonUnitTests:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Python unit tests
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 -m unittest discover -s . -p '*_test.py'
|
||||
DockerHubPushAarch64:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
@ -100,8 +112,10 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
#########################################################################################
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
@ -117,7 +131,6 @@ jobs:
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=package_release
|
||||
EOF
|
||||
- name: Download changed images
|
||||
@ -131,25 +144,28 @@ jobs:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAarch64:
|
||||
needs: [DockerHubPush]
|
||||
@ -162,35 +178,41 @@ jobs:
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=package_aarch64
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/images_path
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
fetch-depth: 0 # For a proper version and performance artifacts
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAsan:
|
||||
needs: [DockerHubPush]
|
||||
@ -203,7 +225,6 @@ jobs:
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=package_asan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
@ -216,26 +237,27 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebTsan:
|
||||
needs: [DockerHubPush]
|
||||
@ -248,7 +270,6 @@ jobs:
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=package_tsan
|
||||
EOF
|
||||
- name: Download changed images
|
||||
@ -261,26 +282,27 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebDebug:
|
||||
needs: [DockerHubPush]
|
||||
@ -293,7 +315,6 @@ jobs:
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=package_debug
|
||||
EOF
|
||||
- name: Download changed images
|
||||
@ -306,28 +327,59 @@ jobs:
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
name: ${{ env.BUILD_URLS }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # It MUST BE THE SAME for all dependencies and the job itself
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -342,9 +394,10 @@ jobs:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/report_check
|
||||
CHECK_NAME=ClickHouse build check
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
TEMP_PATH=${{runner.temp}}/report_check
|
||||
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
@ -359,13 +412,18 @@ jobs:
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cat > "$NEEDS_DATA_PATH" << 'EOF'
|
||||
${{ toJSON(needs) }}
|
||||
EOF
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 build_report_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAl STATELESS TESTS #######################################
|
||||
@ -379,7 +437,7 @@ jobs:
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stateless_debug
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stateless tests (address, actions)
|
||||
CHECK_NAME=Stateless tests (address)
|
||||
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
|
||||
KILL_TIMEOUT=10800
|
||||
EOF
|
||||
@ -402,8 +460,10 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
############################ FUNCTIONAl STATEFUL TESTS #######################################
|
||||
@ -417,7 +477,7 @@ jobs:
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stateful_debug
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stateful tests (debug, actions)
|
||||
CHECK_NAME=Stateful tests (debug)
|
||||
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
|
||||
KILL_TIMEOUT=3600
|
||||
EOF
|
||||
@ -440,8 +500,10 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
######################################### STRESS TESTS #######################################
|
||||
@ -459,7 +521,7 @@ jobs:
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stress_thread
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stress test (thread, actions)
|
||||
CHECK_NAME=Stress test (thread)
|
||||
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
@ -481,8 +543,10 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
#############################################################################################
|
||||
############################# INTEGRATION TESTS #############################################
|
||||
@ -496,7 +560,7 @@ jobs:
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_release
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (release, actions)
|
||||
CHECK_NAME=Integration tests (release)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
@ -518,12 +582,15 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FinishCheck:
|
||||
needs:
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- BuilderReport
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatefulTestDebug
|
||||
|
47
.github/workflows/cherry_pick.yml
vendored
Normal file
47
.github/workflows/cherry_pick.yml
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
name: CherryPick
|
||||
|
||||
env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
concurrency:
|
||||
group: cherry-pick
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: '0 * * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
CherryPick:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
# https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/cherry_pick
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
REPO_OWNER=ClickHouse
|
||||
REPO_NAME=ClickHouse
|
||||
REPO_TEAM=core
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
|
||||
fetch-depth: 0
|
||||
- name: Cherry pick
|
||||
run: |
|
||||
sudo pip install GitPython
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 cherry_pick.py
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
2
.github/workflows/debug.yml
vendored
2
.github/workflows/debug.yml
vendored
@ -2,7 +2,7 @@
|
||||
name: Debug
|
||||
|
||||
'on':
|
||||
[push, pull_request, release]
|
||||
[push, pull_request, release, workflow_dispatch]
|
||||
|
||||
jobs:
|
||||
DebugInfo:
|
||||
|
64
.github/workflows/docs_check.yml
vendored
64
.github/workflows/docs_check.yml
vendored
@ -4,7 +4,7 @@ env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
on: # yamllint disable-line rule:truthy
|
||||
on: # yamllint disable-line rule:truthy
|
||||
pull_request:
|
||||
types:
|
||||
- synchronize
|
||||
@ -13,6 +13,7 @@ on: # yamllint disable-line rule:truthy
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'docker/docs/**'
|
||||
- 'docs/**'
|
||||
- 'website/**'
|
||||
jobs:
|
||||
@ -92,9 +93,46 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
StyleCheck:
|
||||
needs: DockerHubPush
|
||||
runs-on: [self-hosted, style-checker]
|
||||
if: ${{ success() || failure() }}
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{ runner.temp }}/style_check
|
||||
ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
|
||||
${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
|
||||
RCSK
|
||||
EOF
|
||||
- name: Download changed images
|
||||
# even if artifact does not exist, e.g. on `do not test` label or failed Docker job
|
||||
continue-on-error: true
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.TEMP_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Style Check
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 style_check.py
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
DocsCheck:
|
||||
needs: DockerHubPush
|
||||
runs-on: [self-hosted, func-tester]
|
||||
runs-on: [self-hosted, func-tester-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
@ -120,6 +158,24 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FinishCheck:
|
||||
needs:
|
||||
- StyleCheck
|
||||
- DockerHubPush
|
||||
- DocsCheck
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py
|
||||
|
16
.github/workflows/docs_release.yml
vendored
16
.github/workflows/docs_release.yml
vendored
@ -7,16 +7,16 @@ env:
|
||||
concurrency:
|
||||
group: master-release
|
||||
cancel-in-progress: true
|
||||
on: # yamllint disable-line rule:truthy
|
||||
'on':
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'docs/**'
|
||||
- 'website/**'
|
||||
- 'benchmark/**'
|
||||
- 'docker/**'
|
||||
- '.github/**'
|
||||
- 'docker/docs/release/**'
|
||||
- 'docs/**'
|
||||
- 'utils/list-versions/version_date.tsv'
|
||||
- 'website/**'
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
DockerHubPushAarch64:
|
||||
@ -116,6 +116,8 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
|
11
.github/workflows/jepsen.yml
vendored
11
.github/workflows/jepsen.yml
vendored
@ -7,11 +7,8 @@ concurrency:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: '0 */6 * * *'
|
||||
workflow_run:
|
||||
workflows: ["PullRequestCI"]
|
||||
types:
|
||||
- completed
|
||||
workflow_dispatch:
|
||||
workflow_call:
|
||||
jobs:
|
||||
KeeperJepsenRelease:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
@ -39,6 +36,8 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
|
1049
.github/workflows/master.yml
vendored
1049
.github/workflows/master.yml
vendored
File diff suppressed because it is too large
Load Diff
53
.github/workflows/nightly.yml
vendored
53
.github/workflows/nightly.yml
vendored
@ -7,6 +7,7 @@ env:
|
||||
"on":
|
||||
schedule:
|
||||
- cron: '13 3 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
DockerHubPushAarch64:
|
||||
@ -71,3 +72,55 @@ jobs:
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/changed_images.json
|
||||
BuilderCoverity:
|
||||
needs: DockerHubPush
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
BUILD_NAME=coverity
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
EOF
|
||||
echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV"
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
id: coverity-checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
git -C "$GITHUB_WORKSPACE" submodule sync
|
||||
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
|
||||
- name: Upload Coverity Analysis
|
||||
if: ${{ success() || failure() }}
|
||||
run: |
|
||||
curl --form token="${COVERITY_TOKEN}" \
|
||||
--form email='security+coverity@clickhouse.com' \
|
||||
--form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tgz" \
|
||||
--form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \
|
||||
--form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \
|
||||
https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
|
1160
.github/workflows/pull_request.yml
vendored
1160
.github/workflows/pull_request.yml
vendored
File diff suppressed because it is too large
Load Diff
30
.github/workflows/release.yml
vendored
30
.github/workflows/release.yml
vendored
@ -21,6 +21,9 @@ jobs:
|
||||
EOF
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
# Always use the most recent script version
|
||||
ref: master
|
||||
- name: Download packages and push to Artifactory
|
||||
run: |
|
||||
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
|
||||
@ -36,3 +39,30 @@ jobs:
|
||||
overwrite: true
|
||||
tag: ${{ github.ref }}
|
||||
file_glob: true
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}"
|
||||
python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
|
454
.github/workflows/release_branches.yml
vendored
454
.github/workflows/release_branches.yml
vendored
File diff suppressed because it is too large
Load Diff
41
.github/workflows/tags_stable.yml
vendored
41
.github/workflows/tags_stable.yml
vendored
@ -3,38 +3,67 @@ name: TagsStableWorkflow
|
||||
# - Sends it to JFROG Artifactory
|
||||
# - Adds them to the release assets
|
||||
|
||||
env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
on: # yamllint disable-line rule:truthy
|
||||
push:
|
||||
tags:
|
||||
- 'v*-prestable'
|
||||
- 'v*-stable'
|
||||
- 'v*-lts'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: 'Test tag'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
jobs:
|
||||
UpdateVersions:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set test tag
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${{ github.event.inputs.tag }}" >> "$GITHUB_ENV"
|
||||
- name: Get tag name
|
||||
run: echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
echo "GITHUB_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
ref: master
|
||||
fetch-depth: 0
|
||||
- name: Generate versions
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||
run: |
|
||||
git fetch --tags
|
||||
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
|
||||
GID=$(id -g "${UID}")
|
||||
docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \
|
||||
--volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \
|
||||
/ClickHouse/utils/changelog/changelog.py -v --debug-helpers \
|
||||
--gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \
|
||||
--output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}"
|
||||
git add "./docs/changelogs/${GITHUB_TAG}.md"
|
||||
git diff HEAD
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v3
|
||||
with:
|
||||
author: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
|
||||
committer: "robot-clickhouse <robot-clickhouse@users.noreply.github.com>"
|
||||
commit-message: Update version_date.tsv after ${{ env.GITHUB_TAG }}
|
||||
commit-message: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
|
||||
branch: auto/${{ env.GITHUB_TAG }}
|
||||
delete-branch: true
|
||||
title: Update version_date.tsv after ${{ env.GITHUB_TAG }}
|
||||
title: Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
|
||||
labels: do not test
|
||||
body: |
|
||||
Update version_date.tsv after ${{ env.GITHUB_TAG }}
|
||||
Update version_date.tsv and changelogs after ${{ env.GITHUB_TAG }}
|
||||
|
||||
Changelog category (leave one):
|
||||
### Changelog category (leave one):
|
||||
- Not for changelog (changelog entry is not required)
|
||||
|
6
.github/workflows/woboq.yml
vendored
6
.github/workflows/woboq.yml
vendored
@ -37,6 +37,8 @@ jobs:
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker kill $(docker ps -q) ||:
|
||||
# shellcheck disable=SC2046
|
||||
docker rm -f $(docker ps -a -q) ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -33,10 +33,6 @@
|
||||
/docs/zh/single.md
|
||||
/docs/ja/single.md
|
||||
/docs/fa/single.md
|
||||
/docs/en/development/cmake-in-clickhouse.md
|
||||
/docs/ja/development/cmake-in-clickhouse.md
|
||||
/docs/zh/development/cmake-in-clickhouse.md
|
||||
/docs/ru/development/cmake-in-clickhouse.md
|
||||
|
||||
# callgrind files
|
||||
callgrind.out.*
|
||||
|
26
.gitmodules
vendored
26
.gitmodules
vendored
@ -86,9 +86,6 @@
|
||||
[submodule "contrib/h3"]
|
||||
path = contrib/h3
|
||||
url = https://github.com/ClickHouse/h3
|
||||
[submodule "contrib/hyperscan"]
|
||||
path = contrib/hyperscan
|
||||
url = https://github.com/ClickHouse/hyperscan.git
|
||||
[submodule "contrib/libunwind"]
|
||||
path = contrib/libunwind
|
||||
url = https://github.com/ClickHouse/libunwind.git
|
||||
@ -204,7 +201,7 @@
|
||||
[submodule "contrib/boringssl"]
|
||||
path = contrib/boringssl
|
||||
url = https://github.com/ClickHouse/boringssl.git
|
||||
branch = MergeWithUpstream
|
||||
branch = unknown_branch_from_artur
|
||||
[submodule "contrib/NuRaft"]
|
||||
path = contrib/NuRaft
|
||||
url = https://github.com/ClickHouse/NuRaft.git
|
||||
@ -262,3 +259,24 @@
|
||||
[submodule "contrib/minizip-ng"]
|
||||
path = contrib/minizip-ng
|
||||
url = https://github.com/zlib-ng/minizip-ng
|
||||
[submodule "contrib/qpl"]
|
||||
path = contrib/qpl
|
||||
url = https://github.com/intel/qpl.git
|
||||
[submodule "contrib/wyhash"]
|
||||
path = contrib/wyhash
|
||||
url = https://github.com/wangyi-fudan/wyhash.git
|
||||
[submodule "contrib/hashidsxx"]
|
||||
path = contrib/hashidsxx
|
||||
url = https://github.com/schoentoon/hashidsxx.git
|
||||
[submodule "contrib/nats-io"]
|
||||
path = contrib/nats-io
|
||||
url = https://github.com/ClickHouse/nats.c.git
|
||||
[submodule "contrib/vectorscan"]
|
||||
path = contrib/vectorscan
|
||||
url = https://github.com/VectorCamp/vectorscan.git
|
||||
[submodule "contrib/liburing"]
|
||||
path = contrib/liburing
|
||||
url = https://github.com/axboe/liburing.git
|
||||
[submodule "contrib/c-ares"]
|
||||
path = contrib/c-ares
|
||||
url = https://github.com/ClickHouse/c-ares
|
||||
|
@ -13,9 +13,7 @@ max-statements=200
|
||||
ignore-long-lines = (# )?<?https?://\S+>?$
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
disable = bad-continuation,
|
||||
missing-docstring,
|
||||
bad-whitespace,
|
||||
disable = missing-docstring,
|
||||
too-few-public-methods,
|
||||
invalid-name,
|
||||
too-many-arguments,
|
||||
|
803
CHANGELOG.md
803
CHANGELOG.md
@ -1,8 +1,800 @@
|
||||
### Table of Contents
|
||||
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br>
|
||||
**[ClickHouse release v22.2, 2022-02-17](#222)**<br>
|
||||
**[ClickHouse release v22.1, 2022-01-18](#221)**<br>
|
||||
**[Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)**<br>
|
||||
**[ClickHouse release v22.8, 2022-08-18](#228)**<br/>
|
||||
**[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
|
||||
**[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
|
||||
**[ClickHouse release v22.5, 2022-05-19](#225)**<br/>
|
||||
**[ClickHouse release v22.4, 2022-04-20](#224)**<br/>
|
||||
**[ClickHouse release v22.3-lts, 2022-03-17](#223)**<br/>
|
||||
**[ClickHouse release v22.2, 2022-02-17](#222)**<br/>
|
||||
**[ClickHouse release v22.1, 2022-01-18](#221)**<br/>
|
||||
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br/>
|
||||
|
||||
|
||||
### <a id="228"></a> ClickHouse release 22.8, 2022-08-18
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Extended range of `Date32` and `DateTime64` to support dates from the year 1900 to 2299. In previous versions, the supported interval was only from the year 1925 to 2283. The implementation is using the proleptic Gregorian calendar (which is conformant with [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601):2004 (clause 3.2.1 The Gregorian calendar)) instead of accounting for historical transitions from the Julian to the Gregorian calendar. This change affects implementation-specific behavior for out-of-range arguments. E.g. if in previous versions the value of `1899-01-01` was clamped to `1925-01-01`, in the new version it will be clamped to `1900-01-01`. It changes the behavior of rounding with `toStartOfInterval` if you pass `INTERVAL 3 QUARTER` up to one quarter because the intervals are counted from an implementation-specific point of time. Closes [#28216](https://github.com/ClickHouse/ClickHouse/issues/28216), improves [#38393](https://github.com/ClickHouse/ClickHouse/issues/38393). [#39425](https://github.com/ClickHouse/ClickHouse/pull/39425) ([Roman Vasin](https://github.com/rvasin)).
|
||||
* Now, all relevant dictionary sources respect `remote_url_allow_hosts` setting. It was already done for HTTP, Cassandra, Redis. Added ClickHouse, MongoDB, MySQL, PostgreSQL. Host is checked only for dictionaries created from DDL. [#39184](https://github.com/ClickHouse/ClickHouse/pull/39184) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Prebuilt ClickHouse x86 binaries now require support for AVX instructions, i.e. a CPU not older than Intel Sandy Bridge / AMD Bulldozer, both released in 2011. [#39000](https://github.com/ClickHouse/ClickHouse/pull/39000) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Make the remote filesystem cache composable, allow not to evict certain files (regarding idx, mrk, ..), delete old cache version. Now it is possible to configure cache over Azure blob storage disk, over Local disk, over StaticWeb disk, etc. This PR is marked backward incompatible because cache configuration changes and in order for cache to work need to update the config file. Old cache will still be used with new configuration. The server will startup fine with the old cache configuration. Closes https://github.com/ClickHouse/ClickHouse/issues/36140. Closes https://github.com/ClickHouse/ClickHouse/issues/37889. ([Kseniia Sumarokova](https://github.com/kssenii)). [#36171](https://github.com/ClickHouse/ClickHouse/pull/36171))
|
||||
|
||||
#### New Feature
|
||||
* Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS.
|
||||
* Query parameters can be set in interactive mode as `SET param_abc = 'def'` and transferred via the native protocol as settings. [#39906](https://github.com/ClickHouse/ClickHouse/pull/39906) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Quota key can be set in the native protocol ([Yakov Olkhovsky](https://github.com/ClickHouse/ClickHouse/pull/39874)).
|
||||
* Added a setting `exact_rows_before_limit` (0/1). When enabled, ClickHouse will provide exact value for `rows_before_limit_at_least` statistic, but with the cost that the data before limit will have to be read completely. This closes [#6613](https://github.com/ClickHouse/ClickHouse/issues/6613). [#25333](https://github.com/ClickHouse/ClickHouse/pull/25333) ([kevin wan](https://github.com/MaxWk)).
|
||||
* Added support for parallel distributed insert select with `s3Cluster` table function into tables with `Distributed` and `Replicated` engine [#34670](https://github.com/ClickHouse/ClickHouse/issues/34670). [#39107](https://github.com/ClickHouse/ClickHouse/pull/39107) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Add new settings to control schema inference from text formats: - `input_format_try_infer_dates` - try infer dates from strings. - `input_format_try_infer_datetimes` - try infer datetimes from strings. - `input_format_try_infer_integers` - try infer `Int64` instead of `Float64`. - `input_format_json_try_infer_numbers_from_strings` - try infer numbers from json strings in JSON formats. [#39186](https://github.com/ClickHouse/ClickHouse/pull/39186) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* An option to provide JSON formatted log output. The purpose is to allow easier ingestion and query in log analysis tools. [#39277](https://github.com/ClickHouse/ClickHouse/pull/39277) ([Mallik Hassan](https://github.com/SadiHassan)).
|
||||
* Add function `nowInBlock` which allows getting the current time during long-running and continuous queries. Closes [#39522](https://github.com/ClickHouse/ClickHouse/issues/39522). Notes: there are no functions `now64InBlock` neither `todayInBlock`. [#39533](https://github.com/ClickHouse/ClickHouse/pull/39533) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add ability to specify settings for an `executable()` table function. [#39681](https://github.com/ClickHouse/ClickHouse/pull/39681) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* Implemented automatic conversion of database engine from `Ordinary` to `Atomic`. Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Support `SELECT ... INTO OUTFILE '...' AND STDOUT`. [#37490](https://github.com/ClickHouse/ClickHouse/issues/37490). [#39054](https://github.com/ClickHouse/ClickHouse/pull/39054) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Add formats `PrettyMonoBlock`, `PrettyNoEscapesMonoBlock`, `PrettyCompactNoEscapes`, `PrettyCompactNoEscapesMonoBlock`, `PrettySpaceNoEscapes`, `PrettySpaceMonoBlock`, `PrettySpaceNoEscapesMonoBlock`. [#39646](https://github.com/ClickHouse/ClickHouse/pull/39646) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add new setting schema_inference_hints that allows to specify structure hints in schema inference for specific columns. Closes [#39569](https://github.com/ClickHouse/ClickHouse/issues/39569). [#40068](https://github.com/ClickHouse/ClickHouse/pull/40068) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
|
||||
#### Performance Improvement
|
||||
* Improved memory usage during memory efficient merging of aggregation results. [#39429](https://github.com/ClickHouse/ClickHouse/pull/39429) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Added concurrency control logic to limit total number of concurrent threads created by queries. [#37558](https://github.com/ClickHouse/ClickHouse/pull/37558) ([Sergei Trifonov](https://github.com/serxa)). Add `concurrent_threads_soft_limit parameter` to increase performance in case of high QPS by means of limiting total number of threads for all queries. [#37285](https://github.com/ClickHouse/ClickHouse/pull/37285) ([Roman Vasin](https://github.com/rvasin)).
|
||||
* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Intel® In-Memory Analytics Accelerator (Intel® IAA) is a hardware accelerator available in the upcoming generation of Intel® Xeon® Scalable processors ("Sapphire Rapids"). Its goal is to speed up common operations in analytics like data (de)compression and filtering. ClickHouse gained the new "DeflateQpl" compression codec which utilizes the Intel® IAA offloading technology to provide a high-performance DEFLATE implementation. The codec uses the [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) which abstracts access to the hardware accelerator, respectively to a software fallback in case the hardware accelerator is not available. DEFLATE provides in general higher compression rates than ClickHouse's LZ4 default codec, and as a result, offers less disk I/O and lower main memory consumption. [#36654](https://github.com/ClickHouse/ClickHouse/pull/36654) ([jasperzhu](https://github.com/jinjunzh)). [#39494](https://github.com/ClickHouse/ClickHouse/pull/39494) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* `DISTINCT` in order with `ORDER BY`: Deduce way to sort based on input stream sort description. Skip sorting if input stream is already sorted. [#38719](https://github.com/ClickHouse/ClickHouse/pull/38719) ([Igor Nikonov](https://github.com/devcrafter)). Improve memory usage (significantly) and query execution time + use `DistinctSortedChunkTransform` for final distinct when `DISTINCT` columns match `ORDER BY` columns, but rename to `DistinctSortedStreamTransform` in `EXPLAIN PIPELINE` → this improves memory usage significantly + remove unnecessary allocations in hot loop in `DistinctSortedChunkTransform`. [#39432](https://github.com/ClickHouse/ClickHouse/pull/39432) ([Igor Nikonov](https://github.com/devcrafter)). Use `DistinctSortedTransform` only when sort description is applicable to DISTINCT columns, otherwise fall back to ordinary DISTINCT implementation + it allows making less checks during `DistinctSortedTransform` execution. [#39528](https://github.com/ClickHouse/ClickHouse/pull/39528) ([Igor Nikonov](https://github.com/devcrafter)). Fix: `DistinctSortedTransform` didn't take advantage of sorting. It never cleared HashSet since clearing_columns were detected incorrectly (always empty). So, it basically worked as ordinary `DISTINCT` (`DistinctTransform`). The fix reduces memory usage significantly. [#39538](https://github.com/ClickHouse/ClickHouse/pull/39538) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Use local node as first priority to get structure of remote table when executing `cluster` and similar table functions. [#39440](https://github.com/ClickHouse/ClickHouse/pull/39440) ([Mingliang Pan](https://github.com/liangliangpan)).
|
||||
* Optimize filtering by numeric columns with AVX512VBMI2 compress store. [#39633](https://github.com/ClickHouse/ClickHouse/pull/39633) ([Guo Wangyang](https://github.com/guowangy)). For systems with AVX512 VBMI2, this PR improves performance by ca. 6% for SSB benchmark queries queries 3.1, 3.2 and 3.3 (SF=100). Tested on Intel Icelake Xeon 8380 * 2 socket. [#40033](https://github.com/ClickHouse/ClickHouse/pull/40033) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Optimize index analysis with functional expressions in multi-thread scenario. [#39812](https://github.com/ClickHouse/ClickHouse/pull/39812) ([Guo Wangyang](https://github.com/guowangy)).
|
||||
* Optimizations for complex queries: Don't visit the AST for UDFs if none are registered. [#40069](https://github.com/ClickHouse/ClickHouse/pull/40069) ([Raúl Marín](https://github.com/Algunenano)). Optimize CurrentMemoryTracker alloc and free. [#40078](https://github.com/ClickHouse/ClickHouse/pull/40078) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Improved Base58 encoding/decoding. [#39292](https://github.com/ClickHouse/ClickHouse/pull/39292) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Improve bytes to bits mask transform for SSE/AVX/AVX512. [#39586](https://github.com/ClickHouse/ClickHouse/pull/39586) ([Guo Wangyang](https://github.com/guowangy)).
|
||||
|
||||
#### Improvement
|
||||
* Normalize `AggregateFunction` types and state representations because optimizations like [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). The functions with identical states can be used in materialized views interchangeably.
|
||||
* Rework and simplify the `system.backups` table, remove the `internal` column, allow user to set the ID of operation, add columns `num_files`, `uncompressed_size`, `compressed_size`, `start_time`, `end_time`. [#39503](https://github.com/ClickHouse/ClickHouse/pull/39503) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Improved structure of DDL query result table for `Replicated` database (separate columns with shard and replica name, more clear status) - `CREATE TABLE ... ON CLUSTER` queries can be normalized on initiator first if `distributed_ddl_entry_format_version` is set to 3 (default value). It means that `ON CLUSTER` queries may not work if initiator does not belong to the cluster that specified in query. Fixes [#37318](https://github.com/ClickHouse/ClickHouse/issues/37318), [#39500](https://github.com/ClickHouse/ClickHouse/issues/39500) - Ignore `ON CLUSTER` clause if database is `Replicated` and cluster name equals to database name. Related to [#35570](https://github.com/ClickHouse/ClickHouse/issues/35570) - Miscellaneous minor fixes for `Replicated` database engine - Check metadata consistency when starting up `Replicated` database, start replica recovery in case of mismatch of local metadata and metadata in Keeper. Resolves [#24880](https://github.com/ClickHouse/ClickHouse/issues/24880). [#37198](https://github.com/ClickHouse/ClickHouse/pull/37198) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add result_rows and result_bytes to progress reports (`X-ClickHouse-Summary`). [#39567](https://github.com/ClickHouse/ClickHouse/pull/39567) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Improve primary key analysis for MergeTree. [#25563](https://github.com/ClickHouse/ClickHouse/pull/25563) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* `timeSlots` now works with DateTime64; subsecond duration and slot size available when working with DateTime64. [#37951](https://github.com/ClickHouse/ClickHouse/pull/37951) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Added support of `LEFT SEMI` and `LEFT ANTI` direct join with `EmbeddedRocksDB` tables. [#38956](https://github.com/ClickHouse/ClickHouse/pull/38956) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Add profile events for fsync operations. [#39179](https://github.com/ClickHouse/ClickHouse/pull/39179) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exists. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Some small fixes for reading via http, allow to retry partial content in case if 200 OK. [#39244](https://github.com/ClickHouse/ClickHouse/pull/39244) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support queries `CREATE TEMPORARY TABLE ... (<list of columns>) AS ...`. [#39462](https://github.com/ClickHouse/ClickHouse/pull/39462) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add support of `!`/`*` (exclamation/asterisk) in custom TLDs (`cutToFirstSignificantSubdomainCustom()`/`cutToFirstSignificantSubdomainCustomWithWWW()`/`firstSignificantSubdomainCustom()`). [#39496](https://github.com/ClickHouse/ClickHouse/pull/39496) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add support for TLS connections to NATS. Implements [#39525](https://github.com/ClickHouse/ClickHouse/issues/39525). [#39527](https://github.com/ClickHouse/ClickHouse/pull/39527) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* `clickhouse-obfuscator` (a tool for database obfuscation for testing and load generation) now has the new `--save` and `--load` parameters to work with pre-trained models. This closes [#39534](https://github.com/ClickHouse/ClickHouse/issues/39534). [#39541](https://github.com/ClickHouse/ClickHouse/pull/39541) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix incorrect behavior of log rotation during restart. [#39558](https://github.com/ClickHouse/ClickHouse/pull/39558) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix building aggregate projections when external aggregation is on. Mark as improvement because the case is rare and there exists easy workaround to fix it via changing settings. This fixes [#39667](https://github.com/ClickHouse/ClickHouse/issues/39667) . [#39671](https://github.com/ClickHouse/ClickHouse/pull/39671) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Allow to execute hash functions with arguments of type `Map`. [#39685](https://github.com/ClickHouse/ClickHouse/pull/39685) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Add a configuration parameter to hide addresses in stack traces. It may improve security a little but generally, it is harmful and should not be used. [#39690](https://github.com/ClickHouse/ClickHouse/pull/39690) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Change the prefix size of AggregateFunctionDistinct to make sure nested function data memory segment is aligned. [#39696](https://github.com/ClickHouse/ClickHouse/pull/39696) ([Pxl](https://github.com/BiteTheDDDDt)).
|
||||
* Properly escape credentials passed to the `clickhouse-diagnostic` tool. [#39707](https://github.com/ClickHouse/ClickHouse/pull/39707) ([Dale McDiarmid](https://github.com/gingerwizard)).
|
||||
* ClickHouse Keeper improvement: create a snapshot on exit. It can be controlled with the config `keeper_server.create_snapshot_on_exit`, `true` by default. [#39755](https://github.com/ClickHouse/ClickHouse/pull/39755) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Support primary key analysis for `row_policy_filter` and `additional_filter`. It also helps fix issues like [#37454](https://github.com/ClickHouse/ClickHouse/issues/37454) . [#39826](https://github.com/ClickHouse/ClickHouse/pull/39826) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix two usability issues in Play UI: - it was non-pixel-perfect on iPad due to parasitic border radius and margins; - the progress indication did not display after the first query. This closes [#39957](https://github.com/ClickHouse/ClickHouse/issues/39957). This closes [#39960](https://github.com/ClickHouse/ClickHouse/issues/39960). [#39961](https://github.com/ClickHouse/ClickHouse/pull/39961) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Play UI: add row numbers; add cell selection on click; add hysteresis for table cells. [#39962](https://github.com/ClickHouse/ClickHouse/pull/39962) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Play UI: recognize tab key in textarea, but at the same time don't mess up with tab navigation. [#40053](https://github.com/ClickHouse/ClickHouse/pull/40053) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* The client will show server-side elapsed time. This is important for the performance comparison of ClickHouse services in remote datacenters. This closes [#38070](https://github.com/ClickHouse/ClickHouse/issues/38070). See also [this](https://github.com/ClickHouse/ClickBench/blob/main/hardware/benchmark-cloud.sh#L37) for motivation. [#39968](https://github.com/ClickHouse/ClickHouse/pull/39968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Adds `parseDateTime64BestEffortUS`, `parseDateTime64BestEffortUSOrNull`, `parseDateTime64BestEffortUSOrZero` functions, closing [#37492](https://github.com/ClickHouse/ClickHouse/issues/37492). [#40015](https://github.com/ClickHouse/ClickHouse/pull/40015) ([Tanya Bragin](https://github.com/tbragin)).
|
||||
* Extend the `system.processors_profile_log` with more information such as input rows. [#40121](https://github.com/ClickHouse/ClickHouse/pull/40121) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Display server-side time in `clickhouse-benchmark` by default if it is available (since ClickHouse version 22.8). This is needed to correctly compare the performance of clouds. This behavior can be changed with the new `--client-side-time` command line option. Change the `--randomize` command line option from `--randomize 1` to the form without argument. [#40193](https://github.com/ClickHouse/ClickHouse/pull/40193) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add counters (ProfileEvents) for cases when query complexity limitation has been set and has reached (a separate counter for `overflow_mode` = `break` and `throw`). For example, if you have set up `max_rows_to_read` with `read_overflow_mode = 'break'`, looking at the value of `OverflowBreak` counter will allow distinguishing incomplete results. [#40205](https://github.com/ClickHouse/ClickHouse/pull/40205) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix memory accounting in case of "Memory limit exceeded" errors (previously [peak] memory usage was takes failed allocations into account). [#40249](https://github.com/ClickHouse/ClickHouse/pull/40249) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add metrics for filesystem cache: `FilesystemCacheSize` and `FilesystemCacheElements`. [#40260](https://github.com/ClickHouse/ClickHouse/pull/40260) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)).
|
||||
* Avoid continuously growing memory consumption of pattern cache when using functions multi(Fuzzy)Match(Any|AllIndices|AnyIndex)(). [#40264](https://github.com/ClickHouse/ClickHouse/pull/40264) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from cache if data wasn't changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**).
|
||||
* ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)).
|
||||
* Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Ensure LSan is effective. [#39430](https://github.com/ClickHouse/ClickHouse/pull/39430) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* TSAN has issues with clang-14 (https://github.com/google/sanitizers/issues/1552, https://github.com/google/sanitizers/issues/1540), so here we build the TSAN binaries with clang-15. [#39450](https://github.com/ClickHouse/ClickHouse/pull/39450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Remove the option to build ClickHouse tools as separate executable programs. This fixes [#37847](https://github.com/ClickHouse/ClickHouse/issues/37847). [#39520](https://github.com/ClickHouse/ClickHouse/pull/39520) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)).
|
||||
* Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Simplified function registration macro interface (`FUNCTION_REGISTER*`) to eliminate the step to add and call an extern function in the registerFunctions.cpp, it also makes incremental builds of a new function faster. [#38615](https://github.com/ClickHouse/ClickHouse/pull/38615) ([Li Yin](https://github.com/liyinsg)).
|
||||
* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
|
||||
#### Bug Fix
|
||||
* Fix possible segfault in `CapnProto` input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix the case when the order of columns can be incorrect if the `IN` operator is used with a table with `ENGINE = Set` containing multiple columns. This fixes [#13014](https://github.com/ClickHouse/ClickHouse/issues/13014). [#40225](https://github.com/ClickHouse/ClickHouse/pull/40225) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix duplicate columns in join plan. Finally, solve [#26809](https://github.com/ClickHouse/ClickHouse/issues/26809). [#40009](https://github.com/ClickHouse/ClickHouse/pull/40009) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fixed query hanging for SELECT with ORDER BY WITH FILL with different date/time types. [#37849](https://github.com/ClickHouse/ClickHouse/pull/37849) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix ORDER BY that matches projections ORDER BY (before it simply returns unsorted result). [#38725](https://github.com/ClickHouse/ClickHouse/pull/38725) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Do not optimise functions in GROUP BY statements if they shadow one of the table columns or expressions. Fixes [#37032](https://github.com/ClickHouse/ClickHouse/issues/37032). [#39103](https://github.com/ClickHouse/ClickHouse/pull/39103) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Fix wrong table name in logs after RENAME TABLE. This fixes [#38018](https://github.com/ClickHouse/ClickHouse/issues/38018). [#39227](https://github.com/ClickHouse/ClickHouse/pull/39227) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix positional arguments in case of columns pruning when optimising the query. Closes [#38433](https://github.com/ClickHouse/ClickHouse/issues/38433). [#39293](https://github.com/ClickHouse/ClickHouse/pull/39293) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix bug in schema inference in case of empty messages in Protobuf/CapnProto formats that allowed to create column with empty `Tuple` type. Closes [#39051](https://github.com/ClickHouse/ClickHouse/issues/39051) Add 2 new settings `input_format_{protobuf/capnproto}_skip_fields_with_unsupported_types_in_schema_inference` that allow to skip fields with unsupported types while schema inference for Protobuf and CapnProto formats. [#39357](https://github.com/ClickHouse/ClickHouse/pull/39357) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* (Window View is an experimental feature) Fix segmentation fault on `CREATE WINDOW VIEW .. ON CLUSTER ... INNER`. Closes [#39363](https://github.com/ClickHouse/ClickHouse/issues/39363). [#39384](https://github.com/ClickHouse/ClickHouse/pull/39384) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix WriteBuffer finalize when cancelling insert into function (in previous versions it may leat to std::terminate). [#39458](https://github.com/ClickHouse/ClickHouse/pull/39458) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix storing of columns of type `Object` in sparse serialization. [#39464](https://github.com/ClickHouse/ClickHouse/pull/39464) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible "Not found column in block" exception when using projections. This closes [#39469](https://github.com/ClickHouse/ClickHouse/issues/39469). [#39470](https://github.com/ClickHouse/ClickHouse/pull/39470) ([小路](https://github.com/nicelulu)).
|
||||
* Fix exception on race between DROP and INSERT with materialized views. [#39477](https://github.com/ClickHouse/ClickHouse/pull/39477) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* A bug in Apache Avro library: fix data race and possible heap-buffer-overflow in Avro format. Closes [#39094](https://github.com/ClickHouse/ClickHouse/issues/39094) Closes [#33652](https://github.com/ClickHouse/ClickHouse/issues/33652). [#39498](https://github.com/ClickHouse/ClickHouse/pull/39498) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix rare bug in asynchronous reading (with setting `local_filesystem_read_method='pread_threadpool'`) with enabled `O_DIRECT` (enabled by setting `min_bytes_to_use_direct_io`). [#39506](https://github.com/ClickHouse/ClickHouse/pull/39506) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* (only on FreeBSD) Fixes "Code: 49. DB::Exception: FunctionFactory: the function name '' is not unique. (LOGICAL_ERROR)" observed on FreeBSD when starting clickhouse. [#39551](https://github.com/ClickHouse/ClickHouse/pull/39551) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix bug with the recently introduced "maxsplit" argument for `splitByChar`, which was not working correctly. [#39552](https://github.com/ClickHouse/ClickHouse/pull/39552) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix bug in ASOF JOIN with `enable_optimize_predicate_expression`, close [#37813](https://github.com/ClickHouse/ClickHouse/issues/37813). [#39556](https://github.com/ClickHouse/ClickHouse/pull/39556) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fixed `CREATE/DROP INDEX` query with `ON CLUSTER` or `Replicated` database and `ReplicatedMergeTree`. It used to be executed on all replicas (causing error or DDL queue stuck). Fixes [#39511](https://github.com/ClickHouse/ClickHouse/issues/39511). [#39565](https://github.com/ClickHouse/ClickHouse/pull/39565) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix "column not found" error for push down with join, close [#39505](https://github.com/ClickHouse/ClickHouse/issues/39505). [#39575](https://github.com/ClickHouse/ClickHouse/pull/39575) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix the wrong `REGEXP_REPLACE` alias. This fixes https://github.com/ClickHouse/ClickBench/issues/9. [#39592](https://github.com/ClickHouse/ClickHouse/pull/39592) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed point of origin for exponential decay window functions to the last value in window. Previously, decay was calculated by formula `exp((t - curr_row_t) / decay_length)`, which is incorrect when right boundary of window is not `CURRENT ROW`. It was changed to: `exp((t - last_row_t) / decay_length)`. There is no change in results for windows with `ROWS BETWEEN (smth) AND CURRENT ROW`. [#39593](https://github.com/ClickHouse/ClickHouse/pull/39593) ([Vladimir Chebotaryov](https://github.com/quickhouse)).
|
||||
* Fix Decimal division overflow, which can be detected based on operands scale. [#39600](https://github.com/ClickHouse/ClickHouse/pull/39600) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Fix settings `output_format_arrow_string_as_string` and `output_format_arrow_low_cardinality_as_dictionary` work in combination. Closes [#39624](https://github.com/ClickHouse/ClickHouse/issues/39624). [#39647](https://github.com/ClickHouse/ClickHouse/pull/39647) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fixed a bug in default database resolution in distributed table reads. [#39674](https://github.com/ClickHouse/ClickHouse/pull/39674) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* (Only with the obsolete Ordinary databases) Select might read data of dropped table if cache for mmap IO is used and database engine is Ordinary and new tables was created with the same name as dropped one had. It's fixed. [#39708](https://github.com/ClickHouse/ClickHouse/pull/39708) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix possible error `Invalid column type for ColumnUnique::insertRangeFrom. Expected String, got ColumnLowCardinality` Fixes [#38460](https://github.com/ClickHouse/ClickHouse/issues/38460). [#39716](https://github.com/ClickHouse/ClickHouse/pull/39716) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Field names in the `meta` section of JSON format were erroneously double escaped. This closes [#39693](https://github.com/ClickHouse/ClickHouse/issues/39693). [#39747](https://github.com/ClickHouse/ClickHouse/pull/39747) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix wrong index analysis with tuples and operator `IN`, which could lead to wrong query result. [#39752](https://github.com/ClickHouse/ClickHouse/pull/39752) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix `EmbeddedRocksDB` tables filtering by key using params. [#39757](https://github.com/ClickHouse/ClickHouse/pull/39757) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix error `Invalid number of columns in chunk pushed to OutputPort` which was caused by ARRAY JOIN optimization. Fixes [#39164](https://github.com/ClickHouse/ClickHouse/issues/39164). [#39799](https://github.com/ClickHouse/ClickHouse/pull/39799) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* A workaround for a bug in Linux kernel. Fix `CANNOT_READ_ALL_DATA` exception with `local_filesystem_read_method=pread_threadpool`. This bug affected only Linux kernel version 5.9 and 5.10 according to [man](https://manpages.debian.org/testing/manpages-dev/preadv2.2.en.html#BUGS). [#39800](https://github.com/ClickHouse/ClickHouse/pull/39800) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* (Only on NFS) Fix broken NFS mkdir for root-squashed volumes. [#39898](https://github.com/ClickHouse/ClickHouse/pull/39898) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* Remove dictionaries from prometheus metrics on DETACH/DROP. [#39926](https://github.com/ClickHouse/ClickHouse/pull/39926) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix read of StorageFile with virtual columns. Closes [#39907](https://github.com/ClickHouse/ClickHouse/issues/39907). [#39943](https://github.com/ClickHouse/ClickHouse/pull/39943) ([flynn](https://github.com/ucasfl)).
|
||||
* Fix big memory usage during fetches. Fixes [#39915](https://github.com/ClickHouse/ClickHouse/issues/39915). [#39990](https://github.com/ClickHouse/ClickHouse/pull/39990) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* (experimental feature) Fix `hashId` crash and salt parameter not being used. [#40002](https://github.com/ClickHouse/ClickHouse/pull/40002) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* `EXCEPT` and `INTERSECT` operators may lead to crash if a specific combination of constant and non-constant columns were used. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Fixed "Part directory doesn't exist" and "`tmp_<part_name>` ... No such file or directory" errors during too slow INSERT or too long merge/mutation. Also fixed issue that may cause some replication queue entries to stuck without any errors or warnings in logs if previous attempt to fetch part failed, but `tmp-fetch_<part_name>` directory was not cleaned up. [#40031](https://github.com/ClickHouse/ClickHouse/pull/40031) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix rare cases of parsing of arrays of tuples in format `Values`. [#40034](https://github.com/ClickHouse/ClickHouse/pull/40034) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixes ArrowColumn format Dictionary(X) & Dictionary(Nullable(X)) conversion to ClickHouse LowCardinality(X) & LowCardinality(Nullable(X)) respectively. [#40037](https://github.com/ClickHouse/ClickHouse/pull/40037) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Fix potential deadlock in writing to S3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix bug in collectFilesToSkip() by adding correct file extension (.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)).
|
||||
* A fix for reverse DNS resolution. [#40134](https://github.com/ClickHouse/ClickHouse/pull/40134) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Fix unexpected result `arrayDifference` of `Array(UInt32). [#40211](https://github.com/ClickHouse/ClickHouse/pull/40211) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
|
||||
|
||||
### <a id="227"></a> ClickHouse release 22.7, 2022-07-21
|
||||
|
||||
#### Upgrade Notes
|
||||
* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you will face any incompatibilities, you can turn this setting back.
|
||||
|
||||
#### New Feature
|
||||
* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Implement NATS table engine, which allows to pub/sub to NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). ([Kseniia Sumarokova](https://github.com/kssenii))
|
||||
* Implement table function `mongodb`. Allow writes into `MongoDB` storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). ([Kseniia Sumarokova](https://github.com/kssenii))
|
||||
* Add `SQLInsert` output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Introduced settings `additional_table_filters`. Using this setting, you can specify additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers : 'number != 3', 'table_1' : 'x != 2'}`. Introduced setting `additional_result_filter` which specifies additional filtering condition for query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. It translates some characters to another. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Support `parseTimeDelta` function. It can be used like ` ;-+,:` can be used as separators, eg. `1yr-2mo`, `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)).
|
||||
* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Add an option to require explicit grants to SELECT from the `system` database. Details: [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)). SQL function `multiSearchAllPositions` now accepts non-const needle arguments. [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)).
|
||||
* Add `send_logs_source_regexp` setting. Send server text logs with specified regexp to match log source name. Empty means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Support `ALTER` for `Hive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Support `isNullable` function. This function checks whether it's argument is nullable and return 1 or 0. Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)).
|
||||
* Added functions for base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Added L2 Squared distance and norm functions for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)).
|
||||
* Add ability to pass HTTP headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix significant join performance regression which was introduced in [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616). It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complains. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Migrate from the Intel hyperscan library to vectorscan, this speeds up many string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)).
|
||||
* `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Align branches within a 32B boundary to make benchmark more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). It improves performance 1..2% on average for Intel.
|
||||
* Executable UDF, executable dictionaries, and Executable tables will avoid wasting one second during wait for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* Optimize accesses to `system.stack_trace` table if not all columns are selected. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Improvement
|
||||
* Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)).
|
||||
* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)).
|
||||
* * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). [#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might left garbage subdirectories in `store/` (for example, on unsuccessful table creation) and those dirs were never been removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show available filesystem caches list. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add access check for `system drop filesystem cache`. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add `revision` function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* New option `optimize = 1` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Allow trailing comma in columns list. closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)).
|
||||
* Bugfixes and performance improvements for `parallel_hash` JOIN method. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)).
|
||||
* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* S3 single objects are now removed with `RemoveObjectRequest`. Implement compatibility with GCP which did not allow to use `removeFileIfExists` effectively breaking approximately half of `remove` functionality. Automatic detection for `DeleteObjects` S3 API, that is not supported by GCS. This will allow to use GCS without explicit `support_batch_delete=0` in configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Expose basic ClickHouse Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)).
|
||||
* Support `auto_close` option for PostgreSQL engine connection. Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Allow `NULL` modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Deactivate `mutations_finalizing_task` before shutdown to avoid benign `TABLE_IS_READ_ONLY` errors during shutdown. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Eliminate unnecessary waiting of SELECT queries after ALTER queries in presence of INSERT queries if you use deprecated Ordinary databases. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. [#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* `clickhouse-keeper`: add support for real-time digest calculation and verification. It is disabled by default. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Allow to specify globs `* or {expr1, expr2, expr3}` inside a key for `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* clickhouse-keeper improvement: persist meta-information about keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)). This will make it easier to operate if you shutdown or restart all keeper nodes at the same time.
|
||||
* Continue without exception when running out of disk space when using filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Handling SIGTERM signals from k8s. [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)).
|
||||
* Add `merge_algorithm` column (Undecided, Horizontal, Vertical) to system.part_log. [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Don't increment a counter in `system.errors` when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* The metric `result_bytes` for `INSERT` queries in `system.query_log` shows number of bytes inserted. Previously value was incorrect and stored the same value as `result_rows`. [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Rethrow exception on filesystem cache initialization on server startup, better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* OpenTelemetry now collects traces without Processors spans by default (there are too many). To enable Processors spans collection `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
* Functions `multiMatch[Fuzzy](AllIndices/Any/AnyIndex)` - don't throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)).
|
||||
* Fix a bug in `jemalloc` library [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Correct glob expansion in case of `{0..10}` forms. Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498) Current Implementation is similar to what shell does mentiond by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)).
|
||||
* Fix crash for `mapUpdate`, `mapFilter` functions when using with constant map argument. Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Fix `toHour` monotonicity information for query optimization which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix checking whether s3 storage support parallel writes. It resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)).
|
||||
* Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621) - a buffer overflow on machines with the latest Intel CPUs with AVX-512 VBMI. [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix possible logical error for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix incorrect partition pruning when there is a nullable partition key. Note: most likely you don't use nullable partition keys - this is an obscure feature you should not use. Nullable keys are a nonsense and this feature is only needed for some crazy use-cases. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Improve `fsync_part_directory` for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix possible dealock inside `OvercommitTracker`. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix some corner cases of interpretation of the arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538) Allow using of higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Keep `LowCardinality` type in `tuple` function. Previously `LowCardinality` type was dropped and elements of created tuple had underlying type of `LowCardinality`. [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix extremely rare race condition in during hardlinks for remote filesystem. The only way to reproduce it is concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)).
|
||||
* (zero-copy replication is an experimental feature that should not be used in production) Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* (MaterializedPostgreSQL - experimental feature). Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix incorrect fetch of table metadata from PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix incorrect work of MODIFY ALTER Column with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix "Missing columns" for GLOBAL JOIN with CTE without alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)).
|
||||
* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix temporary name clash in partial merge join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Some minr issue with queries like `CREATE TABLE nested_name_tuples (`a` Tuple(x String, y Tuple(i Int32, j String))) ENGINE = Memory;` [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* (Window View is a experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)).
|
||||
* Fix INSERT into Distributed hung due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* MaterializedPostgreSQL - experimentail feature. Fix possible `Invalid number of rows in Chunk` in MaterializedPostgreSQL. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). [#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix RabbitMQ configuration with connection string setting. Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix possibly incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with the incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)).
|
||||
* `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix use-after-free for aggregate functions with `Map` combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
### <a id="226"></a> ClickHouse release 22.6, 2022-06-16
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Remove support for octal number literals in SQL. In previous versions they were parsed as Float64. [#37765](https://github.com/ClickHouse/ClickHouse/pull/37765) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Changes how settings using `seconds` as type are parsed to support floating point values (for example: `max_execution_time=0.5`). Infinity or NaN values will throw an exception. [#37187](https://github.com/ClickHouse/ClickHouse/pull/37187) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Changed format of binary serialization of columns of experimental type `Object`. New format is more convenient to implement by third-party clients. [#37482](https://github.com/ClickHouse/ClickHouse/pull/37482) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Turn on setting `output_format_json_named_tuples_as_objects` by default. It allows to serialize named tuples as JSON objects in JSON formats. [#37756](https://github.com/ClickHouse/ClickHouse/pull/37756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* LIKE patterns with trailing escape symbol ('\\') are now disallowed (as mandated by the SQL standard). [#37764](https://github.com/ClickHouse/ClickHouse/pull/37764) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* If you run different ClickHouse versions on a cluster with AArch64 CPU or mix AArch64 and amd64 on a cluster, and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, and the size of the result is huge, the data will not be fully aggregated in the result of these queries during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade.
|
||||
|
||||
#### New Feature
|
||||
* Add `GROUPING` function. It allows to disambiguate the records in the queries with `ROLLUP`, `CUBE` or `GROUPING SETS`. Closes [#19426](https://github.com/ClickHouse/ClickHouse/issues/19426). [#37163](https://github.com/ClickHouse/ClickHouse/pull/37163) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* A new codec [FPC](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf) algorithm for floating point data compression. [#37553](https://github.com/ClickHouse/ClickHouse/pull/37553) ([Mikhail Guzov](https://github.com/koloshmet)).
|
||||
* Add new columnar JSON formats: `JSONColumns`, `JSONCompactColumns`, `JSONColumnsWithMetadata`. Closes [#36338](https://github.com/ClickHouse/ClickHouse/issues/36338) Closes [#34509](https://github.com/ClickHouse/ClickHouse/issues/34509). [#36975](https://github.com/ClickHouse/ClickHouse/pull/36975) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Added open telemetry traces visualizing tool based on d3js. [#37810](https://github.com/ClickHouse/ClickHouse/pull/37810) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Support INSERTs into `system.zookeeper` table. Closes [#22130](https://github.com/ClickHouse/ClickHouse/issues/22130). [#37596](https://github.com/ClickHouse/ClickHouse/pull/37596) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Support non-constant pattern argument for `LIKE`, `ILIKE` and `match` functions. [#37251](https://github.com/ClickHouse/ClickHouse/pull/37251) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Executable user defined functions now support parameters. Example: `SELECT test_function(parameters)(arguments)`. Closes [#37578](https://github.com/ClickHouse/ClickHouse/issues/37578). [#37720](https://github.com/ClickHouse/ClickHouse/pull/37720) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add `merge_reason` column to system.part_log table. [#36912](https://github.com/ClickHouse/ClickHouse/pull/36912) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Add support for Maps and Records in Avro format. Add new setting `input_format_avro_null_as_default ` that allow to insert null as default in Avro format. Closes [#18925](https://github.com/ClickHouse/ClickHouse/issues/18925) Closes [#37378](https://github.com/ClickHouse/ClickHouse/issues/37378) Closes [#32899](https://github.com/ClickHouse/ClickHouse/issues/32899). [#37525](https://github.com/ClickHouse/ClickHouse/pull/37525) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add `clickhouse-disks` tool to introspect and operate on virtual filesystems configured for ClickHouse. [#36060](https://github.com/ClickHouse/ClickHouse/pull/36060) ([Artyom Yurkov](https://github.com/Varinara)).
|
||||
* Adds H3 unidirectional edge functions. [#36843](https://github.com/ClickHouse/ClickHouse/pull/36843) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Add support for calculating [hashids](https://hashids.org/) from unsigned integers. [#37013](https://github.com/ClickHouse/ClickHouse/pull/37013) ([Michael Nutt](https://github.com/mnutt)).
|
||||
* Explicit `SALT` specification is allowed for `CREATE USER <user> IDENTIFIED WITH sha256_hash`. [#37377](https://github.com/ClickHouse/ClickHouse/pull/37377) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)).
|
||||
* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Enables `POPULATE` for `WINDOW VIEW`. [#36945](https://github.com/ClickHouse/ClickHouse/pull/36945) ([vxider](https://github.com/Vxider)).
|
||||
* `ALTER TABLE ... MODIFY QUERY` support for `WINDOW VIEW`. [#37188](https://github.com/ClickHouse/ClickHouse/pull/37188) ([vxider](https://github.com/Vxider)).
|
||||
* This PR changes the behavior of the `ENGINE` syntax in `WINDOW VIEW`, to make it like in `MATERIALIZED VIEW`. [#37214](https://github.com/ClickHouse/ClickHouse/pull/37214) ([vxider](https://github.com/Vxider)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Added numerous optimizations for ARM NEON [#38093](https://github.com/ClickHouse/ClickHouse/pull/38093)([Daniel Kutenin](https://github.com/danlark1)), ([Alexandra Pilipyuk](https://github.com/chalice19)) Note: if you run different ClickHouse versions on a cluster with ARM CPU and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, the result of the aggregation query will be wrong during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade.
|
||||
* Improve performance and memory usage for select of subset of columns for formats Native, Protobuf, CapnProto, JSONEachRow, TSKV, all formats with suffixes WithNames/WithNamesAndTypes. Previously while selecting only subset of columns from files in these formats all columns were read and stored in memory. Now only required columns are read. This PR enables setting `input_format_skip_unknown_fields` by default, because otherwise in case of select of subset of columns exception will be thrown. [#37192](https://github.com/ClickHouse/ClickHouse/pull/37192) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Now more filters can be pushed down for join. [#37472](https://github.com/ClickHouse/ClickHouse/pull/37472) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Improved performance of aggregation in case, when sparse columns (can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables) are used as arguments in aggregate functions. [#37617](https://github.com/ClickHouse/ClickHouse/pull/37617) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Optimize function `COALESCE` with two arguments. [#37666](https://github.com/ClickHouse/ClickHouse/pull/37666) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Replace `multiIf` to `if` in case when `multiIf` has only one condition, because function `if` is more performant. [#37695](https://github.com/ClickHouse/ClickHouse/pull/37695) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Improve performance of `dictGetDescendants`, `dictGetChildren` functions, create temporary parent to children hierarchical index per query, not per function call during query. Allow to specify `BIDIRECTIONAL` for `HIERARHICAL` attributes, dictionary will maintain parent to children index in memory, that way functions `dictGetDescendants`, `dictGetChildren` will not create temporary index per query. Closes [#32481](https://github.com/ClickHouse/ClickHouse/issues/32481). [#37148](https://github.com/ClickHouse/ClickHouse/pull/37148) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Aggregates state destruction now may be posted on a thread pool. For queries with LIMIT and big state it provides significant speedup, e.g. `select uniq(number) from numbers_mt(1e7) group by number limit 100` became around 2.5x faster. [#37855](https://github.com/ClickHouse/ClickHouse/pull/37855) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Improve sort performance by single column. [#37195](https://github.com/ClickHouse/ClickHouse/pull/37195) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of single column sorting using sorting queue specializations. [#37990](https://github.com/ClickHouse/ClickHouse/pull/37990) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improved performance on array norm and distance functions 2x-4x times. [#37394](https://github.com/ClickHouse/ClickHouse/pull/37394) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improve performance of number comparison functions using dynamic dispatch. [#37399](https://github.com/ClickHouse/ClickHouse/pull/37399) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of ORDER BY with LIMIT. [#37481](https://github.com/ClickHouse/ClickHouse/pull/37481) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `hasAll` function using dynamic dispatch infrastructure. [#37484](https://github.com/ClickHouse/ClickHouse/pull/37484) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `greatCircleAngle`, `greatCircleDistance`, `geoDistance` functions. [#37524](https://github.com/ClickHouse/ClickHouse/pull/37524) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of insert into MergeTree if there are multiple columns in ORDER BY. [#35762](https://github.com/ClickHouse/ClickHouse/pull/35762) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix excessive CPU usage in background when there are a lot of tables. [#38028](https://github.com/ClickHouse/ClickHouse/pull/38028) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of `not` function using dynamic dispatch. [#38058](https://github.com/ClickHouse/ClickHouse/pull/38058) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Optimized the internal caching of re2 patterns which occur e.g. in LIKE and MATCH functions. [#37544](https://github.com/ClickHouse/ClickHouse/pull/37544) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Improve filter bitmask generator function all in one with AVX-512 instructions. [#37588](https://github.com/ClickHouse/ClickHouse/pull/37588) ([yaqi-zhao](https://github.com/yaqi-zhao)).
|
||||
* Apply read method `threadpool` for Hive integration engine. This will significantly speed up reading. [#36328](https://github.com/ClickHouse/ClickHouse/pull/36328) ([李扬](https://github.com/taiyang-li)).
|
||||
* When all the columns to read are partition keys, construct columns by the file's row number without real reading the Hive file. [#37103](https://github.com/ClickHouse/ClickHouse/pull/37103) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Support multi disks for caching hive files. [#37279](https://github.com/ClickHouse/ClickHouse/pull/37279) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Limiting the maximum cache usage per query can effectively prevent cache pool contamination. [Related Issues](https://github.com/ClickHouse/ClickHouse/issues/28961). [#37859](https://github.com/ClickHouse/ClickHouse/pull/37859) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Currently clickhouse directly downloads all remote files to the local cache (even if they are only read once), which will frequently cause IO of the local hard disk. In some scenarios, these IOs may not be necessary and may easily cause negative optimization. As shown in the figure below, when we run SSB Q1-Q4, the performance of the cache has caused negative optimization. [#37516](https://github.com/ClickHouse/ClickHouse/pull/37516) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Allow to prune the list of files via virtual columns such as `_file` and `_path` when reading from S3. This is for [#37174](https://github.com/ClickHouse/ClickHouse/issues/37174) , [#23494](https://github.com/ClickHouse/ClickHouse/issues/23494). [#37356](https://github.com/ClickHouse/ClickHouse/pull/37356) ([Amos Bird](https://github.com/amosbird)).
|
||||
* In function: CompressedWriteBuffer::nextImpl(), there is an unnecessary write-copy step that would happen frequently during inserting data. Below shows the differentiation with this patch: - Before: 1. Compress "working_buffer" into "compressed_buffer" 2. write-copy into "out" - After: Directly Compress "working_buffer" into "out". [#37242](https://github.com/ClickHouse/ClickHouse/pull/37242) ([jasperzhu](https://github.com/jinjunzh)).
|
||||
|
||||
#### Improvement
|
||||
* Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS. Closes [#37360](https://github.com/ClickHouse/ClickHouse/issues/37360). [#37667](https://github.com/ClickHouse/ClickHouse/pull/37667) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix stack traces collection on ARM. Closes [#37044](https://github.com/ClickHouse/ClickHouse/issues/37044). Closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#37797](https://github.com/ClickHouse/ClickHouse/pull/37797) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Client will try every IP address returned by DNS resolution until successful connection. [#37273](https://github.com/ClickHouse/ClickHouse/pull/37273) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Allow to use String type instead of Binary in Arrow/Parquet/ORC formats. This PR introduces 3 new settings for it: `output_format_arrow_string_as_string`, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`. Default value for all settings is `false`. [#37327](https://github.com/ClickHouse/ClickHouse/pull/37327) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Apply setting `input_format_max_rows_to_read_for_schema_inference` for all read rows in total from all files in globs. Previously setting `input_format_max_rows_to_read_for_schema_inference` was applied for each file in glob separately and in case of huge number of nulls we could read first `input_format_max_rows_to_read_for_schema_inference` rows from each file and get nothing. Also increase default value for this setting to 25000. [#37332](https://github.com/ClickHouse/ClickHouse/pull/37332) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Added support for schema inference for `hdfsCluster`. [#35812](https://github.com/ClickHouse/ClickHouse/pull/35812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Implement `least_used` load balancing algorithm for disks inside volume (multi disk configuration). [#36686](https://github.com/ClickHouse/ClickHouse/pull/36686) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Modify the HTTP Endpoint to return the full stats under the `X-ClickHouse-Summary` header when `send_progress_in_http_headers=0` (before it would return all zeros). - Modify the HTTP Endpoint to return `X-ClickHouse-Exception-Code` header when progress has been sent before (`send_progress_in_http_headers=1`) - Modify the HTTP Endpoint to return `HTTP_REQUEST_TIMEOUT` (408) instead of `HTTP_INTERNAL_SERVER_ERROR` (500) on `TIMEOUT_EXCEEDED` errors. [#36884](https://github.com/ClickHouse/ClickHouse/pull/36884) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Allow a user to inspect grants from granted roles. [#36941](https://github.com/ClickHouse/ClickHouse/pull/36941) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
* Do not calculate an integral numerically but use CDF functions instead. This will speed up execution and will increase the precision. This fixes [#36714](https://github.com/ClickHouse/ClickHouse/issues/36714). [#36953](https://github.com/ClickHouse/ClickHouse/pull/36953) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Add default implementation for Nothing in functions. Now most of the functions will return column with type Nothing in case one of it's arguments is Nothing. It also solves problem with functions like arrayMap/arrayFilter and similar when they have empty array as an argument. Previously queries like `select arrayMap(x -> 2 * x, []);` failed because function inside lambda cannot work with type `Nothing`, now such queries return empty array with type `Array(Nothing)`. Also add support for arrays of nullable types in functions like arrayFilter/arrayFill. Previously, queries like `select arrayFilter(x -> x % 2, [1, NULL])` failed, now they work (if the result of lambda is NULL, then this value won't be included in the result). Closes [#37000](https://github.com/ClickHouse/ClickHouse/issues/37000). [#37048](https://github.com/ClickHouse/ClickHouse/pull/37048) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Now if a shard has local replica we create a local plan and a plan to read from all remote replicas. They have shared initiator which coordinates reading. [#37204](https://github.com/ClickHouse/ClickHouse/pull/37204) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Do no longer abort server startup if configuration option "mark_cache_size" is not explicitly set. [#37326](https://github.com/ClickHouse/ClickHouse/pull/37326) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Allows providing `NULL`/`NOT NULL` right after type in column declaration. [#37337](https://github.com/ClickHouse/ClickHouse/pull/37337) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* optimize file segment PARTIALLY_DOWNLOADED get read buffer. [#37338](https://github.com/ClickHouse/ClickHouse/pull/37338) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Try to improve short circuit functions processing to fix problems with stress tests. [#37384](https://github.com/ClickHouse/ClickHouse/pull/37384) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Generate multiple columns with UUID (generateUUIDv4(1), generateUUIDv4(2)) [#37395](https://github.com/ClickHouse/ClickHouse/issues/37395). [#37415](https://github.com/ClickHouse/ClickHouse/pull/37415) ([Memo](https://github.com/Joeywzr)).
|
||||
* Fix extremely rare deadlock during part fetch in zero-copy replication. Fixes [#37423](https://github.com/ClickHouse/ClickHouse/issues/37423). [#37424](https://github.com/ClickHouse/ClickHouse/pull/37424) ([metahys](https://github.com/metahys)).
|
||||
* Don't allow to create storage with unknown data format. [#37450](https://github.com/ClickHouse/ClickHouse/pull/37450) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Set `global_memory_usage_overcommit_max_wait_microseconds` default value to 5 seconds. Add info about `OvercommitTracker` to OOM exception message. Add `MemoryOvercommitWaitTimeMicroseconds` profile event. [#37460](https://github.com/ClickHouse/ClickHouse/pull/37460) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Do not display `-0.0` CPU time in clickhouse-client. It can appear due to rounding errors. This closes [#38003](https://github.com/ClickHouse/ClickHouse/issues/38003). This closes [#38038](https://github.com/ClickHouse/ClickHouse/issues/38038). [#38064](https://github.com/ClickHouse/ClickHouse/pull/38064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Play UI: Keep controls in place when the page is scrolled horizontally. This makes edits comfortable even if the table is wide and it was scrolled far to the right. The feature proposed by Maksym Tereshchenko from CaspianDB. [#37470](https://github.com/ClickHouse/ClickHouse/pull/37470) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Modify query div in play.html to be extendable beyond 20% height. In case of very long queries it is helpful to extend the textarea element, only today, since the div is fixed height, the extended textarea hides the data div underneath. With this fix, extending the textarea element will push the data div down/up such the extended textarea won't hide it. Also, keeps query box width 100% even when the user adjusting the size of the query textarea. [#37488](https://github.com/ClickHouse/ClickHouse/pull/37488) ([guyco87](https://github.com/guyco87)).
|
||||
* Added `ProfileEvents` for introspection of type of written (inserted or merged) parts (`Inserted{Wide/Compact/InMemory}Parts`, `MergedInto{Wide/Compact/InMemory}Parts`. Added column `part_type` to `system.part_log`. Resolves [#37495](https://github.com/ClickHouse/ClickHouse/issues/37495). [#37536](https://github.com/ClickHouse/ClickHouse/pull/37536) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* clickhouse-keeper improvement: move broken logs to a timestamped folder. [#37565](https://github.com/ClickHouse/ClickHouse/pull/37565) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Do not write expired columns by TTL after subsequent merges (before only first merge/optimize of the part will not write expired by TTL columns, all other will do). [#37570](https://github.com/ClickHouse/ClickHouse/pull/37570) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* More precise result of the `dumpColumnStructure` miscellaneous function in presence of LowCardinality or Sparse columns. In previous versions, these functions were converting the argument to a full column before returning the result. This is needed to provide an answer in [#6935](https://github.com/ClickHouse/ClickHouse/issues/6935). [#37633](https://github.com/ClickHouse/ClickHouse/pull/37633) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* clickhouse-keeper: store only unique session IDs for watches. [#37641](https://github.com/ClickHouse/ClickHouse/pull/37641) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix possible "Cannot write to finalized buffer". [#37645](https://github.com/ClickHouse/ClickHouse/pull/37645) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add setting `support_batch_delete` for `DiskS3` to disable multiobject delete calls, which Google Cloud Storage doesn't support. [#37659](https://github.com/ClickHouse/ClickHouse/pull/37659) ([Fred Wulff](https://github.com/frew)).
|
||||
* Add an option to disable connection pooling in ODBC bridge. [#37705](https://github.com/ClickHouse/ClickHouse/pull/37705) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` added support nullable `HIERARCHICAL` attribute in dictionaries. Closes [#35521](https://github.com/ClickHouse/ClickHouse/issues/35521). [#37805](https://github.com/ClickHouse/ClickHouse/pull/37805) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Expose BoringSSL version related info in the `system.build_options` table. [#37850](https://github.com/ClickHouse/ClickHouse/pull/37850) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Now clickhouse-server removes `delete_tmp` directories on server start. Fixes [#26503](https://github.com/ClickHouse/ClickHouse/issues/26503). [#37906](https://github.com/ClickHouse/ClickHouse/pull/37906) ([alesapin](https://github.com/alesapin)).
|
||||
* Clean up broken detached parts after timeout. Closes [#25195](https://github.com/ClickHouse/ClickHouse/issues/25195). [#37975](https://github.com/ClickHouse/ClickHouse/pull/37975) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Now in MergeTree table engines family failed-to-move parts will be removed instantly. [#37994](https://github.com/ClickHouse/ClickHouse/pull/37994) ([alesapin](https://github.com/alesapin)).
|
||||
* Now if setting `always_fetch_merged_part` is enabled for ReplicatedMergeTree merges will try to find parts on other replicas rarely with smaller load for [Zoo]Keeper. [#37995](https://github.com/ClickHouse/ClickHouse/pull/37995) ([alesapin](https://github.com/alesapin)).
|
||||
* Add implicit grants with grant option too. For example `GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION` now allows `A` to execute `GRANT CREATE VIEW ON test.* TO B`. [#38017](https://github.com/ClickHouse/ClickHouse/pull/38017) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Use `clang-14` and LLVM infrastructure version 14 for builds. This closes [#34681](https://github.com/ClickHouse/ClickHouse/issues/34681). [#34754](https://github.com/ClickHouse/ClickHouse/pull/34754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: `clang-14` has [a bug](https://github.com/google/sanitizers/issues/1540) in ThreadSanitizer that makes our CI work worse.
|
||||
* Allow to drop privileges at startup. This simplifies Docker images. Closes [#36293](https://github.com/ClickHouse/ClickHouse/issues/36293). [#36341](https://github.com/ClickHouse/ClickHouse/pull/36341) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add docs spellcheck to CI. [#37790](https://github.com/ClickHouse/ClickHouse/pull/37790) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix overly aggressive stripping which removed the embedded hash required for checking the consistency of the executable. [#37993](https://github.com/ClickHouse/ClickHouse/pull/37993) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix `SELECT ... INTERSECT` and `EXCEPT SELECT` statements with constant string types. [#37738](https://github.com/ClickHouse/ClickHouse/pull/37738) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* (experimental WINDOW VIEW) Fix `addDependency` in WindowView. This bug can be reproduced like [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37224](https://github.com/ClickHouse/ClickHouse/pull/37224) ([vxider](https://github.com/Vxider)).
|
||||
* Fix inconsistency in ORDER BY ... WITH FILL feature. Query, containing ORDER BY ... WITH FILL, can generate extra rows when multiple WITH FILL columns are present. [#38074](https://github.com/ClickHouse/ClickHouse/pull/38074) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* This PR moving `addDependency` from constructor to `startup()` to avoid adding dependency to a *dropped* table, fix [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37243](https://github.com/ClickHouse/ClickHouse/pull/37243) ([vxider](https://github.com/Vxider)).
|
||||
* Fix inserting defaults for missing values in columnar formats. Previously missing columns were filled with defaults for types, not for columns. [#37253](https://github.com/ClickHouse/ClickHouse/pull/37253) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* (experimental Object type) Fix some cases of insertion nested arrays to columns of type `Object`. [#37305](https://github.com/ClickHouse/ClickHouse/pull/37305) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix projections with GROUP/ORDER BY in query and optimize_aggregation_in_order (before the result was incorrect since only finish sorting was performed). [#37342](https://github.com/ClickHouse/ClickHouse/pull/37342) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed error with symbols in key name in S3. Fixes [#33009](https://github.com/ClickHouse/ClickHouse/issues/33009). [#37344](https://github.com/ClickHouse/ClickHouse/pull/37344) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Throw an exception when GROUPING SETS used with ROLLUP or CUBE. [#37367](https://github.com/ClickHouse/ClickHouse/pull/37367) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix LOGICAL_ERROR in getMaxSourcePartsSizeForMerge during merges (in case of non standard, greater, values of `background_pool_size`/`background_merges_mutations_concurrency_ratio` has been specified in `config.xml` (new way) not in `users.xml` (deprecated way)). [#37413](https://github.com/ClickHouse/ClickHouse/pull/37413) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Stop removing UTF-8 BOM in RowBinary format. [#37428](https://github.com/ClickHouse/ClickHouse/pull/37428) ([Paul Loyd](https://github.com/loyd)). [#37428](https://github.com/ClickHouse/ClickHouse/pull/37428) ([Paul Loyd](https://github.com/loyd)).
|
||||
* clickhouse-keeper bugfix: fix force recovery for single node cluster. [#37440](https://github.com/ClickHouse/ClickHouse/pull/37440) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix logical error in normalizeUTF8 functions. Closes [#37298](https://github.com/ClickHouse/ClickHouse/issues/37298). [#37443](https://github.com/ClickHouse/ClickHouse/pull/37443) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix named tuples output in ORC/Arrow/Parquet formats. [#37458](https://github.com/ClickHouse/ClickHouse/pull/37458) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix optimization of monotonous functions in ORDER BY clause in presence of GROUPING SETS. Fixes [#37401](https://github.com/ClickHouse/ClickHouse/issues/37401). [#37493](https://github.com/ClickHouse/ClickHouse/pull/37493) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix error on joining with dictionary on some conditions. Close [#37386](https://github.com/ClickHouse/ClickHouse/issues/37386). [#37530](https://github.com/ClickHouse/ClickHouse/pull/37530) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Prohibit `optimize_aggregation_in_order` with `GROUPING SETS` (fixes `LOGICAL_ERROR`). [#37542](https://github.com/ClickHouse/ClickHouse/pull/37542) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix wrong dump information of ActionsDAG. [#37587](https://github.com/ClickHouse/ClickHouse/pull/37587) ([zhanglistar](https://github.com/zhanglistar)).
|
||||
* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#37593](https://github.com/ClickHouse/ClickHouse/pull/37593) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `WITH FILL` modifier with negative intervals in `STEP` clause. Fixes [#37514](https://github.com/ClickHouse/ClickHouse/issues/37514). [#37600](https://github.com/ClickHouse/ClickHouse/pull/37600) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix illegal joinGet array usage when ` join_use_nulls = 1`. This fixes [#37562](https://github.com/ClickHouse/ClickHouse/issues/37562) . [#37650](https://github.com/ClickHouse/ClickHouse/pull/37650) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix columns number mismatch in cross join, close [#37561](https://github.com/ClickHouse/ClickHouse/issues/37561). [#37653](https://github.com/ClickHouse/ClickHouse/pull/37653) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix segmentation fault in `show create table` from mysql database when it is configured with named collections. Closes [#37683](https://github.com/ClickHouse/ClickHouse/issues/37683). [#37690](https://github.com/ClickHouse/ClickHouse/pull/37690) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix RabbitMQ Storage not being able to startup on server restart if storage was create without SETTINGS clause. Closes [#37463](https://github.com/ClickHouse/ClickHouse/issues/37463). [#37691](https://github.com/ClickHouse/ClickHouse/pull/37691) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* SQL user defined functions disable CREATE/DROP in readonly mode. Closes [#37280](https://github.com/ClickHouse/ClickHouse/issues/37280). [#37699](https://github.com/ClickHouse/ClickHouse/pull/37699) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix formatting of Nullable arguments for executable user defined functions. Closes [#35897](https://github.com/ClickHouse/ClickHouse/issues/35897). [#37711](https://github.com/ClickHouse/ClickHouse/pull/37711) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix optimization enabled by setting `optimize_monotonous_functions_in_order_by` in distributed queries. Fixes [#36037](https://github.com/ClickHouse/ClickHouse/issues/36037). [#37724](https://github.com/ClickHouse/ClickHouse/pull/37724) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix possible logical error: `Invalid Field get from type UInt64 to type Float64` in `values` table function. Closes [#37602](https://github.com/ClickHouse/ClickHouse/issues/37602). [#37754](https://github.com/ClickHouse/ClickHouse/pull/37754) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix possible segfault in schema inference in case of exception in SchemaReader constructor. Closes [#37680](https://github.com/ClickHouse/ClickHouse/issues/37680). [#37760](https://github.com/ClickHouse/ClickHouse/pull/37760) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix setting cast_ipv4_ipv6_default_on_conversion_error for internal cast function. Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#37761](https://github.com/ClickHouse/ClickHouse/pull/37761) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which lead to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)).
|
||||
* Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* (experimental WINDOW VIEW) Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)).
|
||||
* Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)).
|
||||
* Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix possible incorrect result of `SELECT ... WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* (experimental WINDOW VIEW) Add missing default columns when pushing to the target table in WindowView, fix [#37815](https://github.com/ClickHouse/ClickHouse/issues/37815). [#37965](https://github.com/ClickHouse/ClickHouse/pull/37965) ([vxider](https://github.com/Vxider)).
|
||||
* Fixed too large stack frame that would cause compilation to fail. [#37996](https://github.com/ClickHouse/ClickHouse/pull/37996) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* When open enable_filesystem_query_cache_limit, throw Reserved cache size exceeds the remaining cache size. [#38004](https://github.com/ClickHouse/ClickHouse/pull/38004) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#34775](https://github.com/ClickHouse/ClickHouse/pull/34775) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* TTL merge may not be scheduled again if BackgroundExecutor is busy. --merges_with_ttl_counter is increased in selectPartsToMerge() --merge task will be ignored if BackgroundExecutor is busy --merges_with_ttl_counter will not be decrease. [#36387](https://github.com/ClickHouse/ClickHouse/pull/36387) ([lthaooo](https://github.com/lthaooo)).
|
||||
* Fix overridden settings value of `normalize_function_names`. [#36937](https://github.com/ClickHouse/ClickHouse/pull/36937) ([李扬](https://github.com/taiyang-li)).
|
||||
* Fix for exponential time decaying window functions. Now respecting boundaries of the window. [#36944](https://github.com/ClickHouse/ClickHouse/pull/36944) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fix possible heap-use-after-free error when reading system.projection_parts and system.projection_parts_columns . This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed `DateTime64` fractional seconds behavior prior to Unix epoch. [#37697](https://github.com/ClickHouse/ClickHouse/pull/37697) ([Andrey Zvonov](https://github.com/zvonand)). [#37039](https://github.com/ClickHouse/ClickHouse/pull/37039) ([李扬](https://github.com/taiyang-li)).
|
||||
|
||||
|
||||
### <a id="225"></a> ClickHouse release 22.5, 2022-05-19
|
||||
|
||||
#### Upgrade Notes
|
||||
|
||||
* Now, background merges, mutations and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values, and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes of metrics, so we put in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL.
|
||||
* `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB.
|
||||
* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Add support of GROUPING SETS in GROUP BY clause. This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Added `system.certificates` table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)).
|
||||
* Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Add `MySQLDump` input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Show the `total_rows` and `total_bytes` fields in `system.tables` for temporary tables. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Allow to override `parts_to_delay_insert` and `parts_to_throw_insert` with query-level settings. If they are defined, they will override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)).
|
||||
|
||||
#### Experimental Feature
|
||||
|
||||
* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays.
|
||||
[#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). Caveat: the functions will be renamed.
|
||||
* Improve the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently. [#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)).
|
||||
* Introspection for remove filesystem cache. [#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Added new hash function `wyHash64` for SQL. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)).
|
||||
* Improvement for replicated databases: Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Improvement for remote filesystem cache: Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). Improve `SYSTEM DROP FILESYSTEM CACHE` query: `<path>` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Improvement for semistructured data: Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)).
|
||||
* Improvement for parallel replicas: We create a local interpreter if we want to execute query on localhost replica. But for when executing query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator. It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Apply parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. [#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)).
|
||||
* The default `HashJoin` is not thread safe for inserting right table's rows and run it in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Allow to rewrite `select countDistinct(a) from t` to `select count(1) from (select a from t groupBy a)`. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)).
|
||||
* Transform OR LIKE chain to multiMatchAny. Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)).
|
||||
* Improve performance of some functions with inlining. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)).
|
||||
* Add a branch to avoid unnecessary memcpy in readBig. It improves performance somewhat. [#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)).
|
||||
* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Add a warning if someone running clickhouse-server with log level "test". The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Parse collations in CREATE TABLE, throw exception or ignore. closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)).
|
||||
* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Add aliases `JSONLines` and `NDJSON` for `JSONEachRow`. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). [#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)).
|
||||
* Limit the max partitions could be queried for each hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix progress indication for `INSERT SELECT` in `clickhouse-local` for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)).
|
||||
* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Finalize write buffers in case of exception to avoid doing it in destructors. Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but clickhouse throws and exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Added `user_defined_path` config setting. [#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Allow cluster macro in `s3Cluster` table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)).
|
||||
* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Allow to cancel a query while still keeping a decent query id in `MySQLHandler`. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it. [#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* The metrics about time spent reading from s3 now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)).
|
||||
* Allow names of tuple elements that start from digits. [#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Now clickhouse-benchmark can read authentication info from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* `clickhouse-keeper` improvement: add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Improve schema inference for JSON objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add a separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* If the required amount of memory is available before the selected query stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query knows about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Nullables detection in protobuf. In proto3, default values are not sent on the wire. This makes it non-trivial to distinguish between null and default values for Nullable columns. A standard way to deal with this problem is to use Google wrappers to nest the target value within an inner message (see https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/wrappers.proto). In this case, a missing field is interpreted as null value, a field with missing value if interpreted as default value, and a field with regular value is interpreted as regular value. However, ClickHouse interprets Google wrappers as nested columns. We propose to introduce special behaviour to detect Google wrappers and interpret them like in the description above. For example, to serialize values for a Nullable column `test`, we would use `google.protobuf.StringValue test` in our .proto schema. Note that these types are so called "well-known types" in Protobuf, implemented in the library itself. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)).
|
||||
* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)).
|
||||
* Warn properly if use clickhouse-client --file without preceeding --external. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improve MySQL database engine to compatible with binary(0) dataType. [#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)).
|
||||
* Improve JSON report of clickhouse-benchmark. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)).
|
||||
* Server might refuse to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([tavplubix](https://github.com/tavplubix)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Simplify performance test. This will give a chance for us to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add ZSTD support for Arrow. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Extracts Version ID if present from the URI and adds a request to the AWS HTTP URI. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). - [x] Extract `Version ID` from URI if present and reassemble without it. - [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)).
|
||||
* Fix system.opentelemetry_span_log attribute.values alias to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)).
|
||||
* Fix Nullable(String) to Nullable(Bool/IPv4/IPv6) conversion Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Experimental feature: Fix execution of mutations in tables, in which there exist columns of type `Object`. Using subcolumns of type `Object` in `WHERE` expression of `UPDATE` or `DELETE` queries is now allowed yet, as well as manipulating (`DROP`, `MODIFY`) of separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Kafka does not need `group.id` on producer stage. In console log you can find Warning that describe this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} <Warning> StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```. [#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)).
|
||||
* Experimental feature (WindowView): Update `max_fired_watermark ` after blocks actually fired, in case delete data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)).
|
||||
* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Experimental feature: Now WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)).
|
||||
* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* The ILIKE function on FixedString columns could have returned wrong results (i.e. match less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Experimental feature: Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Experimental feature: Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Experimental feature: Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)).
|
||||
* Experimental feature (rocksdb cache): Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)).
|
||||
* Experimental feature: Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)).
|
||||
* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Experimental feature: Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Experimental feature: Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)).
|
||||
* Experimental feature: Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. [#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Experimental feature: Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. [#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix timeouts in Hedged requests. Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Experimental feature: Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)).
|
||||
* Experimental feature: During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, it throws a exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Experimental feature: In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing (stateless tests, flaky check (address, actions)) is timeout. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master. [#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)).
|
||||
* Experimental feature: Fix server restart if cache configuration changed. [#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). [#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fixed parsing of query settings in `CREATE` query when engine is not specified. Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Experimental feature: Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)).
|
||||
* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. [#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix hostname sanity checks for Keeper cluster configuration. Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix usage of executable user defined functions in GROUP BY. Before executable user defined functions cannot be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Experimental feature (please never use `system.session_log`, it is going to be removed): Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)).
|
||||
* Fix bug in s3Cluster schema inference that let to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416). This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. [#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)).
|
||||
* Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199). [#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Experimental feature: Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)).
|
||||
|
||||
|
||||
### <a id="224"></a> ClickHouse release 22.4, 2022-04-19
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
||||
* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `allow_settings_after_format_in_insert` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
* Added INTERPOLATE extension to the ORDER BY ... WITH FILL. Closes [#34903](https://github.com/ClickHouse/ClickHouse/issues/34903). [#35349](https://github.com/ClickHouse/ClickHouse/pull/35349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Profiling on Processors level (under `log_processors_profiles` setting, ClickHouse will write time that processor spent during execution/waiting for data to `system.processors_profile_log` table). [#34355](https://github.com/ClickHouse/ClickHouse/pull/34355) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Added functions makeDate(year, month, day), makeDate32(year, month, day). [#35628](https://github.com/ClickHouse/ClickHouse/pull/35628) ([Alexander Gololobov](https://github.com/davenger)). Implementation of makeDateTime() and makeDateTIme64(). [#35934](https://github.com/ClickHouse/ClickHouse/pull/35934) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Support new type of quota `WRITTEN BYTES` to limit amount of written bytes during insert queries. [#35736](https://github.com/ClickHouse/ClickHouse/pull/35736) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Added function `flattenTuple`. It receives nested named `Tuple` as an argument and returns a flatten `Tuple` which elements are the paths from the original `Tuple`. E.g.: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns. [#35690](https://github.com/ClickHouse/ClickHouse/pull/35690) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Added functions `arrayFirstOrNull`, `arrayLastOrNull`. Closes [#35238](https://github.com/ClickHouse/ClickHouse/issues/35238). [#35414](https://github.com/ClickHouse/ClickHouse/pull/35414) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Added functions `minSampleSizeContinous` and `minSampleSizeConversion`. Author [achimbab](https://github.com/achimbab). [#35360](https://github.com/ClickHouse/ClickHouse/pull/35360) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* New functions minSampleSizeContinous and minSampleSizeConversion. [#34354](https://github.com/ClickHouse/ClickHouse/pull/34354) ([achimbab](https://github.com/achimbab)).
|
||||
* Introduce format `ProtobufList` (all records as repeated messages in out Protobuf). Closes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Add `toLastDayOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)).
|
||||
* Added load balancing setting for \[Zoo\]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)).
|
||||
* Add a new kind of row policies named `simple`. Before this PR we had two kinds or row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Added sanity checks on server startup (available memory and disk space, max thread count, etc). [#34566](https://github.com/ClickHouse/ClickHouse/pull/34566) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* INTERVAL improvement - can be used with `[MILLI|MICRO|NANO]SECOND`. Added `toStartOf[Milli|Micro|Nano]second()` functions. Added `[add|subtract][Milli|Micro|Nano]seconds()`. [#34353](https://github.com/ClickHouse/ClickHouse/pull/34353) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
|
||||
#### Experimental Feature
|
||||
|
||||
* Added support for transactions for simple `MergeTree` tables. This feature is highly experimental and not recommended for production. Part of [#22086](https://github.com/ClickHouse/ClickHouse/issues/22086). [#24258](https://github.com/ClickHouse/ClickHouse/pull/24258) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Support schema inference for type `Object` in format `JSONEachRow`. Allow to convert columns of type `Map` to columns of type `Object`. [#35629](https://github.com/ClickHouse/ClickHouse/pull/35629) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Allow to write remote FS cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache` query. Add introspection for s3 metadata with `system.remote_data_paths` table. Closes [#34021](https://github.com/ClickHouse/ClickHouse/issues/34021). Add cache option for merges by adding mode `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` (turned on by default for merges and can also be turned on by query setting with the same name). Rename cache related settings (`remote_fs_enable_cache -> enable_filesystem_cache`, etc). [#35475](https://github.com/ClickHouse/ClickHouse/pull/35475) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* An option to store parts metadata in RocksDB. Speed up parts loading process of MergeTree to accelerate starting up of clickhouse-server. With this improvement, clickhouse-server was able to decrease starting up time from 75 minutes to 20 seconds, with 700k mergetree parts. [#32928](https://github.com/ClickHouse/ClickHouse/pull/32928) ([李扬](https://github.com/taiyang-li)).
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* A new query plan optimization. Evaluate functions after `ORDER BY` when possible. As an example, for a query `SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5`, function `sipHash64` would be evaluated after `ORDER BY` and `LIMIT`, which gives ~20x speed up. [#35623](https://github.com/ClickHouse/ClickHouse/pull/35623) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Sizes of hash tables used during aggregation now collected and used in later queries to avoid hash tables resizes. [#33439](https://github.com/ClickHouse/ClickHouse/pull/33439) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Improvement for hasAll function using SIMD instructions (SSE and AVX2). [#27653](https://github.com/ClickHouse/ClickHouse/pull/27653) ([youennL-cs](https://github.com/youennL-cs)). [#35723](https://github.com/ClickHouse/ClickHouse/pull/35723) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Multiple changes to improve ASOF JOIN performance (1.2 - 1.6x as fast). It also adds support to use big integers. [#34733](https://github.com/ClickHouse/ClickHouse/pull/34733) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Improve performance of ASOF JOIN if key is native integer. [#35525](https://github.com/ClickHouse/ClickHouse/pull/35525) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Parallelization of multipart upload into S3 storage. [#35343](https://github.com/ClickHouse/ClickHouse/pull/35343) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* URL storage engine now downloads multiple chunks in parallel if the endpoint supports HTTP Range. Two additional settings were added, `max_download_threads` and `max_download_buffer_size`, which control maximum number of threads a single query can use to download the file and the maximum number of bytes each thread can process. [#35150](https://github.com/ClickHouse/ClickHouse/pull/35150) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Use multiple threads to download objects from S3. Downloading is controllable using `max_download_threads` and `max_download_buffer_size` settings. [#35571](https://github.com/ClickHouse/ClickHouse/pull/35571) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Narrow mutex scope when interacting with HDFS. Related to [#35292](https://github.com/ClickHouse/ClickHouse/issues/35292). [#35646](https://github.com/ClickHouse/ClickHouse/pull/35646) ([shuchaome](https://github.com/shuchaome)).
|
||||
* Require mutations for per-table TTL only when it had been changed. [#35953](https://github.com/ClickHouse/ClickHouse/pull/35953) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Multiple improvements for schema inference. Use some tweaks and heuristics to determine numbers, strings, arrays, tuples and maps in CSV, TSV and TSVRaw data formats. Add setting `input_format_csv_use_best_effort_in_schema_inference` for CSV format that enables/disables using these heuristics, if it's disabled, we treat everything as string. Add similar setting `input_format_tsv_use_best_effort_in_schema_inference` for TSV/TSVRaw format. These settings are enabled by default. - Add Maps support for schema inference in Values format. - Fix possible segfault in schema inference in Values format. - Allow to skip columns with unsupported types in Arrow/ORC/Parquet formats. Add corresponding settings for it: `input_format_{parquet|orc|arrow}_skip_columns_with_unsupported_types_in_schema_inference`. These settings are disabled by default. - Allow to convert a column with type Null to a Nullable column with all NULL values in Arrow/Parquet formats. - Allow to specify column names in schema inference via setting `column_names_for_schema_inference` for formats that don't contain column names (like CSV, TSV, JSONCompactEachRow, etc) - Fix schema inference in ORC/Arrow/Parquet formats in terms of working with Nullable columns. Previously all inferred types were not Nullable and it blocked reading Nullable columns from data, now it's fixed and all inferred types are always Nullable (because we cannot understand that column is Nullable or not by reading the schema). - Fix schema inference in Template format with CSV escaping rules. [#35582](https://github.com/ClickHouse/ClickHouse/pull/35582) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add parallel parsing and schema inference for format `JSONAsObject`. [#35592](https://github.com/ClickHouse/ClickHouse/pull/35592) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Added a support for automatic schema inference to `s3Cluster` table function. Synced the signatures of `s3 ` and `s3Cluster`. [#35544](https://github.com/ClickHouse/ClickHouse/pull/35544) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Added support for schema inference for `hdfsCluster`. [#35602](https://github.com/ClickHouse/ClickHouse/pull/35602) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Add new setting `input_format_json_read_bools_as_numbers` that allows to infer and parse bools as numbers in JSON input formats. It's enabled by default. Suggested by @alexey-milovidov. [#35735](https://github.com/ClickHouse/ClickHouse/pull/35735) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Improve columns ordering in schema inference for formats TSKV and JSONEachRow, closes [#35640](https://github.com/ClickHouse/ClickHouse/issues/35640). Don't stop schema inference when reading empty row in schema inference for formats TSKV and JSONEachRow. [#35724](https://github.com/ClickHouse/ClickHouse/pull/35724) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add settings `input_format_orc_case_insensitive_column_matching`, `input_format_arrow_case_insensitive_column_matching`, and `input_format_parquet_case_insensitive_column_matching` which allows ClickHouse to use case insensitive matching of columns while reading data from ORC, Arrow or Parquet files. [#35459](https://github.com/ClickHouse/ClickHouse/pull/35459) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Added `is_secure` column to `system.query_log` which denotes if the client is using a secure connection over TCP or HTTP. [#35705](https://github.com/ClickHouse/ClickHouse/pull/35705) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Now `kafka_num_consumers` can be bigger than amount of physical cores in case of low resource machine (less than 16 cores). [#35926](https://github.com/ClickHouse/ClickHouse/pull/35926) ([alesapin](https://github.com/alesapin)).
|
||||
* Add some basic metrics to monitor engine=Kafka tables. [#35916](https://github.com/ClickHouse/ClickHouse/pull/35916) ([filimonov](https://github.com/filimonov)).
|
||||
* Now it's not allowed to `ALTER TABLE ... RESET SETTING` for non-existing settings for MergeTree engines family. Fixes [#35816](https://github.com/ClickHouse/ClickHouse/issues/35816). [#35884](https://github.com/ClickHouse/ClickHouse/pull/35884) ([alesapin](https://github.com/alesapin)).
|
||||
* Now some `ALTER MODIFY COLUMN` queries for `Arrays` and `Nullable` types can be done at metadata level without mutations. For example, alter from `Array(Enum8('Option1'=1))` to `Array(Enum8('Option1'=1, 'Option2'=2))`. [#35882](https://github.com/ClickHouse/ClickHouse/pull/35882) ([alesapin](https://github.com/alesapin)).
|
||||
* Added an animation to the hourglass icon to indicate to the user that a query is running. [#35860](https://github.com/ClickHouse/ClickHouse/pull/35860) ([peledni](https://github.com/peledni)).
|
||||
* support ALTER TABLE t DETACH PARTITION (ALL). [#35794](https://github.com/ClickHouse/ClickHouse/pull/35794) ([awakeljw](https://github.com/awakeljw)).
|
||||
* Improve projection analysis to optimize trivial queries such as `count()`. [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Support schema inference for insert select with using `input` table function. Get schema from insertion table instead of inferring it from the data in case of insert select from table functions that support schema inference. Closes [#35639](https://github.com/ClickHouse/ClickHouse/issues/35639). [#35760](https://github.com/ClickHouse/ClickHouse/pull/35760) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Respect `remote_url_allow_hosts` for Hive tables. [#35743](https://github.com/ClickHouse/ClickHouse/pull/35743) ([李扬](https://github.com/taiyang-li)).
|
||||
* Implement `send_logs_level` for clickhouse-local. Closes [#35653](https://github.com/ClickHouse/ClickHouse/issues/35653). [#35716](https://github.com/ClickHouse/ClickHouse/pull/35716) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Closes [#35641](https://github.com/ClickHouse/ClickHouse/issues/35641) Allow `EPHEMERAL` columns without explicit default expression. [#35706](https://github.com/ClickHouse/ClickHouse/pull/35706) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Add profile event counter `AsyncInsertBytes` about size of async INSERTs. [#35644](https://github.com/ClickHouse/ClickHouse/pull/35644) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Improve the pipeline description for JOIN. [#35612](https://github.com/ClickHouse/ClickHouse/pull/35612) ([何李夫](https://github.com/helifu)).
|
||||
* Deduce absolute hdfs config path. [#35572](https://github.com/ClickHouse/ClickHouse/pull/35572) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improve pasting performance and compatibility of clickhouse-client. This helps [#35501](https://github.com/ClickHouse/ClickHouse/issues/35501). [#35541](https://github.com/ClickHouse/ClickHouse/pull/35541) ([Amos Bird](https://github.com/amosbird)).
|
||||
* It was possible to get stack overflow in distributed queries if one of the settings `async_socket_for_remote` and `use_hedged_requests` is enabled while parsing very deeply nested data type (at least in debug build). Closes [#35509](https://github.com/ClickHouse/ClickHouse/issues/35509). [#35524](https://github.com/ClickHouse/ClickHouse/pull/35524) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add sizes of subcolumns to `system.parts_columns` table. [#35488](https://github.com/ClickHouse/ClickHouse/pull/35488) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Add explicit table info to the scan node of query plan and pipeline. [#35460](https://github.com/ClickHouse/ClickHouse/pull/35460) ([何李夫](https://github.com/helifu)).
|
||||
* Allow server to bind to low-numbered ports (e.g. 443). ClickHouse installation script will set `cap_net_bind_service` to the binary file. [#35451](https://github.com/ClickHouse/ClickHouse/pull/35451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix INSERT INTO table FROM INFILE: it did not display the progress bar. [#35429](https://github.com/ClickHouse/ClickHouse/pull/35429) ([xiedeyantu](https://github.com/xiedeyantu)).
|
||||
* Add arguments `--user`, `--password`, `--host`, `--port` for `clickhouse-diagnostics` tool. [#35422](https://github.com/ClickHouse/ClickHouse/pull/35422) ([李扬](https://github.com/taiyang-li)).
|
||||
* Support uuid for Postgres engines. Closes [#35384](https://github.com/ClickHouse/ClickHouse/issues/35384). [#35403](https://github.com/ClickHouse/ClickHouse/pull/35403) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* For table function `s3cluster` or `HDFSCluster` or `hive`, we can't get right `AccessType` by `StorageFactory::instance().getSourceAccessType(getStorageTypeName())`. This pr fix it. [#35365](https://github.com/ClickHouse/ClickHouse/pull/35365) ([李扬](https://github.com/taiyang-li)).
|
||||
* Remove `--testmode` option for clickhouse-client, enable it unconditionally. [#35354](https://github.com/ClickHouse/ClickHouse/pull/35354) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Don't allow `wchc` operation (four letter command) for clickhouse-keeper. [#35320](https://github.com/ClickHouse/ClickHouse/pull/35320) ([zhangyuli1](https://github.com/zhangyuli1)).
|
||||
* Add function `getTypeSerializationStreams`. For a specified type (which is detected from column), it returns an array with all the serialization substream paths. This function is useful mainly for developers. [#35290](https://github.com/ClickHouse/ClickHouse/pull/35290) ([李扬](https://github.com/taiyang-li)).
|
||||
* If `port` is not specified in cluster configuration, default server port will be used. This closes [#34769](https://github.com/ClickHouse/ClickHouse/issues/34769). [#34772](https://github.com/ClickHouse/ClickHouse/pull/34772) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Use `minmax` index for orc/parquet file in Hive Engine. Related PR: https://github.com/ClickHouse/arrow/pull/10. [#34631](https://github.com/ClickHouse/ClickHouse/pull/34631) ([李扬](https://github.com/taiyang-li)).
|
||||
* System log tables now allow to specify COMMENT in ENGINE declaration. Closes [#33768](https://github.com/ClickHouse/ClickHouse/issues/33768). [#34536](https://github.com/ClickHouse/ClickHouse/pull/34536) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Proper support of setting `max_rows_to_read` in case of reading in order of sorting key and specified limit. Previously the exception `Limit for rows or bytes to read exceeded` could be thrown even if query actually requires to read less amount of rows. [#33230](https://github.com/ClickHouse/ClickHouse/pull/33230) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Respect only quota & period from cgroups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Add next batch of randomization settings in functional tests. [#35047](https://github.com/ClickHouse/ClickHouse/pull/35047) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add backward compatibility check in stress test. Closes [#25088](https://github.com/ClickHouse/ClickHouse/issues/25088). [#27928](https://github.com/ClickHouse/ClickHouse/pull/27928) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Migrate package building to `nfpm` - Deprecate `release` script in favor of `packages/build` - Build everything in clickhouse/binary-builder image (cleanup: clickhouse/deb-builder) - Add symbol stripping to cmake (todo: use $prefix/lib/$bin_dir/clickhouse/$binary.debug) - Fix issue with DWARF symbols - Add Alpine APK packages - Rename `alien` to `additional_pkgs`. [#33664](https://github.com/ClickHouse/ClickHouse/pull/33664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add a night scan and upload for Coverity. [#34895](https://github.com/ClickHouse/ClickHouse/pull/34895) ([Boris Kuschel](https://github.com/bkuschel)).
|
||||
* A dedicated small package for `clickhouse-keeper`. [#35308](https://github.com/ClickHouse/ClickHouse/pull/35308) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Running with podman was failing: it complains about specifying the same volume twice. [#35978](https://github.com/ClickHouse/ClickHouse/pull/35978) ([Roman Nikonov](https://github.com/nic11)).
|
||||
* Minor improvement in contrib/krb5 build configuration. [#35832](https://github.com/ClickHouse/ClickHouse/pull/35832) ([Anton Kozlov](https://github.com/tonickkozlov)).
|
||||
* Add a label to recognize a building task for every image. [#35583](https://github.com/ClickHouse/ClickHouse/pull/35583) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Apply `black` formatter to python code and add a per-commit check. [#35466](https://github.com/ClickHouse/ClickHouse/pull/35466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Redo alpine image to use clean Dockerfile. Create a script in tests/ci to build both ubuntu and alpine images. Add clickhouse-keeper image (cc @nikitamikhaylov). Add build check to PullRequestCI. Add a job to a ReleaseCI. Add a job to MasterCI to build and push `clickhouse/clickhouse-server:head` and `clickhouse/clickhouse-keeper:head` images for each merged PR. [#35211](https://github.com/ClickHouse/ClickHouse/pull/35211) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix stress-test report in CI, now we upload the runlog with information about started stress tests only once. [#35093](https://github.com/ClickHouse/ClickHouse/pull/35093) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Switch to libcxx / libcxxabi from LLVM 14. [#34906](https://github.com/ClickHouse/ClickHouse/pull/34906) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update unixodbc to mitigate CVE-2018-7485. Note: this CVE is not relevant for ClickHouse as it implements its own isolation layer for ODBC. [#35943](https://github.com/ClickHouse/ClickHouse/pull/35943) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Avoid erasing columns from a block if it doesn't exist while reading data from Hive. [#35393](https://github.com/ClickHouse/ClickHouse/pull/35393) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Add type checking when creating materialized view. Close: [#23684](https://github.com/ClickHouse/ClickHouse/issues/23684). [#24896](https://github.com/ClickHouse/ClickHouse/pull/24896) ([hexiaoting](https://github.com/hexiaoting)).
|
||||
* Fix formatting of INSERT INFILE queries (missing quotes). [#35886](https://github.com/ClickHouse/ClickHouse/pull/35886) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Disable `session_log` because memory safety issue has been found by fuzzing. See [#35714](https://github.com/ClickHouse/ClickHouse/issues/35714). [#35873](https://github.com/ClickHouse/ClickHouse/pull/35873) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix inserts to columns of type `Object` in case when there is data related to several partitions in insert query. [#35806](https://github.com/ClickHouse/ClickHouse/pull/35806) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix for reading from HDFS in Snappy format. [#35771](https://github.com/ClickHouse/ClickHouse/pull/35771) ([shuchaome](https://github.com/shuchaome)).
|
||||
* Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix any/all (subquery) implementation. Closes [#35489](https://github.com/ClickHouse/ClickHouse/issues/35489). [#35727](https://github.com/ClickHouse/ClickHouse/pull/35727) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix dropping non-empty database in clickhouse-local. Closes [#35692](https://github.com/ClickHouse/ClickHouse/issues/35692). [#35711](https://github.com/ClickHouse/ClickHouse/pull/35711) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix bug in creating materialized view with subquery after server restart. Materialized view was not getting updated after inserts into underlying table after server restart. Closes [#35511](https://github.com/ClickHouse/ClickHouse/issues/35511). [#35691](https://github.com/ClickHouse/ClickHouse/pull/35691) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix possible `Can't adjust last granule` exception while reading subcolumns of experimental type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enable build with JIT compilation by default. [#35683](https://github.com/ClickHouse/ClickHouse/pull/35683) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix possible loss of subcolumns in experimental type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix check ASOF JOIN key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)).
|
||||
* Fix part checking logic for parts with projections. Error happened when projection and main part had different types. This is similar to https://github.com/ClickHouse/ClickHouse/pull/33774 . The bug is addressed by @caoyang10. [#35667](https://github.com/ClickHouse/ClickHouse/pull/35667) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix server crash when large number of arguments are passed into `format` function. Please refer to the test file and see how to reproduce the crash. [#35651](https://github.com/ClickHouse/ClickHouse/pull/35651) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix usage of quotas with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix positional arguments with aliases. Closes [#35600](https://github.com/ClickHouse/ClickHouse/issues/35600). [#35620](https://github.com/ClickHouse/ClickHouse/pull/35620) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Check `remote_url_allow_hosts` before schema inference in URL engine Closes [#35064](https://github.com/ClickHouse/ClickHouse/issues/35064). [#35619](https://github.com/ClickHouse/ClickHouse/pull/35619) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix `HashJoin` when columns with `LowCardinality` type are used. This closes [#35548](https://github.com/ClickHouse/ClickHouse/issues/35548). [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix possible segfault in MaterializedPostgreSQL which happened if exception occurred when data, collected in memory, was synced into underlying tables. Closes [#35611](https://github.com/ClickHouse/ClickHouse/issues/35611). [#35614](https://github.com/ClickHouse/ClickHouse/pull/35614) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Setting `database_atomic_wait_for_drop_and_detach_synchronously` worked incorrectly for `ATTACH TABLE` query when previously detached table is still in use, It's fixed. [#35594](https://github.com/ClickHouse/ClickHouse/pull/35594) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix HTTP headers with named collections, add compression_method. Closes [#35273](https://github.com/ClickHouse/ClickHouse/issues/35273). Closes [#35269](https://github.com/ClickHouse/ClickHouse/issues/35269). [#35593](https://github.com/ClickHouse/ClickHouse/pull/35593) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix s3 engine getting virtual columns. Closes [#35411](https://github.com/ClickHouse/ClickHouse/issues/35411). [#35586](https://github.com/ClickHouse/ClickHouse/pull/35586) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fixed return type deduction for `caseWithExpression`. The type of the ELSE branch is now correctly taken into account. [#35576](https://github.com/ClickHouse/ClickHouse/pull/35576) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix parsing of IPv6 addresses longer than 39 characters. Closes [#34022](https://github.com/ClickHouse/ClickHouse/issues/34022). [#35539](https://github.com/ClickHouse/ClickHouse/pull/35539) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix crash during short circuit function evaluation when one of arguments is nullable constant. Closes [#35497](https://github.com/ClickHouse/ClickHouse/issues/35497). Closes [#35496](https://github.com/ClickHouse/ClickHouse/issues/35496). [#35502](https://github.com/ClickHouse/ClickHouse/pull/35502) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix crash for function `throwIf` with constant arguments. [#35500](https://github.com/ClickHouse/ClickHouse/pull/35500) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix bug in Keeper which can lead to unstable client connections. Introduced in [#35031](https://github.com/ClickHouse/ClickHouse/issues/35031). [#35498](https://github.com/ClickHouse/ClickHouse/pull/35498) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector<int> to DB::ColumnVector<long>'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix excessive logging when using S3 as backend for MergeTree or as separate table engine/function. Fixes [#30559](https://github.com/ClickHouse/ClickHouse/issues/30559). [#35434](https://github.com/ClickHouse/ClickHouse/pull/35434) ([alesapin](https://github.com/alesapin)).
|
||||
* Now merges executed with zero copy replication (experimental) will not spam logs with message `Found parts with the same min block and with the same max block as the missing part _ on replica _. Hoping that it will eventually appear as a result of a merge.`. [#35430](https://github.com/ClickHouse/ClickHouse/pull/35430) ([alesapin](https://github.com/alesapin)).
|
||||
* Skip possible exception if empty chunks appear in GroupingAggregatedTransform. [#35417](https://github.com/ClickHouse/ClickHouse/pull/35417) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix working with columns that are not needed in query in Arrow/Parquet/ORC formats, it prevents possible errors like `Unsupported <format> type <type> of an input column <column_name>` when file contains column with unsupported type and we don't use it in query. [#35406](https://github.com/ClickHouse/ClickHouse/pull/35406) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix for local cache for remote filesystem (experimental feature) for high concurrency on corner cases. [#35381](https://github.com/ClickHouse/ClickHouse/pull/35381) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
|
||||
## <a id="223"></a> ClickHouse release v22.3-lts, 2022-03-17
|
||||
@ -11,6 +803,7 @@
|
||||
|
||||
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
|
||||
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
|
||||
* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -485,4 +1278,4 @@
|
||||
* Fix hang up with command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)).
|
||||
|
||||
|
||||
## [Changelog for 2021](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats-new/changelog/2021.md)
|
||||
## [Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021)
|
||||
|
245
CMakeLists.txt
245
CMakeLists.txt
@ -1,32 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
foreach(policy
|
||||
CMP0023
|
||||
CMP0048 # CMake 3.0
|
||||
CMP0074 # CMake 3.12
|
||||
CMP0077
|
||||
CMP0079
|
||||
)
|
||||
if(POLICY ${policy})
|
||||
cmake_policy(SET ${policy} NEW)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# set default policy
|
||||
foreach(default_policy_var_name
|
||||
# make option() honor normal variables for BUILD_SHARED_LIBS:
|
||||
# - re2
|
||||
# - snappy
|
||||
CMAKE_POLICY_DEFAULT_CMP0077
|
||||
# Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should
|
||||
# set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over
|
||||
# INTERFACE_LINK_LIBRARIES.
|
||||
CMAKE_POLICY_DEFAULT_CMP0022
|
||||
)
|
||||
set(${default_policy_var_name} NEW)
|
||||
endforeach()
|
||||
|
||||
project(ClickHouse)
|
||||
project(ClickHouse LANGUAGES C CXX ASM)
|
||||
|
||||
# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue.
|
||||
option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION
|
||||
@ -39,12 +13,11 @@ else()
|
||||
set(RECONFIGURE_MESSAGE_LEVEL WARNING)
|
||||
endif()
|
||||
|
||||
enable_language(C CXX ASM)
|
||||
|
||||
include (cmake/arch.cmake)
|
||||
include (cmake/target.cmake)
|
||||
include (cmake/tools.cmake)
|
||||
include (cmake/analysis.cmake)
|
||||
include (cmake/ccache.cmake)
|
||||
include (cmake/clang_tidy.cmake)
|
||||
include (cmake/git_status.cmake)
|
||||
|
||||
# Ignore export() since we don't use it,
|
||||
@ -52,7 +25,6 @@ include (cmake/git_status.cmake)
|
||||
macro (export)
|
||||
endmacro ()
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json
|
||||
set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
|
||||
set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE)
|
||||
@ -64,11 +36,9 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
||||
|
||||
# Check that submodules are present
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sysroot/README.md")
|
||||
message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
|
||||
message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init")
|
||||
endif ()
|
||||
|
||||
include (cmake/ccache.cmake)
|
||||
|
||||
# Take care to add prlimit in command line before ccache, or else ccache thinks that
|
||||
# prlimit is compiler, and clang++ is its input file, and refuses to work with
|
||||
# multiple inputs, e.g in ccache log:
|
||||
@ -104,18 +74,12 @@ message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
|
||||
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
|
||||
|
||||
option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON)
|
||||
|
||||
if (NOT USE_STATIC_LIBRARIES)
|
||||
# DEVELOPER ONLY.
|
||||
# Faster linking if turned on.
|
||||
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files")
|
||||
|
||||
option(CLICKHOUSE_SPLIT_BINARY
|
||||
"Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled")
|
||||
endif ()
|
||||
# DEVELOPER ONLY.
|
||||
# Faster linking if turned on.
|
||||
option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files" OFF)
|
||||
|
||||
if (USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
|
||||
message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without USE_STATIC_LIBRARIES=0 has no effect.")
|
||||
message(FATAL_ERROR "SPLIT_SHARED_LIBRARIES=1 must not be used together with USE_STATIC_LIBRARIES=1")
|
||||
endif()
|
||||
|
||||
if (NOT USE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES)
|
||||
@ -161,20 +125,22 @@ add_library(global-libs INTERFACE)
|
||||
include (cmake/fuzzer.cmake)
|
||||
include (cmake/sanitize.cmake)
|
||||
|
||||
if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD)
|
||||
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
|
||||
|
||||
set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator
|
||||
|
||||
if (ENABLE_COLORED_BUILD AND CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
# Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
|
||||
# ... such manually setting of flags can be removed once CMake supports a variable to
|
||||
# activate colors in *all* build systems: https://gitlab.kitware.com/cmake/cmake/-/issues/15502
|
||||
endif ()
|
||||
|
||||
include (cmake/check_flags.cmake)
|
||||
include (cmake/add_warning.cmake)
|
||||
|
||||
set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror and many more is also added inside cmake/warnings.cmake
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
# clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument]
|
||||
set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument")
|
||||
# generate ranges for fast "addr2line" search
|
||||
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
|
||||
@ -187,6 +153,8 @@ if (COMPILER_CLANG)
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16
|
||||
endif ()
|
||||
|
||||
# If compiler has support for -Wreserved-identifier. It is difficult to detect by clang version,
|
||||
@ -222,6 +190,12 @@ else ()
|
||||
set(NO_WHOLE_ARCHIVE --no-whole-archive)
|
||||
endif ()
|
||||
|
||||
option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON)
|
||||
if (OS_DARWIN)
|
||||
# Disable the curl, azure, senry build on MacOS
|
||||
set (ENABLE_CURL_BUILD OFF)
|
||||
endif ()
|
||||
|
||||
# Ignored if `lld` is used
|
||||
option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
|
||||
|
||||
@ -245,12 +219,31 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
|
||||
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT ON)
|
||||
else()
|
||||
set (OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT OFF)
|
||||
endif()
|
||||
# Provides faster linking and lower binary size.
|
||||
# Tradeoff is the inability to debug some source files with e.g. gdb
|
||||
# (empty stack frames and no local variables)."
|
||||
option(OMIT_HEAVY_DEBUG_SYMBOLS
|
||||
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
|
||||
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set(USE_DEBUG_HELPERS ON)
|
||||
endif()
|
||||
|
||||
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
|
||||
|
||||
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)
|
||||
else ()
|
||||
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" OFF)
|
||||
endif ()
|
||||
|
||||
# Create BuildID when using lld. For other linkers it is created by default.
|
||||
if (LINKER_NAME MATCHES "lld$")
|
||||
@ -261,13 +254,18 @@ endif ()
|
||||
# Add a section with the hash of the compiled machine code for integrity checks.
|
||||
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
|
||||
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
|
||||
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
|
||||
set (USE_BINARY_HASH 1)
|
||||
if (CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
|
||||
message(STATUS "Official build: A checksum hash will be added to the clickhouse executable")
|
||||
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
|
||||
else ()
|
||||
message(STATUS "No official build: A checksum hash will not be added to the clickhouse executable")
|
||||
endif ()
|
||||
|
||||
# Allows to build stripped binary in a separate directory
|
||||
if (OBJCOPY_PATH AND READELF_PATH)
|
||||
set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory")
|
||||
# Optionally split binaries and debug symbols.
|
||||
option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
|
||||
if (SPLIT_DEBUG_SYMBOLS)
|
||||
message(STATUS "Will split binaries and debug symbols")
|
||||
set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
endif()
|
||||
|
||||
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
|
||||
@ -291,33 +289,33 @@ include(cmake/cpu_features.cmake)
|
||||
# Enable it explicitly.
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
|
||||
|
||||
# Reproducible builds
|
||||
# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE().
|
||||
option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ON)
|
||||
# Reproducible builds.
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT OFF)
|
||||
else ()
|
||||
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT ON)
|
||||
endif ()
|
||||
|
||||
option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT})
|
||||
|
||||
if (ENABLE_BUILD_PATH_MAPPING)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
|
||||
endif()
|
||||
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.12.4")
|
||||
# CMake < 3.12 doesn't support setting 20 as a C++ standard version.
|
||||
# We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now.
|
||||
|
||||
if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
# to make numeric_limits<__int128> works with GCC
|
||||
set (_CXX_STANDARD "gnu++2a")
|
||||
else ()
|
||||
set (_CXX_STANDARD "c++2a")
|
||||
endif ()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}")
|
||||
else ()
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
endif ()
|
||||
|
||||
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
|
||||
if (ENABLE_BUILD_PROFILING)
|
||||
if (COMPILER_CLANG)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace")
|
||||
else ()
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set (CMAKE_CXX_STANDARD 20)
|
||||
set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
|
||||
set (CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set (CMAKE_C_STANDARD 11)
|
||||
set (CMAKE_C_EXTENSIONS ON)
|
||||
set (CMAKE_C_STANDARD_REQUIRED ON)
|
||||
@ -333,6 +331,22 @@ if (COMPILER_GCC OR COMPILER_CLANG)
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32")
|
||||
endif ()
|
||||
|
||||
if (ARCH_AMD64)
|
||||
# align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change,
|
||||
# which makes benchmark results more stable.
|
||||
set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
|
||||
if (COMPILER_GCC)
|
||||
# gcc is in assembler, need to add "-Wa," prefix
|
||||
set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}")
|
||||
endif()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("${BRANCHES_WITHIN_32B_BOUNDARIES}" HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
|
||||
if (HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
|
||||
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (COMPILER_GCC)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcoroutines")
|
||||
endif ()
|
||||
@ -357,15 +371,18 @@ set (COMPILER_FLAGS "${COMPILER_FLAGS}")
|
||||
# Our built-in unwinder only supports DWARF version up to 4.
|
||||
set (DEBUG_INFO_FLAGS "-g -gdwarf-4")
|
||||
|
||||
set (CMAKE_BUILD_COLOR_MAKEFILE ON)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}")
|
||||
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_CXX_FLAGS_ADD}")
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}")
|
||||
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_ASM_FLAGS_ADD}")
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
if (OS_DARWIN)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
@ -403,19 +420,12 @@ if (COMPILER_CLANG)
|
||||
endif ()
|
||||
|
||||
elseif (ENABLE_THINLTO)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang")
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with Clang")
|
||||
endif ()
|
||||
|
||||
# Turns on all external libs like s3, kafka, ODBC, ...
|
||||
option(ENABLE_LIBRARIES "Enable all external libraries by default" ON)
|
||||
|
||||
if (NOT (OS_LINUX OR OS_DARWIN))
|
||||
# Using system libs can cause a lot of warnings in includes (on macro expansion).
|
||||
option(WERROR "Enable -Werror compiler option" OFF)
|
||||
else ()
|
||||
option(WERROR "Enable -Werror compiler option" ON)
|
||||
endif ()
|
||||
|
||||
# Increase stack size on Musl. We need big stack for our recursive-descend parser.
|
||||
if (USE_MUSL)
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152")
|
||||
@ -433,6 +443,13 @@ elseif (OS_FREEBSD)
|
||||
endif ()
|
||||
link_libraries(global-group)
|
||||
|
||||
if (NOT (OS_LINUX OR OS_DARWIN))
|
||||
# Using system libs can cause a lot of warnings in includes (on macro expansion).
|
||||
option(WERROR "Enable -Werror compiler option" OFF)
|
||||
else ()
|
||||
option(WERROR "Enable -Werror compiler option" ON)
|
||||
endif ()
|
||||
|
||||
if (WERROR)
|
||||
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
|
||||
# Instead, adopt modern cmake usage requirement.
|
||||
@ -441,7 +458,7 @@ endif ()
|
||||
|
||||
# Make this extra-checks for correct library dependencies.
|
||||
if (OS_LINUX AND NOT SANITIZE)
|
||||
target_link_options(global-group INTERFACE "-Wl,--no-undefined")
|
||||
target_link_options(global-group INTERFACE "LINKER:--no-undefined")
|
||||
endif ()
|
||||
|
||||
######################################
|
||||
@ -452,7 +469,7 @@ set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
|
||||
|
||||
if (USE_STATIC_LIBRARIES)
|
||||
set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
|
||||
if (OS_LINUX AND NOT ARCH_ARM)
|
||||
if (OS_LINUX AND NOT ARCH_AARCH64)
|
||||
# Slightly more efficient code can be generated
|
||||
# It's disabled for ARM because otherwise ClickHouse cannot run on Android.
|
||||
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
|
||||
@ -486,8 +503,7 @@ endif ()
|
||||
message (STATUS
|
||||
"Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ;
|
||||
USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES}
|
||||
SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES}
|
||||
CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
|
||||
SPLIT_SHARED_LIBRARIES=${SPLIT_SHARED_LIBRARIES}")
|
||||
|
||||
include (GNUInstallDirs)
|
||||
|
||||
@ -503,14 +519,14 @@ if (NOT ENABLE_JEMALLOC)
|
||||
message (WARNING "Non default allocator is disabled. This is not recommended for production builds.")
|
||||
endif ()
|
||||
|
||||
macro (add_executable target)
|
||||
macro (clickhouse_add_executable target)
|
||||
# invoke built-in add_executable
|
||||
# explicitly acquire and interpose malloc symbols by clickhouse_malloc
|
||||
# if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
|
||||
if (ARCH_AMD64 AND GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
|
||||
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
|
||||
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
|
||||
else ()
|
||||
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
|
||||
add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
|
||||
endif ()
|
||||
|
||||
get_target_property (type ${target} TYPE)
|
||||
@ -539,6 +555,16 @@ macro (add_executable target)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# With cross-compiling, all targets are built for the target platform which usually different from the host
|
||||
# platform. This is problematic if a build artifact X (e.g. a file or an executable) is generated by running
|
||||
# another executable Y previously produced in the build. This is solved by compiling and running Y for/on
|
||||
# the host platform. Add target to the list:
|
||||
# add_native_target(<target> ...)
|
||||
set_property (GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
|
||||
function (add_native_target)
|
||||
set_property (GLOBAL APPEND PROPERTY NATIVE_BUILD_TARGETS ${ARGV})
|
||||
endfunction (add_native_target)
|
||||
|
||||
set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.")
|
||||
include_directories(${ConfigIncludePath})
|
||||
|
||||
@ -553,3 +579,32 @@ add_subdirectory (tests)
|
||||
add_subdirectory (utils)
|
||||
|
||||
include (cmake/sanitize_target_link_libraries.cmake)
|
||||
|
||||
# Build native targets if necessary
|
||||
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
|
||||
if (NATIVE_BUILD_TARGETS
|
||||
AND NOT(
|
||||
CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME
|
||||
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR
|
||||
)
|
||||
)
|
||||
message (STATUS "Building native targets...")
|
||||
|
||||
set (NATIVE_BUILD_DIR "${CMAKE_BINARY_DIR}/native")
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}"
|
||||
COMMAND_ECHO STDOUT)
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
|
||||
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
|
||||
${CMAKE_SOURCE_DIR}
|
||||
WORKING_DIRECTORY "${NATIVE_BUILD_DIR}"
|
||||
COMMAND_ECHO STDOUT)
|
||||
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS}
|
||||
COMMAND_ECHO STDOUT)
|
||||
endif ()
|
||||
|
@ -15,28 +15,80 @@
|
||||
if (NOT DEFINED ENV{CLION_IDE} AND NOT DEFINED ENV{XCODE_IDE})
|
||||
find_program(NINJA_PATH ninja)
|
||||
if (NINJA_PATH)
|
||||
set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "" FORCE)
|
||||
set(CMAKE_GENERATOR "Ninja" CACHE INTERNAL "")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Check if environment is polluted.
|
||||
if (NOT "$ENV{CFLAGS}" STREQUAL ""
|
||||
OR NOT "$ENV{CXXFLAGS}" STREQUAL ""
|
||||
OR NOT "$ENV{LDFLAGS}" STREQUAL ""
|
||||
OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
|
||||
OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
|
||||
|
||||
# if $ENV
|
||||
message("CFLAGS: $ENV{CFLAGS}")
|
||||
message("CXXFLAGS: $ENV{CXXFLAGS}")
|
||||
message("LDFLAGS: $ENV{LDFLAGS}")
|
||||
# if *_FLAGS
|
||||
message("CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
|
||||
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
message("CMAKE_EXE_LINKER_FLAGS: ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
message("CMAKE_SHARED_LINKER_FLAGS: ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
message("CMAKE_MODULE_LINKER_FLAGS: ${CMAKE_MODULE_LINKER_FLAGS}")
|
||||
# if *_FLAGS_INIT
|
||||
message("CMAKE_C_FLAGS_INIT: ${CMAKE_C_FLAGS_INIT}")
|
||||
message("CMAKE_CXX_FLAGS_INIT: ${CMAKE_CXX_FLAGS_INIT}")
|
||||
message("CMAKE_EXE_LINKER_FLAGS_INIT: ${CMAKE_EXE_LINKER_FLAGS_INIT}")
|
||||
message("CMAKE_SHARED_LINKER_FLAGS_INIT: ${CMAKE_SHARED_LINKER_FLAGS_INIT}")
|
||||
message("CMAKE_MODULE_LINKER_FLAGS_INIT: ${CMAKE_MODULE_LINKER_FLAGS_INIT}")
|
||||
|
||||
message(FATAL_ERROR "
|
||||
Some of the variables like CFLAGS, CXXFLAGS, LDFLAGS are not empty.
|
||||
It is not possible to build ClickHouse with custom flags.
|
||||
These variables can be set up by previous invocation of some other build tools.
|
||||
You should cleanup these variables and start over again.
|
||||
|
||||
Run the `env` command to check the details.
|
||||
You will also need to remove the contents of the build directory.
|
||||
|
||||
Note: if you don't like this behavior, you can manually edit the cmake files, but please don't complain to developers.")
|
||||
endif()
|
||||
|
||||
# Default toolchain - this is needed to avoid dependency on OS files.
|
||||
execute_process(COMMAND uname -s OUTPUT_VARIABLE OS)
|
||||
execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH)
|
||||
|
||||
# By default, prefer clang on Linux
|
||||
# But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER.
|
||||
if (OS MATCHES "Linux"
|
||||
AND "$ENV{CC}" STREQUAL ""
|
||||
AND "$ENV{CXX}" STREQUAL ""
|
||||
AND NOT DEFINED CMAKE_C_COMPILER
|
||||
AND NOT DEFINED CMAKE_CXX_COMPILER)
|
||||
find_program(CLANG_PATH clang)
|
||||
if (CLANG_PATH)
|
||||
set(CMAKE_C_COMPILER "clang" CACHE INTERNAL "")
|
||||
endif()
|
||||
|
||||
find_program(CLANG_CXX_PATH clang++)
|
||||
if (CLANG_CXX_PATH)
|
||||
set(CMAKE_CXX_COMPILER "clang++" CACHE INTERNAL "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (OS MATCHES "Linux"
|
||||
AND NOT DEFINED CMAKE_TOOLCHAIN_FILE
|
||||
AND NOT DISABLE_HERMETIC_BUILD
|
||||
AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*"))
|
||||
AND ("$ENV{CC}" MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*"))
|
||||
|
||||
if (ARCH MATCHES "amd64|x86_64")
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "" FORCE)
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "")
|
||||
elseif (ARCH MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "" FORCE)
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "")
|
||||
elseif (ARCH MATCHES "^(ppc64le.*|PPC64LE.*)")
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "" FORCE)
|
||||
set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-ppc64le.cmake" CACHE INTERNAL "")
|
||||
else ()
|
||||
message (FATAL_ERROR "Unsupported architecture: ${ARCH}")
|
||||
endif ()
|
||||
|
||||
endif()
|
||||
|
@ -1,4 +1,4 @@
|
||||
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.com)
|
||||
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com)
|
||||
|
||||
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
|
||||
|
||||
@ -12,4 +12,7 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
|
||||
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
|
||||
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
|
||||
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
|
||||
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
|
||||
|
||||
## Upcoming events
|
||||
* [**v22.8 Release Webinar**](https://clickhouse.com/company/events/v22-8-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
|
||||
|
53
SECURITY.md
53
SECURITY.md
@ -1,3 +1,4 @@
|
||||
|
||||
# Security Policy
|
||||
|
||||
## Security Announcements
|
||||
@ -7,33 +8,38 @@ Security fixes will be announced by posting them in the [security changelog](htt
|
||||
|
||||
The following versions of ClickHouse server are currently being supported with security updates:
|
||||
|
||||
| Version | Supported |
|
||||
| ------- | ------------------ |
|
||||
| 1.x | :x: |
|
||||
| 18.x | :x: |
|
||||
| 19.x | :x: |
|
||||
| 20.x | :x: |
|
||||
| 21.1 | :x: |
|
||||
| 21.2 | :x: |
|
||||
| 21.3 | :x: |
|
||||
| 21.4 | :x: |
|
||||
| 21.5 | :x: |
|
||||
| 21.6 | :x: |
|
||||
| 21.7 | :x: |
|
||||
| 21.8 | ✅ |
|
||||
| 21.9 | :x: |
|
||||
| 21.10 | :x: |
|
||||
| 21.11 | :x: |
|
||||
| 21.12 | :x: |
|
||||
| 22.1 | ✅ |
|
||||
| 22.2 | ✅ |
|
||||
| 22.3 | ✅ |
|
||||
| Version | Supported |
|
||||
|:-|:-|
|
||||
| 22.8 | ✔️ |
|
||||
| 22.7 | ✔️ |
|
||||
| 22.6 | ✔️ |
|
||||
| 22.5 | ❌ |
|
||||
| 22.4 | ❌ |
|
||||
| 22.3 | ✔️ |
|
||||
| 22.2 | ❌ |
|
||||
| 22.1 | ❌ |
|
||||
| 21.12 | ❌ |
|
||||
| 21.11 | ❌ |
|
||||
| 21.10 | ❌ |
|
||||
| 21.9 | ❌ |
|
||||
| 21.8 | ❌ |
|
||||
| 21.7 | ❌ |
|
||||
| 21.6 | ❌ |
|
||||
| 21.5 | ❌ |
|
||||
| 21.4 | ❌ |
|
||||
| 21.3 | ❌ |
|
||||
| 21.2 | ❌ |
|
||||
| 21.1 | ❌ |
|
||||
| 20.* | ❌ |
|
||||
| 19.* | ❌ |
|
||||
| 18.* | ❌ |
|
||||
| 1.* | ❌ |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
|
||||
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com).
|
||||
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
|
||||
|
||||
### When Should I Report a Vulnerability?
|
||||
|
||||
@ -53,6 +59,5 @@ As the security issue moves from triage, to identified fix, to release planning
|
||||
|
||||
## Public Disclosure Timing
|
||||
|
||||
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days.
|
||||
|
||||
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days.
|
||||
|
||||
|
@ -3,9 +3,6 @@ if (USE_CLANG_TIDY)
|
||||
endif ()
|
||||
|
||||
add_subdirectory (base)
|
||||
add_subdirectory (daemon)
|
||||
add_subdirectory (loggers)
|
||||
add_subdirectory (pcg-random)
|
||||
add_subdirectory (widechar_width)
|
||||
add_subdirectory (readpassphrase)
|
||||
add_subdirectory (bridge)
|
||||
|
@ -89,7 +89,7 @@ public:
|
||||
inline void returnObject(T && object_to_return)
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
|
||||
objects.emplace_back(std::move(object_to_return));
|
||||
--borrowed_objects_size;
|
||||
@ -107,14 +107,14 @@ public:
|
||||
/// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full.
|
||||
inline size_t allocatedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size;
|
||||
}
|
||||
|
||||
/// Returns allocatedObjectsSize == maxSize
|
||||
inline bool isFull() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return allocated_objects_size == max_size;
|
||||
}
|
||||
|
||||
@ -122,7 +122,7 @@ public:
|
||||
/// Then client will wait during borrowObject function call.
|
||||
inline size_t borrowedObjectsSize() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(objects_mutex);
|
||||
std::lock_guard lock(objects_mutex);
|
||||
return borrowed_objects_size;
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@ set (SRCS
|
||||
argsToConfig.cpp
|
||||
coverage.cpp
|
||||
demangle.cpp
|
||||
getAvailableMemoryAmount.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
@ -16,14 +17,12 @@ set (SRCS
|
||||
sleep.cpp
|
||||
terminalColors.cpp
|
||||
errnoToString.cpp
|
||||
ReplxxLineReader.cpp
|
||||
StringRef.cpp
|
||||
safeExit.cpp
|
||||
throwError.cpp
|
||||
)
|
||||
|
||||
if (ENABLE_REPLXX)
|
||||
list (APPEND SRCS ReplxxLineReader.cpp)
|
||||
endif ()
|
||||
|
||||
if (USE_DEBUG_HELPERS)
|
||||
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
|
||||
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
|
||||
|
@ -49,7 +49,7 @@ struct Decimal
|
||||
using NativeType = T;
|
||||
|
||||
constexpr Decimal() = default;
|
||||
constexpr Decimal(Decimal<T> &&) = default;
|
||||
constexpr Decimal(Decimal<T> &&) noexcept = default;
|
||||
constexpr Decimal(const Decimal<T> &) = default;
|
||||
|
||||
constexpr Decimal(const T & value_): value(value_) {}
|
||||
@ -57,7 +57,7 @@ struct Decimal
|
||||
template <typename U>
|
||||
constexpr Decimal(const Decimal<U> & x): value(x.value) {}
|
||||
|
||||
constexpr Decimal<T> & operator = (Decimal<T> &&) = default;
|
||||
constexpr Decimal<T> & operator=(Decimal<T> &&) noexcept = default;
|
||||
constexpr Decimal<T> & operator = (const Decimal<T> &) = default;
|
||||
|
||||
constexpr operator T () const { return value; }
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <Poco/NumberParser.h>
|
||||
@ -12,7 +12,14 @@
|
||||
#define JSON_MAX_DEPTH 100
|
||||
|
||||
|
||||
POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException")
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
|
||||
#endif
|
||||
POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept)
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
|
||||
/// Прочитать беззнаковое целое в простом формате из не-0-terminated строки.
|
||||
@ -662,18 +669,18 @@ std::string JSON::getName() const
|
||||
return getString();
|
||||
}
|
||||
|
||||
StringRef JSON::getRawString() const
|
||||
std::string_view JSON::getRawString() const
|
||||
{
|
||||
Pos s = ptr_begin;
|
||||
if (*s != '"')
|
||||
throw JSONException(std::string("JSON: expected \", got ") + *s);
|
||||
while (++s != ptr_end && *s != '"');
|
||||
if (s != ptr_end)
|
||||
return StringRef(ptr_begin + 1, s - ptr_begin - 1);
|
||||
return std::string_view(ptr_begin + 1, s - ptr_begin - 1);
|
||||
throw JSONException("JSON: incorrect syntax (expected end of string, found end of JSON).");
|
||||
}
|
||||
|
||||
StringRef JSON::getRawName() const
|
||||
std::string_view JSON::getRawName() const
|
||||
{
|
||||
return getRawString();
|
||||
}
|
||||
|
@ -38,8 +38,14 @@
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
|
||||
#endif
|
||||
POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception)
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
class JSON
|
||||
{
|
||||
@ -130,8 +136,8 @@ public:
|
||||
std::string getName() const; /// Получить имя name-value пары.
|
||||
JSON getValue() const; /// Получить значение name-value пары.
|
||||
|
||||
StringRef getRawString() const;
|
||||
StringRef getRawName() const;
|
||||
std::string_view getRawString() const;
|
||||
std::string_view getRawName() const;
|
||||
|
||||
/// Получить значение элемента; если элемент - строка, то распарсить значение из строки; если не строка или число - то исключение.
|
||||
double toDouble() const;
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include <cassert>
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/time.h>
|
||||
@ -73,7 +73,7 @@ replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String &
|
||||
if (std::string::npos == last_word_pos)
|
||||
last_word = prefix;
|
||||
else
|
||||
last_word = std::string_view(prefix).substr(last_word_pos + 1, std::string::npos);
|
||||
last_word = std::string_view{prefix}.substr(last_word_pos + 1, std::string::npos);
|
||||
/// last_word can be empty.
|
||||
|
||||
std::pair<Words::const_iterator, Words::const_iterator> range;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <replxx.hxx>
|
||||
|
||||
#include <base/types.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
class LineReader
|
||||
{
|
||||
@ -20,8 +21,8 @@ public:
|
||||
void addWords(Words && new_words);
|
||||
|
||||
private:
|
||||
Words words;
|
||||
Words words_no_case;
|
||||
Words words TSA_GUARDED_BY(mutex);
|
||||
Words words_no_case TSA_GUARDED_BY(mutex);
|
||||
|
||||
std::mutex mutex;
|
||||
};
|
||||
@ -29,7 +30,7 @@ public:
|
||||
using Patterns = std::vector<const char *>;
|
||||
|
||||
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
|
||||
virtual ~LineReader() {}
|
||||
virtual ~LineReader() = default;
|
||||
|
||||
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
|
||||
/// If resulting line is empty, it means the user interrupted the input.
|
||||
|
@ -45,14 +45,16 @@ std::string replxx_now_ms_str()
|
||||
time_t t = ms.count() / 1000;
|
||||
tm broken;
|
||||
if (!localtime_r(&t, &broken))
|
||||
{
|
||||
return std::string();
|
||||
}
|
||||
return {};
|
||||
|
||||
static int const BUFF_SIZE(32);
|
||||
char str[BUFF_SIZE];
|
||||
strftime(str, BUFF_SIZE, "%Y-%m-%d %H:%M:%S.", &broken);
|
||||
snprintf(str + sizeof("YYYY-mm-dd HH:MM:SS"), 5, "%03d", static_cast<int>(ms.count() % 1000));
|
||||
if (strftime(str, BUFF_SIZE, "%Y-%m-%d %H:%M:%S.", &broken) <= 0)
|
||||
return {};
|
||||
|
||||
if (snprintf(str + sizeof("YYYY-mm-dd HH:MM:SS"), 5, "%03d", static_cast<int>(ms.count() % 1000)) <= 0)
|
||||
return {};
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
@ -378,4 +380,4 @@ void ReplxxLineReader::enableBracketedPaste()
|
||||
{
|
||||
bracketed_paste_enabled = true;
|
||||
rx.enable_bracketed_paste();
|
||||
};
|
||||
}
|
||||
|
@ -37,7 +37,8 @@ struct StringRef
|
||||
size_t size = 0;
|
||||
|
||||
/// Non-constexpr due to reinterpret_cast.
|
||||
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
|
||||
template <typename CharT>
|
||||
requires (sizeof(CharT) == 1)
|
||||
StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_)
|
||||
{
|
||||
/// Sanity check for overflowed values.
|
||||
@ -51,6 +52,8 @@ struct StringRef
|
||||
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} /// NOLINT
|
||||
constexpr StringRef() = default;
|
||||
|
||||
bool empty() const { return size == 0; }
|
||||
|
||||
std::string toString() const { return std::string(data, size); }
|
||||
|
||||
explicit operator std::string() const { return toString(); }
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
namespace Poco::Util
|
||||
{
|
||||
class LayeredConfiguration;
|
||||
class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor)
|
||||
}
|
||||
|
||||
/// Import extra command line arguments to configuration. These are command line arguments after --.
|
||||
|
@ -100,11 +100,70 @@
|
||||
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14
|
||||
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
|
||||
#else
|
||||
# define DISABLE_SANITIZER_INSTRUMENTATION
|
||||
#endif
|
||||
|
||||
|
||||
#if !__has_include(<sanitizer/asan_interface.h>) || !defined(ADDRESS_SANITIZER)
|
||||
# define ASAN_UNPOISON_MEMORY_REGION(a, b)
|
||||
# define ASAN_POISON_MEMORY_REGION(a, b)
|
||||
#endif
|
||||
|
||||
#if !defined(ABORT_ON_LOGICAL_ERROR)
|
||||
#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER)
|
||||
#define ABORT_ON_LOGICAL_ERROR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/// chassert(x) is similar to assert(x), but:
|
||||
/// - works in builds with sanitizers, not only in debug builds
|
||||
/// - tries to print failed assertion into server log
|
||||
/// It can be used for all assertions except heavy ones.
|
||||
/// Heavy assertions (that run loops or call complex functions) are allowed in debug builds only.
|
||||
#if !defined(chassert)
|
||||
#if defined(ABORT_ON_LOGICAL_ERROR)
|
||||
#define chassert(x) static_cast<bool>(x) ? void(0) : abortOnFailedAssertion(#x)
|
||||
#else
|
||||
#define chassert(x) ((void)0)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
|
||||
/// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
|
||||
#if defined(__clang__)
|
||||
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
|
||||
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
|
||||
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
|
||||
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
|
||||
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
|
||||
|
||||
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
|
||||
/// Consider adding a comment before using these macros.
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }()
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) [&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }()
|
||||
|
||||
/// This macro is useful when only one thread writes to a member
|
||||
/// and you want to read this member from the same thread without locking a mutex.
|
||||
/// It's safe (because no concurrent writes are possible), but TSA generates a warning.
|
||||
/// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability)
|
||||
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
|
||||
#else
|
||||
# define TSA_GUARDED_BY(...)
|
||||
# define TSA_PT_GUARDED_BY(...)
|
||||
# define TSA_REQUIRES(...)
|
||||
# define TSA_REQUIRES_SHARED(...)
|
||||
# define TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
|
||||
# define TSA_SUPPRESS_WARNING_FOR_READ(x)
|
||||
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x)
|
||||
# define TSA_READ_ONE_THREAD(x)
|
||||
#endif
|
||||
|
||||
/// A template function for suppressing warnings about unused variables or function results.
|
||||
template <typename... Args>
|
||||
constexpr void UNUSED(Args &&... args [[maybe_unused]])
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <base/demangle.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <cxxabi.h>
|
||||
|
||||
static DemangleResult tryDemangle(const char * name, int & status)
|
||||
|
@ -27,6 +27,6 @@ struct FreeingDeleter
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<char, FreeingDeleter> DemangleResult;
|
||||
using DemangleResult = std::unique_ptr<char, FreeingDeleter>;
|
||||
|
||||
DemangleResult tryDemangle(const char * name);
|
||||
|
@ -9,7 +9,7 @@ std::string errnoToString(int code, int the_errno)
|
||||
char buf[buf_size];
|
||||
#ifndef _GNU_SOURCE
|
||||
int rc = strerror_r(the_errno, buf, buf_size);
|
||||
#ifdef __APPLE__
|
||||
#ifdef OS_DARWIN
|
||||
if (rc != 0 && rc != EINVAL)
|
||||
#else
|
||||
if (rc != 0)
|
||||
|
44
base/base/getAvailableMemoryAmount.cpp
Normal file
44
base/base/getAvailableMemoryAmount.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <base/getAvailableMemoryAmount.h>
|
||||
#include <base/getPageSize.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#if defined(BSD)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#endif
|
||||
|
||||
|
||||
uint64_t getAvailableMemoryAmountOrZero()
|
||||
{
|
||||
#if defined(_SC_PHYS_PAGES) // linux
|
||||
return getPageSize() * sysconf(_SC_PHYS_PAGES);
|
||||
#elif defined(OS_FREEBSD)
|
||||
struct vmtotal vmt;
|
||||
size_t vmt_size = sizeof(vmt);
|
||||
if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0)
|
||||
return getPageSize() * vmt.t_avm;
|
||||
else
|
||||
return 0;
|
||||
#else // darwin
|
||||
unsigned int usermem;
|
||||
size_t len = sizeof(usermem);
|
||||
static int mib[2] = { CTL_HW, HW_USERMEM };
|
||||
if (sysctl(mib, 2, &usermem, &len, nullptr, 0) == 0 && len == sizeof(usermem))
|
||||
return usermem;
|
||||
else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
uint64_t getAvailableMemoryAmount()
|
||||
{
|
||||
auto res = getAvailableMemoryAmountOrZero();
|
||||
if (!res)
|
||||
throw std::runtime_error("Cannot determine available memory amount");
|
||||
return res;
|
||||
}
|
12
base/base/getAvailableMemoryAmount.h
Normal file
12
base/base/getAvailableMemoryAmount.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
/** Returns the size of currently available physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
|
||||
*/
|
||||
uint64_t getAvailableMemoryAmountOrZero();
|
||||
|
||||
/** Throws exception if it cannot determine the size of physical memory.
|
||||
*/
|
||||
uint64_t getAvailableMemoryAmount();
|
@ -3,7 +3,7 @@
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <cerrno>
|
||||
|
||||
|
||||
void * mremap_fallback(
|
||||
|
@ -2,11 +2,11 @@
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
#endif
|
||||
|
||||
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
|
||||
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
|
||||
|
||||
#include <base/defines.h>
|
||||
|
||||
#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
|
||||
#if defined(OS_LINUX) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
|
||||
#define USE_PHDR_CACHE 1
|
||||
#endif
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
|
||||
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
|
||||
|
||||
/** Collects all dl_phdr_info items and caches them in a static array.
|
||||
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache
|
||||
|
@ -169,9 +169,9 @@ obstacle to adoption, that text has been removed.
|
||||
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
double preciseExp10(double x)
|
||||
{
|
||||
|
@ -21,10 +21,12 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
|
||||
template <typename G>
|
||||
requires std::is_convertible_v<G, F>
|
||||
constexpr basic_scope_guard(basic_scope_guard<G> && src) : function{src.release()} {}
|
||||
|
||||
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
|
||||
template <typename G>
|
||||
requires std::is_convertible_v<G, F>
|
||||
constexpr basic_scope_guard & operator=(basic_scope_guard<G> && src)
|
||||
{
|
||||
if (this != &src)
|
||||
@ -35,10 +37,12 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
|
||||
template <typename G>
|
||||
requires std::is_convertible_v<G, F>
|
||||
constexpr basic_scope_guard(const G & function_) : function{function_} {}
|
||||
|
||||
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
|
||||
template <typename G>
|
||||
requires std::is_convertible_v<G, F>
|
||||
constexpr basic_scope_guard(G && function_) : function{std::move(function_)} {}
|
||||
|
||||
~basic_scope_guard() { invoke(); }
|
||||
@ -64,7 +68,8 @@ public:
|
||||
return std::exchange(function, {});
|
||||
}
|
||||
|
||||
template <typename G, typename = std::enable_if_t<std::is_convertible_v<G, F>, void>>
|
||||
template <typename G>
|
||||
requires std::is_convertible_v<G, F>
|
||||
basic_scope_guard<F> & join(basic_scope_guard<G> && other)
|
||||
{
|
||||
if (other.function)
|
||||
|
@ -1,36 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
|
||||
/** Allows to make std::shared_ptr from T with protected constructor.
|
||||
*
|
||||
* Derive your T class from shared_ptr_helper<T> and add shared_ptr_helper<T> as a friend
|
||||
* and you will have static 'create' method in your class.
|
||||
*/
|
||||
template <typename T>
|
||||
struct shared_ptr_helper
|
||||
{
|
||||
template <typename... TArgs>
|
||||
static std::shared_ptr<T> create(TArgs &&... args)
|
||||
{
|
||||
return std::shared_ptr<T>(new T(std::forward<TArgs>(args)...));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct is_shared_ptr
|
||||
{
|
||||
static constexpr bool value = false;
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct is_shared_ptr<std::shared_ptr<T>>
|
||||
{
|
||||
static constexpr bool value = true;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
|
@ -1,7 +1,7 @@
|
||||
#include <base/sleep.h>
|
||||
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
#include <ctime>
|
||||
#include <cerrno>
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
#include <mach/mach.h>
|
||||
|
@ -23,10 +23,10 @@ public:
|
||||
constexpr StrongTypedef(): t() {}
|
||||
|
||||
constexpr StrongTypedef(const Self &) = default;
|
||||
constexpr StrongTypedef(Self &&) = default;
|
||||
constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default;
|
||||
|
||||
Self & operator=(const Self &) = default;
|
||||
Self & operator=(Self &&) = default;
|
||||
Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default;
|
||||
|
||||
template <class Enable = typename std::is_copy_assignable<T>::type>
|
||||
Self & operator=(const T & rhs) { t = rhs; return *this;}
|
||||
|
@ -1,2 +1,2 @@
|
||||
add_executable (dump_variable dump_variable.cpp)
|
||||
clickhouse_add_executable (dump_variable dump_variable.cpp)
|
||||
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)
|
||||
|
8
base/base/throwError.cpp
Normal file
8
base/base/throwError.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include <base/throwError.h>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
[[noreturn]] void throwError(const char * err)
|
||||
{
|
||||
throw std::runtime_error(err);
|
||||
}
|
@ -1,15 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
/// Throw DB::Exception-like exception before its definition.
|
||||
/// DB::Exception derived from Poco::Exception derived from std::exception.
|
||||
/// DB::Exception generally caught as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes.
|
||||
/// DB::Exception is not defined yet. It'd better to throw Poco::Exception but we do not want to include any big header here, even <string>.
|
||||
/// So we throw some std::exception instead in the hope its catch block is the same as DB::Exception one.
|
||||
template <typename T>
|
||||
[[noreturn]] inline void throwError(const T & err)
|
||||
{
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
[[noreturn]] void throwError(const char * err);
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <time.h>
|
||||
#include <ctime>
|
||||
|
||||
#if defined (OS_DARWIN) || defined (OS_SUNOS)
|
||||
# define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
|
||||
|
@ -1,9 +1,48 @@
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <bit>
|
||||
|
||||
|
||||
inline void reverseMemcpy(void * dst, const void * src, size_t size)
|
||||
{
|
||||
uint8_t * uint_dst = reinterpret_cast<uint8_t *>(dst);
|
||||
const uint8_t * uint_src = reinterpret_cast<const uint8_t *>(src);
|
||||
|
||||
uint_dst += size;
|
||||
while (size)
|
||||
{
|
||||
--uint_dst;
|
||||
*uint_dst = *uint_src;
|
||||
++uint_src;
|
||||
--size;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T unalignedLoadLE(const void * address)
|
||||
{
|
||||
T res {};
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(&res, address, sizeof(res));
|
||||
else
|
||||
reverseMemcpy(&res, address, sizeof(res));
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline void unalignedStoreLE(void * address,
|
||||
const typename std::enable_if<true, T>::type & src)
|
||||
{
|
||||
static_assert(std::is_trivially_copyable_v<T>);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(address, &src, sizeof(src));
|
||||
else
|
||||
reverseMemcpy(address, &src, sizeof(src));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T unalignedLoad(const void * address)
|
||||
{
|
||||
|
@ -1,14 +1,19 @@
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
|
||||
#ifdef HAS_RESERVED_IDENTIFIER
|
||||
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
#endif
|
||||
|
||||
constexpr size_t KiB = 1024;
|
||||
constexpr size_t MiB = 1024 * KiB;
|
||||
constexpr size_t GiB = 1024 * MiB;
|
||||
|
||||
#ifdef HAS_RESERVED_IDENTIFIER
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||
#endif
|
||||
|
||||
constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }
|
||||
constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }
|
||||
constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }
|
||||
|
||||
#ifdef HAS_RESERVED_IDENTIFIER
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include <type_traits>
|
||||
#include <initializer_list>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace wide
|
||||
{
|
||||
template <size_t Bits, typename Signed>
|
||||
@ -257,4 +259,7 @@ struct hash<wide::integer<Bits, Signed>>;
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#include "wide_integer_impl.h"
|
||||
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include <boost/multiprecision/cpp_bin_float.hpp>
|
||||
#include <boost/math/special_functions/fpclassify.hpp>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/// Use same extended double for all platforms
|
||||
#if (LDBL_MANT_DIG == 64)
|
||||
#define CONSTEXPR_FROM_DOUBLE constexpr
|
||||
@ -576,8 +578,8 @@ private:
|
||||
else if constexpr (Bits == 128 && sizeof(base_type) == 8)
|
||||
{
|
||||
using CompilerUInt128 = unsigned __int128;
|
||||
CompilerUInt128 a = (CompilerUInt128(lhs.items[1]) << 64) + lhs.items[0];
|
||||
CompilerUInt128 b = (CompilerUInt128(rhs.items[1]) << 64) + rhs.items[0];
|
||||
CompilerUInt128 a = (CompilerUInt128(lhs.items[1]) << 64) + lhs.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
|
||||
CompilerUInt128 b = (CompilerUInt128(rhs.items[1]) << 64) + rhs.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
|
||||
CompilerUInt128 c = a * b;
|
||||
integer<Bits, Signed> res;
|
||||
res.items[0] = c;
|
||||
@ -841,8 +843,8 @@ public:
|
||||
{
|
||||
using CompilerUInt128 = unsigned __int128;
|
||||
|
||||
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
|
||||
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
|
||||
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
|
||||
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
|
||||
CompilerUInt128 c = a / b; // NOLINT
|
||||
|
||||
integer<Bits, Signed> res;
|
||||
@ -1204,7 +1206,7 @@ constexpr integer<Bits, Signed>::operator T() const noexcept
|
||||
|
||||
UnsignedT res{};
|
||||
for (unsigned i = 0; i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type); ++i)
|
||||
res += UnsignedT(items[i]) << (sizeof(base_type) * 8 * i);
|
||||
res += UnsignedT(items[i]) << (sizeof(base_type) * 8 * i); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -1478,3 +1480,5 @@ struct hash<wide::integer<Bits, Signed>>
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
@ -1,13 +0,0 @@
|
||||
add_library (bridge
|
||||
IBridge.cpp
|
||||
)
|
||||
|
||||
target_include_directories (daemon PUBLIC ..)
|
||||
target_link_libraries (bridge
|
||||
PRIVATE
|
||||
daemon
|
||||
dbms
|
||||
Poco::Data
|
||||
Poco::Data::ODBC
|
||||
)
|
||||
|
@ -1,17 +0,0 @@
|
||||
add_library (daemon
|
||||
BaseDaemon.cpp
|
||||
GraphiteWriter.cpp
|
||||
SentryWriter.cpp
|
||||
)
|
||||
|
||||
target_include_directories (daemon PUBLIC ..)
|
||||
|
||||
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
|
||||
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
|
||||
endif()
|
||||
|
||||
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
|
||||
|
||||
if (TARGET ch_contrib::sentry)
|
||||
target_link_libraries (daemon PRIVATE ch_contrib::sentry)
|
||||
endif ()
|
@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY)
|
||||
|
||||
message (STATUS "Some symbols from glibc will be replaced for compatibility")
|
||||
|
||||
elseif (YANDEX_OFFICIAL_BUILD)
|
||||
elseif (CLICKHOUSE_OFFICIAL_BUILD)
|
||||
message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.")
|
||||
endif ()
|
||||
|
@ -1,68 +1,192 @@
|
||||
#include <sys/auxv.h>
|
||||
#include "atomic.h"
|
||||
#include <unistd.h> // __environ
|
||||
#include <sys/auxv.h>
|
||||
#include <fcntl.h> // open
|
||||
#include <sys/stat.h> // O_RDONLY
|
||||
#include <unistd.h> // read, close
|
||||
#include <stdlib.h> // ssize_t
|
||||
#include <stdio.h> // perror, fprintf
|
||||
#include <link.h> // ElfW
|
||||
#include <errno.h>
|
||||
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
static unsigned long * __auxv = NULL;
|
||||
static unsigned long __auxv_secure = 0;
|
||||
#define ARRAY_SIZE(a) sizeof((a))/sizeof((a[0]))
|
||||
|
||||
static size_t __find_auxv(unsigned long type)
|
||||
/// Suppress TSan since it is possible for this code to be called from multiple threads,
|
||||
/// and initialization is safe to be done multiple times from multiple threads.
|
||||
#if defined(__clang__)
|
||||
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
|
||||
#else
|
||||
# define NO_SANITIZE_THREAD
|
||||
#endif
|
||||
|
||||
// We don't have libc struct available here.
|
||||
// Compute aux vector manually (from /proc/self/auxv).
|
||||
//
|
||||
// Right now there is only 51 AT_* constants,
|
||||
// so 64 should be enough until this implementation will be replaced with musl.
|
||||
static unsigned long __auxv_procfs[64];
|
||||
static unsigned long __auxv_secure = 0;
|
||||
// Common
|
||||
static unsigned long * __auxv_environ = NULL;
|
||||
|
||||
static void * volatile getauxval_func;
|
||||
|
||||
static unsigned long __auxv_init_environ(unsigned long type);
|
||||
|
||||
//
|
||||
// auxv from procfs interface
|
||||
//
|
||||
ssize_t __retry_read(int fd, void * buf, size_t count)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
ssize_t ret = read(fd, buf, count);
|
||||
if (ret == -1)
|
||||
{
|
||||
if (errno == EINTR)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
perror("Cannot read /proc/self/auxv");
|
||||
abort();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
|
||||
{
|
||||
if (type == AT_SECURE)
|
||||
{
|
||||
return __auxv_secure;
|
||||
}
|
||||
|
||||
if (type >= ARRAY_SIZE(__auxv_procfs))
|
||||
{
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return __auxv_procfs[type];
|
||||
}
|
||||
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
|
||||
{
|
||||
// For debugging:
|
||||
// - od -t dL /proc/self/auxv
|
||||
// - LD_SHOW_AUX= ls
|
||||
int fd = open("/proc/self/auxv", O_RDONLY);
|
||||
// It is possible in case of:
|
||||
// - no procfs mounted
|
||||
// - on android you are not able to read it unless running from shell or debugging
|
||||
// - some other issues
|
||||
if (fd == -1)
|
||||
{
|
||||
// Fallback to environ.
|
||||
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ);
|
||||
return __auxv_init_environ(type);
|
||||
}
|
||||
|
||||
ElfW(auxv_t) aux;
|
||||
|
||||
/// NOTE: sizeof(aux) is very small (less then PAGE_SIZE), so partial read should not be possible.
|
||||
_Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)");
|
||||
while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux))
|
||||
{
|
||||
if (aux.a_type == AT_NULL)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (aux.a_type == AT_IGNORE || aux.a_type == AT_IGNOREPPC)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (aux.a_type >= ARRAY_SIZE(__auxv_procfs))
|
||||
{
|
||||
fprintf(stderr, "AT_* is out of range: %li (maximum allowed is %zu)\n", aux.a_type, ARRAY_SIZE(__auxv_procfs));
|
||||
abort();
|
||||
}
|
||||
if (__auxv_procfs[aux.a_type])
|
||||
{
|
||||
/// It is possible due to race on initialization.
|
||||
}
|
||||
__auxv_procfs[aux.a_type] = aux.a_un.a_val;
|
||||
}
|
||||
close(fd);
|
||||
|
||||
__auxv_secure = __getauxval_procfs(AT_SECURE);
|
||||
|
||||
// Now we've initialized __auxv_procfs, next time getauxval() will only call __get_auxval().
|
||||
a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs);
|
||||
|
||||
return __getauxval_procfs(type);
|
||||
}
|
||||
|
||||
//
|
||||
// auxv from environ interface
|
||||
//
|
||||
// NOTE: environ available only after static initializers,
|
||||
// so you cannot rely on this if you need getauxval() before.
|
||||
//
|
||||
// Good example of such user is sanitizers, for example
|
||||
// LSan will not work with __auxv_init_environ(),
|
||||
// since it needs getauxval() before.
|
||||
//
|
||||
static size_t NO_SANITIZE_THREAD __find_auxv(unsigned long type)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; __auxv[i]; i += 2)
|
||||
for (i = 0; __auxv_environ[i]; i += 2)
|
||||
{
|
||||
if (__auxv[i] == type)
|
||||
if (__auxv_environ[i] == type)
|
||||
{
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
return (size_t) -1;
|
||||
}
|
||||
|
||||
unsigned long __getauxval(unsigned long type)
|
||||
unsigned long NO_SANITIZE_THREAD __getauxval_environ(unsigned long type)
|
||||
{
|
||||
if (type == AT_SECURE)
|
||||
return __auxv_secure;
|
||||
|
||||
if (__auxv)
|
||||
if (__auxv_environ)
|
||||
{
|
||||
size_t index = __find_auxv(type);
|
||||
if (index != ((size_t) -1))
|
||||
return __auxv[index];
|
||||
return __auxv_environ[index];
|
||||
}
|
||||
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void * volatile getauxval_func;
|
||||
|
||||
static unsigned long __auxv_init(unsigned long type)
|
||||
static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
|
||||
{
|
||||
if (!__environ)
|
||||
{
|
||||
// __environ is not initialized yet so we can't initialize __auxv right now.
|
||||
// __environ is not initialized yet so we can't initialize __auxv_environ right now.
|
||||
// That's normally occurred only when getauxval() is called from some sanitizer's internal code.
|
||||
errno = ENOENT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialize __auxv and __auxv_secure.
|
||||
// Initialize __auxv_environ and __auxv_secure.
|
||||
size_t i;
|
||||
for (i = 0; __environ[i]; i++);
|
||||
__auxv = (unsigned long *) (__environ + i + 1);
|
||||
__auxv_environ = (unsigned long *) (__environ + i + 1);
|
||||
|
||||
size_t secure_idx = __find_auxv(AT_SECURE);
|
||||
if (secure_idx != ((size_t) -1))
|
||||
__auxv_secure = __auxv[secure_idx];
|
||||
__auxv_secure = __auxv_environ[secure_idx];
|
||||
|
||||
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
|
||||
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
|
||||
// Now we need to switch to __getauxval_environ for all later calls, since
|
||||
// everything is initialized.
|
||||
a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ);
|
||||
|
||||
return __getauxval(type);
|
||||
return __getauxval_environ(type);
|
||||
}
|
||||
|
||||
// First time getauxval() will call __auxv_init().
|
||||
static void * volatile getauxval_func = (void *)__auxv_init;
|
||||
// Callchain:
|
||||
// - __auxv_init_procfs -> __getauxval_environ
|
||||
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
|
||||
static void * volatile getauxval_func = (void *)__auxv_init_procfs;
|
||||
|
||||
unsigned long getauxval(unsigned long type)
|
||||
{
|
||||
|
@ -260,4 +260,35 @@ TRAP(mq_timedreceive)
|
||||
TRAP(wordexp)
|
||||
TRAP(wordfree)
|
||||
|
||||
/// C11 threading primitives are not supported by ThreadSanitizer.
|
||||
/// Also we should avoid using them for compatibility with old libc.
|
||||
TRAP(thrd_create)
|
||||
TRAP(thrd_equal)
|
||||
TRAP(thrd_current)
|
||||
TRAP(thrd_sleep)
|
||||
TRAP(thrd_yield)
|
||||
TRAP(thrd_exit)
|
||||
TRAP(thrd_detach)
|
||||
TRAP(thrd_join)
|
||||
|
||||
TRAP(mtx_init)
|
||||
TRAP(mtx_lock)
|
||||
TRAP(mtx_timedlock)
|
||||
TRAP(mtx_trylock)
|
||||
TRAP(mtx_unlock)
|
||||
TRAP(mtx_destroy)
|
||||
TRAP(call_once)
|
||||
|
||||
TRAP(cnd_init)
|
||||
TRAP(cnd_signal)
|
||||
TRAP(cnd_broadcast)
|
||||
TRAP(cnd_wait)
|
||||
TRAP(cnd_timedwait)
|
||||
TRAP(cnd_destroy)
|
||||
|
||||
TRAP(tss_create)
|
||||
TRAP(tss_get)
|
||||
TRAP(tss_set)
|
||||
TRAP(tss_delete)
|
||||
|
||||
#endif
|
||||
|
@ -1,13 +0,0 @@
|
||||
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
|
||||
add_headers_and_sources(loggers .)
|
||||
|
||||
# Standard version depends on DBMS and works with text log
|
||||
add_library(loggers ${loggers_sources} ${loggers_headers})
|
||||
target_compile_definitions(loggers PUBLIC WITH_TEXT_LOG=1)
|
||||
target_link_libraries(loggers PRIVATE dbms clickhouse_common_io)
|
||||
target_include_directories(loggers PUBLIC ..)
|
||||
|
||||
# Lightweight version doesn't work with textlog and also doesn't depend on DBMS
|
||||
add_library(loggers_no_text_log ${loggers_sources} ${loggers_headers})
|
||||
target_link_libraries(loggers_no_text_log PRIVATE clickhouse_common_io)
|
||||
target_include_directories(loggers PUBLIC ..)
|
@ -90,6 +90,7 @@
|
||||
#define PCG_EMULATED_128BIT_MATH 1
|
||||
#endif
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace pcg_extras {
|
||||
|
||||
@ -552,4 +553,6 @@ std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
|
||||
|
||||
} // namespace pcg_extras
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_EXTRAS_HPP_INCLUDED
|
||||
|
@ -113,6 +113,8 @@
|
||||
|
||||
#include "pcg_extras.hpp"
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct PcgSerializer;
|
||||
@ -1777,4 +1779,6 @@ typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast;
|
||||
#pragma warning(default:4146)
|
||||
#endif
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // PCG_RAND_HPP_INCLUDED
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// NOLINTBEGIN(*)
|
||||
|
||||
/* Special width values */
|
||||
enum {
|
||||
widechar_nonprint = -1, // The character is not printable.
|
||||
@ -518,4 +520,6 @@ inline int widechar_wcwidth(wchar_t c) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// NOLINTEND(*)
|
||||
|
||||
#endif // WIDECHAR_WIDTH_H
|
||||
|
1
benchmark/README.md
Normal file
1
benchmark/README.md
Normal file
@ -0,0 +1 @@
|
||||
Benchmark is located in a separate repository: https://github.com/ClickHouse/ClickBench
|
@ -1,180 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# script to run query to databases
|
||||
|
||||
function usage()
|
||||
{
|
||||
cat <<EOF
|
||||
usage: $0 options
|
||||
|
||||
This script run benhmark for database
|
||||
|
||||
OPTIONS:
|
||||
-c config file where some script variables are defined
|
||||
-n table name
|
||||
|
||||
-h Show this message
|
||||
-t how many times execute each query. default is '3'
|
||||
-q query file
|
||||
-e expect file
|
||||
-s /etc/init.d/service
|
||||
-p table name pattern to be replaced to name. default is 'hits_10m'
|
||||
EOF
|
||||
}
|
||||
|
||||
TIMES=3
|
||||
table_name_pattern=hits_10m
|
||||
|
||||
while getopts “c:ht:n:q:e:s:r” OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
c)
|
||||
source $OPTARG
|
||||
;;
|
||||
?)
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
OPTIND=1
|
||||
|
||||
while getopts “c:ht:n:q:e:s:r” OPTION
|
||||
do
|
||||
case $OPTION in
|
||||
h)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
t)
|
||||
TIMES=$OPTARG
|
||||
;;
|
||||
n)
|
||||
table_name=$OPTARG
|
||||
;;
|
||||
q)
|
||||
test_file=$OPTARG
|
||||
;;
|
||||
e)
|
||||
expect_file=$OPTARG
|
||||
;;
|
||||
s)
|
||||
etc_init_d_service=$OPTARG
|
||||
;;
|
||||
p)
|
||||
table_name_pattern=$OPTARG
|
||||
;;
|
||||
c)
|
||||
;;
|
||||
r)
|
||||
restart_server_each_query=1
|
||||
;;
|
||||
?)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ! -f $expect_file ]]; then
|
||||
echo "Not found: expect file"
|
||||
exit 1
|
||||
fi
|
||||
if [[ ! -f $test_file ]]; then
|
||||
echo "Not found: test file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -f $etc_init_d_service ]]; then
|
||||
echo "Not found: /etc/init.d/service with path=$etc_init_d_service"
|
||||
use_service=0
|
||||
else
|
||||
use_service=1
|
||||
fi
|
||||
|
||||
if [[ "$table_name_pattern" == "" ]]; then
|
||||
echo "Empty table_name_pattern"
|
||||
exit 1
|
||||
fi
|
||||
if [[ "$table_name" == "" ]]; then
|
||||
echo "Empty table_name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
function execute()
|
||||
{
|
||||
queries=("${@}")
|
||||
queries_count=${#queries[@]}
|
||||
|
||||
if [ -z $TIMES ]; then
|
||||
TIMES=1
|
||||
fi
|
||||
|
||||
index=0
|
||||
while [ "$index" -lt "$queries_count" ]; do
|
||||
query=${queries[$index]}
|
||||
|
||||
if [[ $query == "" ]]; then
|
||||
let "index = $index + 1"
|
||||
continue
|
||||
fi
|
||||
|
||||
comment_re='--.*'
|
||||
if [[ $query =~ $comment_re ]]; then
|
||||
echo "$query"
|
||||
echo
|
||||
else
|
||||
sync
|
||||
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
|
||||
|
||||
if [[ "$restart_server_each_query" == "1" && "$use_service" == "1" ]]; then
|
||||
echo "restart server: $etc_init_d_service restart"
|
||||
sudo $etc_init_d_service restart
|
||||
fi
|
||||
|
||||
for i in $(seq $TIMES)
|
||||
do
|
||||
if [[ -f $etc_init_d_service && "$use_service" == "1" ]]; then
|
||||
sudo $etc_init_d_service status
|
||||
server_status=$?
|
||||
expect -f $expect_file ""
|
||||
|
||||
if [[ "$?" != "0" || $server_status != "0" ]]; then
|
||||
echo "restart server: $etc_init_d_service restart"
|
||||
sudo $etc_init_d_service restart
|
||||
fi
|
||||
|
||||
#wait until can connect to server
|
||||
restart_timer=0
|
||||
restart_limit=60
|
||||
expect -f $expect_file "" &> /dev/null
|
||||
while [ "$?" != "0" ]; do
|
||||
echo "waiting"
|
||||
sleep 1
|
||||
let "restart_timer = $restart_timer + 1"
|
||||
if (( $restart_limit < $restart_timer )); then
|
||||
sudo $etc_init_d_service restart
|
||||
restart_timer=0
|
||||
fi
|
||||
expect -f $expect_file "" &> /dev/null
|
||||
done
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "times: $i"
|
||||
|
||||
echo "query:" "$query"
|
||||
expect -f $expect_file "$query"
|
||||
|
||||
done
|
||||
fi
|
||||
|
||||
let "index = $index + 1"
|
||||
done
|
||||
}
|
||||
|
||||
temp_test_file=temp_queries_$table_name
|
||||
cat $test_file | sed s/$table_name_pattern/$table_name/g > $temp_test_file
|
||||
mapfile -t test_queries < $temp_test_file
|
||||
|
||||
echo "start time: $(date)"
|
||||
time execute "${test_queries[@]}"
|
||||
echo "stop time: $(date)"
|
@ -1,22 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
QUERIES_FILE="queries.sql"
|
||||
TABLE=$1
|
||||
TRIES=3
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
while true; do
|
||||
RES=$(command time -f %e -o /dev/stdout curl -sS -G --data-urlencode "query=$query" --data "default_format=Null&max_memory_usage=100000000000&max_memory_usage_for_all_queries=100000000000&max_concurrent_queries_for_user=100&database=*$YT_CLIQUE_ID" --location-trusted -H "Authorization: OAuth $YT_TOKEN" "$YT_PROXY.yt.yandex.net/query" 2>/dev/null);
|
||||
if [[ $? == 0 ]]; then
|
||||
[[ $RES =~ 'fail|Exception' ]] || break;
|
||||
fi
|
||||
done
|
||||
|
||||
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
|
||||
[[ "$i" != $TRIES ]] && echo -n ", "
|
||||
done
|
||||
echo "],"
|
||||
done
|
@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
QUERIES_FILE="queries.sql"
|
||||
TABLE=$1
|
||||
TRIES=3
|
||||
|
||||
if [ -x ./clickhouse ]
|
||||
then
|
||||
CLICKHOUSE_CLIENT="./clickhouse client"
|
||||
elif command -v clickhouse-client >/dev/null 2>&1
|
||||
then
|
||||
CLICKHOUSE_CLIENT="clickhouse-client"
|
||||
else
|
||||
echo "clickhouse-client is not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
|
||||
sync
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1)
|
||||
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
|
||||
[[ "$i" != $TRIES ]] && echo -n ", "
|
||||
done
|
||||
echo "],"
|
||||
done
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
QUERIES_FILE="queries.sql"
|
||||
TABLE=$1
|
||||
TRIES=3
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
while true; do
|
||||
RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time) && break;
|
||||
done
|
||||
|
||||
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
|
||||
[[ "$i" != $TRIES ]] && echo -n ", "
|
||||
done
|
||||
echo "],"
|
||||
done
|
@ -1,43 +0,0 @@
|
||||
SELECT count() FROM {table};
|
||||
SELECT count() FROM {table} WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table} ;
|
||||
SELECT sum(UserID) FROM {table} ;
|
||||
SELECT uniq(UserID) FROM {table} ;
|
||||
SELECT uniq(SearchPhrase) FROM {table} ;
|
||||
SELECT min(EventDate), max(EventDate) FROM {table} ;
|
||||
SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC;
|
||||
SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10;
|
||||
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890;
|
||||
SELECT count() FROM {table} WHERE URL LIKE '%metrika%';
|
||||
SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
|
||||
SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;
|
@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
table=hits_10m; time clickhouse-client --max_bytes_before_external_sort=30000000000 --query="SELECT toInt64(WatchID), JavaEnable, Title, GoodEvent, (EventTime < toDateTime('1971-01-01 00:00:00') ? toDateTime('1971-01-01 00:00:01') : EventTime), (EventDate < toDate('1971-01-01') ? toDate('1971-01-01') : EventDate), CounterID, ClientIP, RegionID, toInt64(UserID), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, (ClientEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : ClientEventTime), SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, toInt64(FUniqID), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, (LocalEventTime < toDateTime('1971-01-01 00:00:01') ? toDateTime('1971-01-01 00:00:01') : LocalEventTime), Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, toInt64(RefererHash), toInt64(URLHash), CLID FROM $table ORDER BY rand()" | corrector_utf8 > /opt/dumps/${table}_corrected.tsv
|
15906
benchmark/duckdb/log
15906
benchmark/duckdb/log
File diff suppressed because one or more lines are too long
@ -1,43 +0,0 @@
|
||||
SELECT count(*) FROM hits;
|
||||
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
|
||||
SELECT sum(UserID) FROM hits;
|
||||
SELECT COUNT(DISTINCT UserID) FROM hits;
|
||||
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits;
|
||||
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
|
||||
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
|
||||
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
|
||||
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
|
||||
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
|
||||
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));
|
File diff suppressed because one or more lines are too long
@ -1,43 +0,0 @@
|
||||
Folder structure
|
||||
______________
|
||||
dump_dataset_from_ch.sh - bash script that dumps a dataset from Clickhouse
|
||||
schema.sql - schema for a Greenplum cluster to load dumped dataset in
|
||||
load_data_set.sql - the script that loads up a dumped dataset
|
||||
queries.sql - SQL statements used in the benchmark
|
||||
benchmark.sh - this piece of bash conducts a benchmark
|
||||
result_parser.py - script to parse benchmark.sh's output and produce python code to build a graph to compare up to 4 benchmark results.
|
||||
Requirements
|
||||
____________
|
||||
|
||||
Greenplum uses a separate server as a point of entry, so you need 2 servers at least to run a cluster: master and segment hosts. 2 segments host and 56 segments(28 per host) had been used while conducting the test.
|
||||
You has has to put segment hostnames in the benchmark.sh.
|
||||
Greenplum quick installation instructions
|
||||
_________________________________________
|
||||
|
||||
Obtain a stable Greenplum version here(4.3.9.1 was used while conducting the benchmark):
|
||||
https://network.pivotal.io/products/pivotal-gpdb
|
||||
|
||||
and install it using this detailed guide:
|
||||
http://gpdb.docs.pivotal.io/4340/install_guide/install_guide.html
|
||||
|
||||
You should change gp_interconnect_type to 'tcp' if cluster members are connected via 1GB link or lower.
|
||||
There are some variables that has to be changed prior the first benchmark run: gp_vmem_protect_limit and max_statement_mem to allow each segment to use more virtual memory. Here are commands to change this GUCS that has to be executed as gpadmin at the master host:
|
||||
|
||||
gpconfig -c gp_interconnect_type -v tcp
|
||||
gpconfig -c gp_vmem_protect_limit -v 3000
|
||||
gpconfig -c max_statement_mem -v '4000MB'
|
||||
|
||||
How to prepare data
|
||||
-------------------
|
||||
|
||||
One can prepare datasets to run the benchmark on using dump_dataset_from_ch.sh script from this repo. The script has to be run at at Clickhouse host. It takes a long time to get dumps.
|
||||
|
||||
Upload the datasets into Greenplum master.Then run schema.sql to prepare schema and load_data_set.sql to load data up. This operation also takes a long time.
|
||||
|
||||
How to conduct the benchmark
|
||||
__________________________
|
||||
There is a benchmark.sh that take some arguments. Here is the syntax:
|
||||
|
||||
./benchmark.sh sql_statements_file tablename dbname orca_switch
|
||||
|
||||
If you don't know about the last one then just use a default value.
|
@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
filename=${1-queries.sql}
|
||||
table=$2
|
||||
dbname=$3
|
||||
orca=${4-on}
|
||||
host1=somehost
|
||||
host2=somehost
|
||||
mem='15GB'
|
||||
cat $filename | sed "s/{table}/$table/g" | while read query ;
|
||||
do
|
||||
ssh -n $host1 'echo 3 | tee /proc/sys/vm/drop_caches; sync' > /dev/null
|
||||
ssh -n $host2 'echo 3 | tee /proc/sys/vm/drop_caches; sync' > /dev/null
|
||||
sleep 5
|
||||
echo $query | egrep "SELECT UserID, date_trunc\('minute', EventTime\) AS m|SELECT Referer AS key, avg\(length\(Referer\)\) AS l|SELECT URL, count(1) AS c FROM.*GROUP BY URL|SELECT 1, URL, count\(1\) AS c FROM.*GROUP BY 1" && mem='10GB'
|
||||
echo $query | egrep 'SELECT DISTINCT|GROUP BY UserID, SearchPhrase LIMIT 10|count\(DISTINCT UserID\) AS u' && mem='5GB'
|
||||
echo "####################"
|
||||
echo "$query"
|
||||
echo "Timestamp_begin:$(date)"
|
||||
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='256MB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
|
||||
echo "Timestamp_end:$(date)"
|
||||
echo "Timestamp_begin:$(date)"
|
||||
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='50GB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
|
||||
echo "Timestamp_end:$(date)"
|
||||
echo "Timestamp_begin:$(date)"
|
||||
echo "\\timing off \\\\set optimizer=$orca; set effective_cache_size='50GB'; set statement_mem='$mem';\\timing on \\\\ $query;" | psql -p 5432 -h 'localhost' -o /dev/null -U gpadmin ${dbname}
|
||||
echo "Timestamp_end:$(date)"
|
||||
echo "$query"
|
||||
echo '####################'
|
||||
done
|
@ -1,5 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
for table in hits_10m_single hits_100m_single hits_1000m_single; do
|
||||
clickhouse-client -q "SELECT (round(WatchID/2), JavaEnable, Title, GoodEvent, EventTime, EventDate, CounterID, ClientIP, RegionID,round(UserID/2), CounterClass, OS, UserAgent, URL, Referer, Refresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce,round(FUniqID/2), OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID,round(RefererHash/2),round(URLHash/2), CLID) FROM $table FORMAT CSV" > $table
|
||||
done
|
@ -1,12 +0,0 @@
|
||||
COPY hits_all_10m FROM '/data/hits_10m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_10m ON hits_all_10m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_10m_counterid on hits_all_10m using btree (counterid); CREATE INDEX idx_10m_userid on hits_all_10m using btree (userid);
|
||||
ANALYZE hits_all_10m;
|
||||
COPY hits_all_100m from '/data/hits_100m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_100m ON hits_all_100m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_100m_counterid on hits_all_100m using btree (counterid); CREATE INDEX idx_100m_userid on hits_all_100m using btree (userid);
|
||||
ANALYZE hits_all_100m;
|
||||
COPY hits_all_1000m from '/data/hits_1000m_single.dump' CSV SEGMENT REJECT LIMIT 30 PERCENT;
|
||||
CREATE INDEX pk_counterid_eventdate_userid_1000m ON hits_all_1000m USING btree (counterid, eventdate, userid);
|
||||
CREATE INDEX idx_1000m_counterid on hits_all_1000m using btree (counterid); CREATE INDEX idx_1000m_userid on hits_all_1000m using btree (userid);
|
||||
ANALYZE hits_all_1000m;
|
@ -1,43 +0,0 @@
|
||||
SELECT count(1) FROM {table}
|
||||
SELECT count(1) FROM {table} WHERE AdvEngineID != 0
|
||||
SELECT sum(AdvEngineID), count(1), avg(ResolutionWidth) FROM {table}
|
||||
SELECT sum(UserID) FROM {table}
|
||||
SELECT count(UserID) FROM ( SELECT DISTINCT UserID FROM {table} ) AS d
|
||||
SELECT count(SearchPhrase) FROM ( SELECT DISTINCT SearchPhrase FROM {table} ) AS d
|
||||
SELECT min(EventDate), max(EventDate) FROM {table}
|
||||
SELECT AdvEngineID, count(1) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY 2 DESC
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10
|
||||
SELECT RegionID, sum(AdvEngineID), count(1) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10
|
||||
SELECT SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10
|
||||
SELECT SearchEngineID, SearchPhrase, count(1) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT UserID, count(1) FROM {table} GROUP BY UserID ORDER BY 2 DESC LIMIT 10
|
||||
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY 3 DESC LIMIT 10
|
||||
SELECT UserID, SearchPhrase, count(1) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10
|
||||
SELECT UserID, date_trunc('minute', EventTime) AS m, SearchPhrase, count(1) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(1) DESC LIMIT 10
|
||||
SELECT UserID FROM {table} WHERE UserID = 12345678901234567890
|
||||
SELECT count(1) FROM {table} WHERE URL LIKE '%metrika%'
|
||||
SELECT SearchPhrase, max(URL) as URL, count(1) AS c FROM {table} h WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10
|
||||
SELECT SearchPhrase, max(URL) as URL, min(Title) as Title, count(1) AS c, count(DISTINCT UserID) FROM {table} WHERE Title LIKE '%\xd0\xaf\xd0\xbd\xd0\xb4\xd0\xb5\xd0\xba\xd1\x81%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
SELECT count(1) FROM {table}
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10
|
||||
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(1) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
|
||||
SELECT Referer AS key, avg(length(Referer)) AS l, count(1) AS c, Referer FROM {table} WHERE Referer != '' GROUP BY key HAVING count(1) > 100000 ORDER BY l DESC LIMIT 25
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}
|
||||
SELECT SearchEngineID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT WatchID, ClientIP, count(1) AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10
|
||||
SELECT URL, count(1) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10
|
||||
SELECT 1, URL, count(1) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10
|
||||
SELECT ClientIP AS x, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(1) AS c FROM {table} GROUP BY x, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10
|
||||
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits =0 AND Refresh = 0 AND URL <>'' GROUP BY URL ORDER BY PageViews DESC LIMIT 10
|
||||
SELECT Title, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate BETWEEN '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND DontCountHits=0 AND Refresh=0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10
|
||||
SELECT URL, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, case when (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END Src, URL AS Dst, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
|
||||
SELECT URLHash, EventDate, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND TraficSourceID IN (-1, 6) AND RefererHash = 7135345792483900000 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 AND URLHash = 7135345792483900000 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
SELECT date_trunc('minute', EventTime) AS Minute, count(1) AS PageViews FROM {table} WHERE CounterID = 62 AND eventDate between '2013-07-01'::timestamp AND '2013-07-31'::timestamp AND Refresh =0 AND DontCountHits =0 GROUP BY Minute ORDER BY Minute;
|
@ -1,122 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
def parse_block(block=[], options=[]):
|
||||
|
||||
#print('block is here', block)
|
||||
#show_query = False
|
||||
#show_query = options.show_query
|
||||
result = []
|
||||
query = block[0].strip()
|
||||
if len(block) > 4:
|
||||
timing1 = block[1].strip().split()[1]
|
||||
timing2 = block[3].strip().split()[1]
|
||||
timing3 = block[5].strip().split()[1]
|
||||
else:
|
||||
timing1 = block[1].strip().split()[1]
|
||||
timing2 = block[2].strip().split()[1]
|
||||
timing3 = block[3].strip().split()[1]
|
||||
if options.show_queries:
|
||||
result.append( query )
|
||||
if not options.show_first_timings:
|
||||
result += [ timing1 , timing2, timing3 ]
|
||||
else:
|
||||
result.append(timing1)
|
||||
return result
|
||||
|
||||
|
||||
def read_stats_file(options, fname):
|
||||
result = []
|
||||
int_result = []
|
||||
block = []
|
||||
time_count = 1
|
||||
with open(fname) as f:
|
||||
|
||||
for line in f.readlines():
|
||||
|
||||
if 'SELECT' in line:
|
||||
if len(block) > 1:
|
||||
result.append( parse_block(block, options) )
|
||||
block = [ line ]
|
||||
elif 'Time:' in line:
|
||||
block.append( line )
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def compare_stats_files(options, arguments):
|
||||
result = []
|
||||
file_output = []
|
||||
pyplot_colors = ['y', 'b', 'g', 'r']
|
||||
for fname in arguments[1:]:
|
||||
file_output.append((read_stats_file(options, fname)))
|
||||
if len(file_output[0]) > 0:
|
||||
timings_count = len(file_output[0])
|
||||
for idx, data_set in enumerate(file_output):
|
||||
int_result = []
|
||||
for timing in data_set:
|
||||
int_result.append(float(timing[0])) #y values
|
||||
result.append([[x for x in range(0, len(int_result)) ], int_result,
|
||||
pyplot_colors[idx] + '^' ] )
|
||||
# result.append([x for x in range(1, len(int_result)) ]) #x values
|
||||
# result.append( pyplot_colors[idx] + '^' )
|
||||
|
||||
return result
|
||||
|
||||
def parse_args():
|
||||
from optparse import OptionParser
|
||||
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
|
||||
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
|
||||
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
|
||||
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
|
||||
(options, arguments) = parser.parse_args(sys.argv)
|
||||
if len(arguments) < 2:
|
||||
parser.print_usage()
|
||||
sys.exit(1)
|
||||
return ( options, arguments )
|
||||
|
||||
def gen_pyplot_code(options, arguments):
|
||||
result = ''
|
||||
data_sets = compare_stats_files(options, arguments)
|
||||
for idx, data_set in enumerate(data_sets, start=0):
|
||||
x_values, y_values, line_style = data_set
|
||||
result += '\nplt.plot('
|
||||
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
|
||||
result += ', label=\'%s try\')' % idx
|
||||
print('import matplotlib.pyplot as plt')
|
||||
print(result)
|
||||
print( 'plt.xlabel(\'Try number\')' )
|
||||
print( 'plt.ylabel(\'Timing\')' )
|
||||
print( 'plt.title(\'Benchmark query timings\')' )
|
||||
print('plt.legend()')
|
||||
print('plt.show()')
|
||||
|
||||
|
||||
def gen_html_json(options, arguments):
|
||||
tuples = read_stats_file(options, arguments[1])
|
||||
print('{')
|
||||
print('"system: GreenPlum(x2),')
|
||||
print(('"version": "%s",' % '4.3.9.1'))
|
||||
print('"data_size": 10000000,')
|
||||
print('"time": "",')
|
||||
print('"comments": "",')
|
||||
print('"result":')
|
||||
print('[')
|
||||
for s in tuples:
|
||||
print(s)
|
||||
print(']')
|
||||
print('}')
|
||||
|
||||
|
||||
def main():
|
||||
( options, arguments ) = parse_args()
|
||||
if len(arguments) > 2:
|
||||
gen_pyplot_code(options, arguments)
|
||||
else:
|
||||
gen_html_json(options, arguments)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,3 +0,0 @@
|
||||
CREATE TABLE hits_all_10m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
||||
CREATE TABLE hits_all_100m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column, compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
||||
CREATE TABLE hits_all_1000m ( WatchID bigint, JavaEnable int, Title text, GoodEvent int, EventTime timestamp, EventDate timestamp, CounterID bigint, ClientIP bigint, RegionID bigint, UserID bigint, CounterClass int, OS int, UserAgent int, URL text, Referer text, Refresh int, RefererCategoryID int, RefererRegionID bigint, URLCategoryID int, URLRegionID bigint, ResolutionWidth int, ResolutionHeight int, ResolutionDepth int, FlashMajor int, FlashMinor int, FlashMinor2 text, NetMajor int, NetMinor int, UserAgentMajor int, CookieEnable int, JavascriptEnable int, IsMobile int, MobilePhone int, MobilePhoneModel text, Params text, IPNetworkID bigint, TraficSourceID int, SearchEngineID int, SearchPhrase text, AdvEngineID int, IsArtifical int, WindowClientWidth int, WindowClientHeight int, ClientTimeZone int, ClientEventTime timestamp, SilverlightVersion1 int, SilverlightVersion2 int, SilverlightVersion3 bigint, SilverlightVersion4 int, PageCharset text, CodeVersion bigint, IsLink int, IsDownload int, IsNotBounce int, FUniqID bigint, OriginalURL text, HID bigint, IsOldCounter int, IsEvent int, IsParameter int, DontCountHits int, WithHash int, HitColor varchar(3), LocalEventTime timestamp, Age int, Sex int, Income int, Interests int, Robotness int, RemoteIP bigint, WindowName int, OpenerName int, HistoryLength int, SocialNetwork text, SocialAction text, HTTPError int, SendTiming bigint, DNSTiming bigint, ConnectTiming bigint, ResponseStartTiming bigint, ResponseEndTiming bigint, FetchTiming bigint, SocialSourceNetworkID int, SocialSourcePage text, ParamPrice int, ParamOrderID text, OpenstatServiceName text, OpenstatCampaignID text, OpenstatAdID text, OpenstatSourceID text, UTMSource text, UTMMedium text, UTMCampaign text, UTMContent text, UTMTerm text, FromTag text, HasGCLID int, RefererHash bigint, URLHash bigint, CLID bigint) WITH (appendonly=true, orientation=column,compresstype=quicklz) DISTRIBUTED BY (userid) ;
|
@ -1,189 +0,0 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
if [[ -n $1 ]]; then
|
||||
SCALE=$1
|
||||
else
|
||||
SCALE=100
|
||||
fi
|
||||
|
||||
TABLE="hits_${SCALE}m_obfuscated"
|
||||
DATASET="${TABLE}_v1.tar.xz"
|
||||
QUERIES_FILE="queries.sql"
|
||||
TRIES=3
|
||||
|
||||
# Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'.
|
||||
|
||||
FASTER_DOWNLOAD=wget
|
||||
if command -v axel >/dev/null; then
|
||||
FASTER_DOWNLOAD=axel
|
||||
else
|
||||
echo "It's recommended to install 'axel' for faster downloads."
|
||||
fi
|
||||
|
||||
if command -v pixz >/dev/null; then
|
||||
TAR_PARAMS='-Ipixz'
|
||||
else
|
||||
echo "It's recommended to install 'pixz' for faster decompression of the dataset."
|
||||
fi
|
||||
|
||||
mkdir -p clickhouse-benchmark-$SCALE
|
||||
pushd clickhouse-benchmark-$SCALE
|
||||
|
||||
OS=$(uname -s)
|
||||
ARCH=$(uname -m)
|
||||
|
||||
DIR=
|
||||
|
||||
if [ "${OS}" = "Linux" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="amd64"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "FreeBSD" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="freebsd"
|
||||
elif [ "${ARCH}" = "aarch64" ]
|
||||
then
|
||||
DIR="freebsd-aarch64"
|
||||
elif [ "${ARCH}" = "powerpc64le" ]
|
||||
then
|
||||
DIR="freebsd-powerpc64le"
|
||||
fi
|
||||
elif [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
if [ "${ARCH}" = "x86_64" ]
|
||||
then
|
||||
DIR="macos"
|
||||
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
|
||||
then
|
||||
DIR="macos-aarch64"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "${DIR}" ]
|
||||
then
|
||||
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
|
||||
echo
|
||||
echo "Will download ${URL}"
|
||||
echo
|
||||
curl -O "${URL}" && chmod a+x clickhouse || exit 1
|
||||
echo
|
||||
echo "Successfully downloaded the ClickHouse binary"
|
||||
|
||||
chmod a+x clickhouse
|
||||
|
||||
if [[ ! -f $QUERIES_FILE ]]; then
|
||||
wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/$QUERIES_FILE"
|
||||
fi
|
||||
|
||||
if [[ ! -d data ]]; then
|
||||
if [[ ! -f $DATASET ]]; then
|
||||
$FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET"
|
||||
fi
|
||||
|
||||
tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET
|
||||
fi
|
||||
|
||||
uptime
|
||||
|
||||
echo "Starting clickhouse-server"
|
||||
|
||||
./clickhouse server > server.log 2>&1 &
|
||||
PID=$!
|
||||
|
||||
function finish {
|
||||
kill $PID
|
||||
wait
|
||||
}
|
||||
trap finish EXIT
|
||||
|
||||
echo "Waiting for clickhouse-server to start"
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM $TABLE" 2>/dev/null && break || echo '.'
|
||||
if [[ $i == 30 ]]; then exit 1; fi
|
||||
done
|
||||
|
||||
echo
|
||||
echo "Will perform benchmark. Results:"
|
||||
echo
|
||||
|
||||
cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do
|
||||
sync
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
sudo purge > /dev/null
|
||||
else
|
||||
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
|
||||
fi
|
||||
|
||||
echo -n "["
|
||||
for i in $(seq 1 $TRIES); do
|
||||
RES=$(./clickhouse client --max_memory_usage 100G --time --format=Null --query="$query" 2>&1 ||:)
|
||||
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
|
||||
[[ "$i" != $TRIES ]] && echo -n ", "
|
||||
done
|
||||
echo "],"
|
||||
done
|
||||
|
||||
|
||||
echo
|
||||
echo "Benchmark complete. System info:"
|
||||
echo
|
||||
|
||||
if [ "${OS}" = "Darwin" ]
|
||||
then
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}', version())"
|
||||
sw_vers | grep BuildVersion
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
sysctl hw.model
|
||||
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize'
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
echo '----Memory Free and Total-------'
|
||||
vm_stat
|
||||
echo '----Physical Memory Amount------'
|
||||
ls -l /var/vm
|
||||
echo '--------------------------------'
|
||||
else
|
||||
echo '----Version, build id-----------'
|
||||
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
|
||||
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
|
||||
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
|
||||
echo '----CPU-------------------------'
|
||||
cat /proc/cpuinfo | grep -i -F 'model name' | uniq
|
||||
lscpu
|
||||
echo '----Block Devices---------------'
|
||||
lsblk
|
||||
echo '----Disk Free and Total--------'
|
||||
df -h .
|
||||
echo '----Memory Free and Total-------'
|
||||
free -h
|
||||
echo '----Physical Memory Amount------'
|
||||
cat /proc/meminfo | grep MemTotal
|
||||
echo '----RAID Info-------------------'
|
||||
cat /proc/mdstat
|
||||
#echo '----PCI-------------------------'
|
||||
#lspci
|
||||
#echo '----All Hardware Info-----------'
|
||||
#lshw
|
||||
echo '--------------------------------'
|
||||
fi
|
||||
echo
|
@ -1,4 +0,0 @@
|
||||
CONF_DIR=/home/kartavyy/benchmark/hive
|
||||
expect_file=$CONF_DIR/expect.tcl
|
||||
test_file=$CONF_DIR/queries.sql
|
||||
etc_init_d_service=
|
@ -1,9 +0,0 @@
|
||||
create table hits_10m_raw ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID STRING, UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
|
||||
|
||||
load data local inpath '/opt/dump/dump_0.3/dump_hits_10m_meshed_utf8.tsv' overwrite into table hits_10m_raw;
|
||||
|
||||
create table hits_10m ( WatchID BIGINT, JavaEnable SMALLINT, Title STRING, GoodEvent SMALLINT, EventTime TIMESTAMP, EventDate TIMESTAMP, CounterID BIGINT, ClientIP BIGINT, RegionID BIGINT, UserID BIGINT, CounterClass TINYINT, OS SMALLINT, UserAgent SMALLINT, URL STRING, Referer STRING, Refresh TINYINT, RefererCategoryID INT, RefererRegionID BIGINT, URLCategoryID INT, URLRegionID BIGINT, ResolutionWidth INT, ResolutionHeight INT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 STRING, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor INT, UserAgentMinor STRING, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel STRING, Params STRING, IPNetworkID BIGINT, TraficSourceID SMALLINT, SearchEngineID INT, SearchPhrase STRING, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth INT, WindowClientHeight INT, ClientTimeZone INT, ClientEventTime TIMESTAMP, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 BIGINT, SilverlightVersion4 INT, PageCharset STRING, CodeVersion BIGINT, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL STRING, HID BIGINT, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor STRING, LocalEventTime TIMESTAMP, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests INT, Robotness SMALLINT, RemoteIP BIGINT, WindowName INT, OpenerName INT, HistoryLength SMALLINT, BrowserLanguage STRING, BrowserCountry STRING, SocialNetwork STRING, SocialAction STRING, HTTPError INT, SendTiming BIGINT, DNSTiming BIGINT, ConnectTiming BIGINT, ResponseStartTiming BIGINT, ResponseEndTiming BIGINT, FetchTiming BIGINT, SocialSourceNetworkID SMALLINT, SocialSourcePage STRING, ParamPrice BIGINT, ParamOrderID STRING, ParamCurrency STRING, ParamCurrencyID INT, OpenstatServiceName STRING, OpenstatCampaignID STRING, OpenstatAdID STRING, OpenstatSourceID STRING, UTMSource STRING, UTMMedium STRING, UTMCampaign STRING, UTMContent STRING, UTMTerm STRING, FromTag STRING, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID BIGINT, UserIDHash BIGINT ) CLUSTERED BY (EventDate) SORTED BY(CounterID, EventDate, UserIDHash, EventTime) INTO 10 BUCKETS STORED AS ORC tblproperties("orc.compress"="ZLIB");
|
||||
|
||||
insert overwrite table hits_10m select * from hits_10m_raw;
|
||||
|
||||
--drop table hits_10m_raw;
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
#!/bin/expect
|
||||
|
||||
# Set timeout
|
||||
set timeout 600
|
||||
|
||||
# Get arguments
|
||||
set query [lindex $argv 0]
|
||||
|
||||
spawn hive
|
||||
|
||||
expect "hive>"
|
||||
send "$query;\r"
|
||||
|
||||
expect "hive>"
|
||||
send "quit;\r"
|
||||
|
||||
expect eof
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,624 +0,0 @@
|
||||
start time: Вт. сент. 10 18:46:00 MSK 2013
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_15579@mturlrep13_201309101846_67163557.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(*) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_16038@mturlrep13_201309101846_623079473.txt
|
||||
hive> SELECT count(*) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0036
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:46:20,061 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:46:27,089 Stage-1 map = 7%, reduce = 0%
|
||||
2013-09-10 18:46:33,113 Stage-1 map = 14%, reduce = 0%
|
||||
2013-09-10 18:46:36,127 Stage-1 map = 22%, reduce = 0%
|
||||
2013-09-10 18:46:39,143 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:40,149 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:41,156 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:42,162 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:43,168 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:44,174 Stage-1 map = 29%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:45,179 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:46,185 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:47,191 Stage-1 map = 36%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:48,197 Stage-1 map = 43%, reduce = 0%, Cumulative CPU 46.41 sec
|
||||
2013-09-10 18:46:49,205 Stage-1 map = 47%, reduce = 0%, Cumulative CPU 62.51 sec
|
||||
2013-09-10 18:46:50,211 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:51,217 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:52,222 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:53,227 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:54,233 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:55,238 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:56,244 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:57,250 Stage-1 map = 54%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:58,255 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:46:59,261 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:00,266 Stage-1 map = 57%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:01,272 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:02,277 Stage-1 map = 61%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:03,282 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:04,287 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:05,305 Stage-1 map = 65%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:06,310 Stage-1 map = 69%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:07,316 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:08,321 Stage-1 map = 73%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:09,326 Stage-1 map = 76%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:10,331 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:11,336 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:12,341 Stage-1 map = 84%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:13,346 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:14,351 Stage-1 map = 88%, reduce = 17%, Cumulative CPU 83.95 sec
|
||||
2013-09-10 18:47:15,356 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:16,372 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:17,379 Stage-1 map = 93%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:18,384 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 118.21 sec
|
||||
2013-09-10 18:47:19,388 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:20,393 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:21,397 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 162.76 sec
|
||||
2013-09-10 18:47:22,404 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
2013-09-10 18:47:23,410 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
2013-09-10 18:47:24,415 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 165.27 sec
|
||||
MapReduce Total cumulative CPU time: 2 minutes 45 seconds 270 msec
|
||||
Ended Job = job_201309101627_0036
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 165.27 sec HDFS Read: 1082943442 HDFS Write: 9 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 2 minutes 45 seconds 270 msec
|
||||
OK
|
||||
10000000
|
||||
Time taken: 74.228 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17475@mturlrep13_201309101847_1783698271.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_17882@mturlrep13_201309101847_1295809350.txt
|
||||
hive> SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0037
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:47:44,058 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:47:49,086 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:50,093 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:51,101 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:52,107 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:53,113 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.21 sec
|
||||
2013-09-10 18:47:54,119 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.18 sec
|
||||
2013-09-10 18:47:55,125 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
|
||||
2013-09-10 18:47:56,130 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.81 sec
|
||||
2013-09-10 18:47:57,138 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
2013-09-10 18:47:58,144 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
2013-09-10 18:47:59,150 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 25.64 sec
|
||||
MapReduce Total cumulative CPU time: 25 seconds 640 msec
|
||||
Ended Job = job_201309101627_0037
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 25.64 sec HDFS Read: 907716 HDFS Write: 7 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 25 seconds 640 msec
|
||||
OK
|
||||
171127
|
||||
Time taken: 25.153 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19147@mturlrep13_201309101848_1891179156.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_19567@mturlrep13_201309101848_690102300.txt
|
||||
hive> SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0038
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:48:18,837 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:48:25,865 Stage-1 map = 39%, reduce = 0%
|
||||
2013-09-10 18:48:26,875 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:27,882 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:28,889 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:29,895 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:30,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:31,907 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:32,914 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 18.45 sec
|
||||
2013-09-10 18:48:33,920 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:34,925 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:35,930 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:36,935 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:37,940 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 34.59 sec
|
||||
2013-09-10 18:48:38,945 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 35.24 sec
|
||||
2013-09-10 18:48:39,952 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
|
||||
2013-09-10 18:48:40,958 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 36.63 sec
|
||||
MapReduce Total cumulative CPU time: 36 seconds 630 msec
|
||||
Ended Job = job_201309101627_0038
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 36.63 sec HDFS Read: 8109219 HDFS Write: 30 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 36 seconds 630 msec
|
||||
OK
|
||||
Time taken: 31.961 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_20898@mturlrep13_201309101848_327652001.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT sum(UserID) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_21336@mturlrep13_201309101848_1975614127.txt
|
||||
hive> SELECT sum(UserID) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0039
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:49:00,561 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:49:07,617 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:08,626 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:09,634 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:10,639 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:11,646 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:12,652 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 15.12 sec
|
||||
2013-09-10 18:49:13,658 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 21.86 sec
|
||||
2013-09-10 18:49:14,664 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:15,670 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:16,675 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:17,680 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:18,685 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:19,690 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 30.08 sec
|
||||
2013-09-10 18:49:20,697 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
|
||||
2013-09-10 18:49:21,703 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 32.07 sec
|
||||
MapReduce Total cumulative CPU time: 32 seconds 70 msec
|
||||
Ended Job = job_201309101627_0039
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 32.07 sec HDFS Read: 57312623 HDFS Write: 21 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 32 seconds 70 msec
|
||||
OK
|
||||
-4662894107982093709
|
||||
Time taken: 30.94 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22560@mturlrep13_201309101849_2023198520.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_22993@mturlrep13_201309101849_961728603.txt
|
||||
hive> SELECT count(DISTINCT UserID) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0040
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:49:41,232 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:49:48,264 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:49:51,283 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:52,291 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:53,298 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:54,304 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:55,310 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:56,317 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:57,332 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:58,337 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:49:59,342 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:50:00,348 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 27.01 sec
|
||||
2013-09-10 18:50:01,353 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:02,360 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:03,365 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:04,369 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:05,375 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:06,379 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:07,385 Stage-1 map = 100%, reduce = 88%, Cumulative CPU 55.01 sec
|
||||
2013-09-10 18:50:08,391 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
2013-09-10 18:50:09,397 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
2013-09-10 18:50:10,402 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 62.95 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 2 seconds 950 msec
|
||||
Ended Job = job_201309101627_0040
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 62.95 sec HDFS Read: 57312623 HDFS Write: 8 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 1 minutes 2 seconds 950 msec
|
||||
OK
|
||||
2037258
|
||||
Time taken: 38.84 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_24634@mturlrep13_201309101850_840502487.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_25401@mturlrep13_201309101850_84750246.txt
|
||||
hive> SELECT count(DISTINCT SearchPhrase) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0041
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:50:31,472 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:50:38,501 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:50:40,517 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:41,523 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:42,531 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:43,536 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:44,542 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:45,548 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:46,555 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:47,561 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 21.42 sec
|
||||
2013-09-10 18:50:48,566 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 31.8 sec
|
||||
2013-09-10 18:50:49,571 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:50,576 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:51,581 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:52,587 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:53,592 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:54,597 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:55,602 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:56,607 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 42.95 sec
|
||||
2013-09-10 18:50:57,615 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
|
||||
2013-09-10 18:50:58,642 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 50.6 sec
|
||||
MapReduce Total cumulative CPU time: 50 seconds 600 msec
|
||||
Ended Job = job_201309101627_0041
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 50.6 sec HDFS Read: 27820105 HDFS Write: 8 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 50 seconds 600 msec
|
||||
OK
|
||||
1110413
|
||||
Time taken: 37.04 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_26718@mturlrep13_201309101851_285967686.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_27149@mturlrep13_201309101851_2135309314.txt
|
||||
hive> SELECT min(EventDate), max(EventDate) FROM hits_10m;;
|
||||
Total MapReduce jobs = 1
|
||||
Launching Job 1 out of 1
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0042
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 1
|
||||
2013-09-10 18:51:19,077 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:51:25,106 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:26,114 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:27,123 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:28,129 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:29,135 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:30,141 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 13.92 sec
|
||||
2013-09-10 18:51:31,147 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 20.4 sec
|
||||
2013-09-10 18:51:32,152 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:33,158 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:34,163 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:35,168 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:36,173 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:37,179 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:38,184 Stage-1 map = 100%, reduce = 25%, Cumulative CPU 27.44 sec
|
||||
2013-09-10 18:51:39,192 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
|
||||
2013-09-10 18:51:40,198 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 29.39 sec
|
||||
MapReduce Total cumulative CPU time: 29 seconds 390 msec
|
||||
Ended Job = job_201309101627_0042
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 1 Cumulative CPU: 29.39 sec HDFS Read: 597016 HDFS Write: 6 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 29 seconds 390 msec
|
||||
OK
|
||||
Time taken: 30.908 seconds, Fetched: 1 row(s)
|
||||
hive> quit;
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28401@mturlrep13_201309101851_891001725.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_28836@mturlrep13_201309101851_1054092389.txt
|
||||
hive> SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0043
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:51:59,809 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:04,838 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:05,847 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:06,855 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:07,861 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:08,868 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 12.48 sec
|
||||
2013-09-10 18:52:09,875 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 18.07 sec
|
||||
2013-09-10 18:52:10,881 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
|
||||
2013-09-10 18:52:11,887 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 23.92 sec
|
||||
2013-09-10 18:52:12,894 Stage-1 map = 100%, reduce = 67%, Cumulative CPU 25.68 sec
|
||||
2013-09-10 18:52:13,901 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
|
||||
2013-09-10 18:52:14,908 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 27.53 sec
|
||||
MapReduce Total cumulative CPU time: 27 seconds 530 msec
|
||||
Ended Job = job_201309101627_0043
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0044
|
||||
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
|
||||
2013-09-10 18:52:17,388 Stage-2 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:19,396 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:20,401 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:21,406 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:22,411 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:23,415 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:24,420 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:25,425 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:26,430 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 0.75 sec
|
||||
2013-09-10 18:52:27,436 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
2013-09-10 18:52:28,442 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
2013-09-10 18:52:29,448 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.14 sec
|
||||
MapReduce Total cumulative CPU time: 2 seconds 140 msec
|
||||
Ended Job = job_201309101627_0044
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 27.53 sec HDFS Read: 907716 HDFS Write: 384 SUCCESS
|
||||
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 2.14 sec HDFS Read: 1153 HDFS Write: 60 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 29 seconds 670 msec
|
||||
OK
|
||||
Time taken: 39.506 seconds, Fetched: 9 row(s)
|
||||
hive> quit;
|
||||
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
|
||||
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_30667@mturlrep13_201309101852_966681525.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_31123@mturlrep13_201309101852_1252745596.txt
|
||||
hive> SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0045
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:52:49,457 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:52:56,485 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:52:59,503 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 14.56 sec
|
||||
2013-09-10 18:53:00,511 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:01,519 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:02,526 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:03,533 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:04,539 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:05,545 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:06,550 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:07,557 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:08,563 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:09,569 Stage-1 map = 92%, reduce = 17%, Cumulative CPU 29.73 sec
|
||||
2013-09-10 18:53:10,575 Stage-1 map = 97%, reduce = 17%, Cumulative CPU 44.01 sec
|
||||
2013-09-10 18:53:11,598 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:12,604 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:13,609 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:14,615 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:15,620 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 58.47 sec
|
||||
2013-09-10 18:53:16,627 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:17,634 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:18,640 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:19,646 Stage-1 map = 100%, reduce = 63%, Cumulative CPU 65.64 sec
|
||||
2013-09-10 18:53:20,653 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
|
||||
2013-09-10 18:53:21,659 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 71.27 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 11 seconds 270 msec
|
||||
Ended Job = job_201309101627_0045
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0046
|
||||
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
|
||||
2013-09-10 18:53:25,187 Stage-2 map = 0%, reduce = 0%
|
||||
2013-09-10 18:53:27,196 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:28,202 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:29,207 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:30,211 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:31,216 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:32,220 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:33,226 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:34,231 Stage-2 map = 100%, reduce = 33%, Cumulative CPU 1.42 sec
|
||||
2013-09-10 18:53:35,237 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
|
||||
2013-09-10 18:53:36,243 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 3.16 sec
|
||||
MapReduce Total cumulative CPU time: 3 seconds 160 msec
|
||||
Ended Job = job_201309101627_0046
|
||||
MapReduce Jobs Launched:
|
||||
Job 0: Map: 4 Reduce: 2 Cumulative CPU: 71.27 sec HDFS Read: 67340015 HDFS Write: 100142 SUCCESS
|
||||
Job 1: Map: 1 Reduce: 1 Cumulative CPU: 3.16 sec HDFS Read: 100911 HDFS Write: 96 SUCCESS
|
||||
Total MapReduce CPU Time Spent: 1 minutes 14 seconds 430 msec
|
||||
OK
|
||||
Time taken: 56.439 seconds, Fetched: 10 row(s)
|
||||
hive> quit;
|
||||
-- агрегация, среднее количество ключей.;
|
||||
|
||||
status
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_609@mturlrep13_201309101853_355533849.txt
|
||||
hive> ;
|
||||
hive> quit;
|
||||
|
||||
times: 1
|
||||
query: SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
spawn hive
|
||||
|
||||
Logging initialized using configuration in file:/opt/hive/conf/hive-log4j.properties
|
||||
Hive history file=/tmp/kartavyy/hive_job_log_kartavyy_1183@mturlrep13_201309101853_289725544.txt
|
||||
hive> SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;;
|
||||
Total MapReduce jobs = 2
|
||||
Launching Job 1 out of 2
|
||||
Number of reduce tasks not specified. Estimated from input data size: 2
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0047
|
||||
Hadoop job information for Stage-1: number of mappers: 4; number of reducers: 2
|
||||
2013-09-10 18:53:55,838 Stage-1 map = 0%, reduce = 0%
|
||||
2013-09-10 18:54:02,865 Stage-1 map = 29%, reduce = 0%
|
||||
2013-09-10 18:54:05,876 Stage-1 map = 43%, reduce = 0%
|
||||
2013-09-10 18:54:08,894 Stage-1 map = 46%, reduce = 0%, Cumulative CPU 16.8 sec
|
||||
2013-09-10 18:54:09,901 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:10,909 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:11,915 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:12,921 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:13,927 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:14,932 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:15,938 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:16,943 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:17,949 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:18,954 Stage-1 map = 80%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:19,959 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:20,964 Stage-1 map = 96%, reduce = 17%, Cumulative CPU 34.85 sec
|
||||
2013-09-10 18:54:21,970 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:22,975 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:23,980 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:24,986 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:25,991 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:26,997 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:28,002 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:29,008 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:30,014 Stage-1 map = 100%, reduce = 17%, Cumulative CPU 67.35 sec
|
||||
2013-09-10 18:54:31,021 Stage-1 map = 100%, reduce = 58%, Cumulative CPU 74.39 sec
|
||||
2013-09-10 18:54:32,027 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 74.39 sec
|
||||
2013-09-10 18:54:33,033 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
2013-09-10 18:54:34,038 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
2013-09-10 18:54:35,044 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 84.05 sec
|
||||
MapReduce Total cumulative CPU time: 1 minutes 24 seconds 50 msec
|
||||
Ended Job = job_201309101627_0047
|
||||
Launching Job 2 out of 2
|
||||
Number of reduce tasks determined at compile time: 1
|
||||
In order to change the average load for a reducer (in bytes):
|
||||
set hive.exec.reducers.bytes.per.reducer=<number>
|
||||
In order to limit the maximum number of reducers:
|
||||
set hive.exec.reducers.max=<number>
|
||||
In order to set a constant number of reducers:
|
||||
set mapred.reduce.tasks=<number>
|
||||
Kill Command = /usr/libexec/../bin/hadoop job -kill job_201309101627_0048
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,109 +0,0 @@
|
||||
SELECT count(*) FROM hits_10m;
|
||||
SELECT count(*) FROM hits_10m WHERE AdvEngineID != 0;
|
||||
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_10m;
|
||||
SELECT sum(UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT UserID) FROM hits_10m;
|
||||
SELECT count(DISTINCT SearchPhrase) FROM hits_10m;
|
||||
SELECT min(EventDate), max(EventDate) FROM hits_10m;
|
||||
|
||||
SELECT AdvEngineID, count(*) AS c FROM hits_10m WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY c DESC;
|
||||
-- мощная фильтрация. После фильтрации почти ничего не остаётся, но делаем ещё агрегацию.;
|
||||
|
||||
SELECT RegionID, count(DISTINCT UserID) AS u FROM hits_10m GROUP BY RegionID ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей.;
|
||||
|
||||
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), count(DISTINCT UserID) FROM hits_10m GROUP BY RegionID ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация, среднее количество ключей, несколько агрегатных функций.;
|
||||
|
||||
SELECT MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по строкам.;
|
||||
|
||||
SELECT MobilePhone, MobilePhoneModel, count(DISTINCT UserID) AS u FROM hits_10m WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
|
||||
-- мощная фильтрация по строкам, затем агрегация по паре из числа и строки.;
|
||||
|
||||
SELECT SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- средняя фильтрация по строкам, затем агрегация по строкам, большое количество ключей.;
|
||||
|
||||
SELECT SearchPhrase, count(DISTINCT UserID) AS u FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
-- агрегация чуть сложнее.;
|
||||
|
||||
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по числу и строке, большое количество ключей.;
|
||||
|
||||
SELECT UserID, count(*) AS c FROM hits_10m GROUP BY UserID ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по очень большому количеству ключей, может не хватить оперативки.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация.;
|
||||
|
||||
SELECT UserID, SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, SearchPhrase LIMIT 10;
|
||||
-- то же самое, но без сортировки.;
|
||||
|
||||
SELECT UserID, minute(EventTime), SearchPhrase, count(*) AS c FROM hits_10m GROUP BY UserID, minute(EventTime), SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- ещё более сложная агрегация, не стоит выполнять на больших таблицах.;
|
||||
|
||||
SELECT UserID FROM hits_10m WHERE UserID = 12345678901234567890;
|
||||
-- мощная фильтрация по столбцу типа UInt64.;
|
||||
|
||||
SELECT count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%';
|
||||
-- фильтрация по поиску подстроки в строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), count(*) AS c FROM hits_10m WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- вынимаем большие столбцы, фильтрация по строке.;
|
||||
|
||||
SELECT SearchPhrase, MAX(URL), MAX(Title), count(*) AS c, count(DISTINCT UserID) FROM hits_10m WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
|
||||
-- чуть больше столбцы.;
|
||||
|
||||
SELECT * FROM hits_10m WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
|
||||
-- плохой запрос - вынимаем все столбцы.;
|
||||
|
||||
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
|
||||
-- большая сортировка.;
|
||||
|
||||
SELECT SearchPhrase FROM hits_10m WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по строкам.;
|
||||
|
||||
SELECT SearchPhrase, EventTime FROM hits_10m WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
|
||||
-- большая сортировка по кортежу.;
|
||||
|
||||
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_10m WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- считаем средние длины URL для крупных счётчиков.;
|
||||
|
||||
SELECT SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)), avg(length(Referer)) AS l, count(*) AS c, MAX(Referer) FROM hits_100m WHERE Referer != '' GROUP BY SUBSTRING(SUBSTRING(Referer, instr(Referer, '//') + 2), 1, if(0 < instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/') - 1, instr(SUBSTRING(Referer, instr(Referer, '//') + 2), '/' ) - 1, 0)) HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
|
||||
-- то же самое, но с разбивкой по доменам.;
|
||||
|
||||
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_10m;
|
||||
-- много тупых агрегатных функций.;
|
||||
|
||||
SELECT SearchEngineID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- сложная агрегация, для больших таблиц может не хватить оперативки.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по двум полям, которая ничего не агрегирует. Для больших таблиц выполнить не получится.;
|
||||
|
||||
SELECT WatchID, ClientIP, count(*) AS c, sum(Refresh), avg(ResolutionWidth) FROM hits_10m GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
|
||||
-- то же самое, но ещё и без фильтрации.;
|
||||
|
||||
SELECT URL, count(*) AS c FROM hits_10m GROUP BY URL ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по URL.;
|
||||
|
||||
SELECT 1, URL, count(*) AS c FROM hits_10m GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
|
||||
-- агрегация по URL и числу.;
|
||||
|
||||
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_10m GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT Title, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT DontCountHits != 0 AND NOT Refresh != 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
|
||||
|
||||
|
||||
SELECT URL, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND IsLink != 0 AND NOT IsDownload != 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
|
||||
|
||||
SELECT TraficSourceID, SearchEngineID, AdvEngineID, URL, count(*) as c, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') as src FROM hits_100m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, URL, if(SearchEngineID = 0 AND AdvEngineID = 0 , Referer, '') ORDER BY c DESC LIMIT 1000;
|
||||
|
||||
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 6202628419148573758 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100000;
|
||||
|
||||
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-31') AND NOT Refresh != 0 AND NOT DontCountHits != 0 AND URLHash = 6202628419148573758 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
|
||||
|
||||
SELECT unix_timestamp(EventTime) - SECOND(EventTime) AS m, count(*) FROM hits_10m WHERE CounterID = 62 AND EventDate >= TIMESTAMP('2013-07-01') AND EventDate <= TIMESTAMP('2013-07-02') AND NOT Refresh != 0 AND NOT DontCountHits != 0 GROUP BY unix_timestamp(EventTime) - SECOND(EventTime) ORDER BY m;
|
@ -1,2 +0,0 @@
|
||||
cd /home/kartavyy/benchmark
|
||||
./benchmark.sh -c hive/conf.sh -n $1 > hive/log/log_$1
|
@ -1,4 +0,0 @@
|
||||
CONF_DIR=/home/kartavyy/benchmark/mysql
|
||||
expect_file=$CONF_DIR/expect.tcl
|
||||
test_file=$CONF_DIR/queries.sql
|
||||
etc_init_d_service=/etc/init.d/mysql
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user