Merge branch 'master' into hanfei/gwp-asan

Han Fei 2023-02-07 14:55:55 +01:00 committed by GitHub
commit 0f7defb87f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
641 changed files with 19416 additions and 5163 deletions

@@ -512,6 +512,75 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
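
The install-check jobs added above differ only in the runner label and in the check name passed to install_check.py. As a minimal sketch, the same sequence of steps could be reproduced locally roughly like this, assuming a ClickHouse checkout in $GITHUB_WORKSPACE as in CI (the paths and the chosen check name are illustrative, not prescribed by the workflow):

#!/usr/bin/env bash
set -euo pipefail
# Mirror of the "Test packages installation" step above; values are placeholders.
export TEMP_PATH="${TMPDIR:-/tmp}/test_install"
export REPORTS_PATH="${TMPDIR:-/tmp}/reports_dir"      # where the downloaded build reports would live
export REPO_COPY="$TEMP_PATH/ClickHouse"
export CHECK_NAME="Install packages (amd64)"           # or "Install packages (arm64)"
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"                 # the checkout must be named ClickHouse for REPO_COPY to resolve
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"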

@@ -946,6 +946,75 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

@@ -984,6 +984,75 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
@@ -2813,6 +2882,217 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
# Parallel replicas
FunctionalStatefulTestDebugParallelReplicas:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestUBsanParallelReplicas:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_ubsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (ubsan, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestMsanParallelReplicas:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_msan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (msan, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestTsanParallelReplicas:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_tsan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (tsan, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestAsanParallelReplicas:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (asan, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatefulTestReleaseParallelReplicas:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (release, ParallelReplicas)
REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################

@@ -15,7 +15,8 @@ jobs:
- name: Deploy packages and assets
run: |
GITHUB_TAG="${GITHUB_REF#refs/tags/}"
curl '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' -d ''
curl --silent --data '' \
'${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
############################################################################################
##################################### Docker images #######################################
############################################################################################

@@ -604,6 +604,75 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
InstallPackagesTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (amd64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
InstallPackagesTestAarch64:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/test_install
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Install packages (arm64)
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Test packages installation
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 install_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################

.gitmodules vendored

@@ -257,6 +257,9 @@
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
[submodule "contrib/idxd-config"]
path = contrib/idxd-config
url = https://github.com/intel/idxd-config
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash
@@ -330,3 +333,6 @@
[submodule "contrib/crc32-vpmsum"]
path = contrib/crc32-vpmsum
url = https://github.com/antonblanchard/crc32-vpmsum.git
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing

@@ -15,6 +15,8 @@
* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with the deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. [#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Use the `GetObjectAttributes` request instead of the `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit regions after updating the AWS SDK, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). AWS S3 and Minio are tested, but keep in mind that various S3-compatible services (GCS, R2, B2) may have subtle incompatibilities. This change also may require you to adjust the ACL to allow the `GetObjectAttributes` request.
* Forbid paths in timezone names. For example, a timezone name like `/usr/share/zoneinfo/Asia/Aden` is not allowed; the IANA timezone database name like `Asia/Aden` should be used. [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)).
* Queries combining equijoin and constant expressions (e.g., `JOIN ON t1.x = t2.x AND 1 = 1`) are forbidden due to incorrect results. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)).
#### New Feature
* Dictionary source for extracting keys by traversing regular expressions tree. It can be used for User-Agent parsing. [#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)).
@@ -119,7 +121,6 @@ Add settings input_format_tsv/csv/custom_detect_header that enable this behaviou
* Fix possible use of an uninitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)).
* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using an obscure optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix CREATE USER/ROLE query settings constraints. [#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix incorrect behavior of `JOIN ON t1.x = t2.x AND 1 = 1`, forbid such queries. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)).
* Fixed bug with non-parsable default value for `EPHEMERAL` column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)).
* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)).

@@ -9,7 +9,7 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.

@@ -141,6 +141,7 @@ add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)
add_contrib (libuv-cmake libuv)
add_contrib (liburing-cmake liburing)
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv

contrib/NuRaft vendored

@@ -1 +1 @@
Subproject commit 545b8c810a956b2efdc116e86be219af7e83d68a
Subproject commit b56784be1aec568fb72aff47f281097c017623cb

contrib/aws vendored

@@ -1 +1 @@
Subproject commit 4a12641211d4dbc8e2fdb2dd0f1eea0927db9252
Subproject commit 06a6610e6fb3385e22ad85014a67aa307825ffb1

contrib/azure vendored

@@ -1 +1 @@
Subproject commit ea8c3044f43f5afa7016d2d580ed201f495d7e94
Subproject commit 096049bf24fffafcaccc132b9367694532716731

contrib/idxd-config vendored Submodule

@@ -0,0 +1 @@
Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99

contrib/liburing vendored Submodule

@@ -0,0 +1 @@
Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949

@@ -0,0 +1,53 @@
set (ENABLE_LIBURING_DEFAULT ${ENABLE_LIBRARIES})
if (NOT OS_LINUX)
set (ENABLE_LIBURING_DEFAULT OFF)
endif ()
option (ENABLE_LIBURING "Enable liburing" ${ENABLE_LIBURING_DEFAULT})
if (NOT ENABLE_LIBURING)
message (STATUS "Not using liburing")
return ()
endif ()
set (LIBURING_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src/include")
set (LIBURING_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src")
set (SRCS
"${LIBURING_SOURCE_DIR}/queue.c"
"${LIBURING_SOURCE_DIR}/register.c"
"${LIBURING_SOURCE_DIR}/setup.c"
"${LIBURING_SOURCE_DIR}/syscall.c"
"${LIBURING_SOURCE_DIR}/version.c"
)
add_compile_definitions (_GNU_SOURCE)
add_compile_definitions (LIBURING_INTERNAL)
set (LIBURING_COMPAT_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include-compat")
set (LIBURING_COMPAT_HEADER "${LIBURING_COMPAT_INCLUDE_DIR}/liburing/compat.h")
set (LIBURING_CONFIG_HAS_KERNEL_RWF_T FALSE)
set (LIBURING_CONFIG_HAS_KERNEL_TIMESPEC FALSE)
set (LIBURING_CONFIG_HAS_OPEN_HOW FALSE)
set (LIBURING_CONFIG_HAS_STATX FALSE)
set (LIBURING_CONFIG_HAS_GLIBC_STATX FALSE)
configure_file (compat.h.in ${LIBURING_COMPAT_HEADER})
set (LIBURING_GENERATED_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include")
set (LIBURING_VERSION_HEADER "${LIBURING_GENERATED_INCLUDE_DIR}/liburing/io_uring_version.h")
file (READ "${LIBURING_SOURCE_DIR}/../liburing.spec" LIBURING_SPEC)
string (REGEX MATCH "Version: ([0-9]+)\.([0-9]+)" _ ${LIBURING_SPEC})
set (LIBURING_VERSION_MAJOR ${CMAKE_MATCH_1})
set (LIBURING_VERSION_MINOR ${CMAKE_MATCH_2})
configure_file (io_uring_version.h.in ${LIBURING_VERSION_HEADER})
add_library (_liburing ${SRCS})
add_library (ch_contrib::liburing ALIAS _liburing)
target_include_directories (_liburing SYSTEM PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_GENERATED_INCLUDE_DIR} "${LIBURING_SOURCE_DIR}/include")
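
For reference, the two version macros above are produced by scraping "Version: X.Y" out of the submodule's spec file with string(REGEX MATCH ...). A quick sketch for checking what values that regex will capture, assuming the submodule is checked out under contrib/liburing:

# Print the line the CMake regex above matches; the two numbers become
# LIBURING_VERSION_MAJOR / LIBURING_VERSION_MINOR in io_uring_version.h.
grep -Eo 'Version: [0-9]+\.[0-9]+' contrib/liburing/liburing.spec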

@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_COMPAT_H
#define LIBURING_COMPAT_H
# cmakedefine LIBURING_CONFIG_HAS_KERNEL_RWF_T
# cmakedefine LIBURING_CONFIG_HAS_KERNEL_TIMESPEC
# cmakedefine LIBURING_CONFIG_HAS_OPEN_HOW
# cmakedefine LIBURING_CONFIG_HAS_GLIBC_STATX
# cmakedefine LIBURING_CONFIG_HAS_STATX
#if !defined(LIBURING_CONFIG_HAS_KERNEL_RWF_T)
typedef int __kernel_rwf_t;
#endif
#if !defined(LIBURING_CONFIG_HAS_KERNEL_TIMESPEC)
#include <stdint.h>
struct __kernel_timespec {
int64_t tv_sec;
long long tv_nsec;
};
/* <linux/time_types.h> is not available, so it can't be included */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
#else
#include <linux/time_types.h>
/* <linux/time_types.h> is included above and not needed again */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
#endif
#if !defined(LIBURING_CONFIG_HAS_OPEN_HOW)
#include <inttypes.h>
struct open_how {
uint64_t flags;
uint64_t mode;
uint64_t resolve;
};
#else
#include <linux/openat2.h>
#endif
#if !defined(LIBURING_CONFIG_HAS_GLIBC_STATX) && defined(LIBURING_CONFIG_HAS_STATX)
#include <sys/stat.h>
#endif
#endif

@@ -0,0 +1,8 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_VERSION_H
#define LIBURING_VERSION_H
#define IO_URING_VERSION_MAJOR ${LIBURING_VERSION_MAJOR}
#define IO_URING_VERSION_MINOR ${LIBURING_VERSION_MINOR}
#endif

contrib/qpl vendored

@@ -1 +1 @@
Subproject commit becb7a1b15bdb4845ec3721a550707ffa51d029d
Subproject commit d75a29d95d8a548297fce3549d21020005364dc8

@@ -10,11 +10,30 @@ if (NOT ENABLE_QPL)
return()
endif()
## QPL has build dependency on libaccel-config. Here is to build libaccel-config which is required by QPL.
## libaccel-config is the utility library for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA).
set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake/idxd-header")
set (SRCS
"${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c"
"${LIBACCEL_SOURCE_DIR}/util/log.c"
"${LIBACCEL_SOURCE_DIR}/util/sysfs.c"
)
add_library(accel-config ${SRCS})
target_compile_options(accel-config PRIVATE "-D_GNU_SOURCE")
target_include_directories(accel-config BEFORE
PRIVATE ${UUID_DIR}
PRIVATE ${LIBACCEL_HEADER_DIR}
PRIVATE ${LIBACCEL_SOURCE_DIR})
## QPL build start here.
set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (EFFICIENT_WAIT OFF)
set (BLOCK_ON_FAULT ON)
set (LOG_HW_INIT OFF)
@@ -315,13 +334,8 @@ target_compile_definitions(_qpl
PRIVATE -DQPL_BADARG_CHECK
PUBLIC -DENABLE_QPL_COMPRESSION)
find_library(LIBACCEL accel-config)
if(NOT LIBACCEL)
message(FATAL_ERROR "Please install QPL dependency library:libaccel-config from https://github.com/intel/idxd-config")
endif()
target_link_libraries(_qpl
PRIVATE ${LIBACCEL}
PRIVATE accel-config
PRIVATE ${CMAKE_DL_LIBS})
add_library (ch_contrib::qpl ALIAS _qpl)

@@ -0,0 +1,159 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */
/* Debug messages. */
/* #undef ENABLE_DEBUG */
/* Documentation / man pages. */
/* #define ENABLE_DOCS */
/* System logging. */
#define ENABLE_LOGGING 1
/* accfg test support */
/* #undef ENABLE_TEST */
/* Define to 1 if big-endian-arch */
/* #undef HAVE_BIG_ENDIAN */
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the <linux/version.h> header file. */
#define HAVE_LINUX_VERSION_H 1
/* Define to 1 if little-endian-arch */
#define HAVE_LITTLE_ENDIAN 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `secure_getenv' function. */
#define HAVE_SECURE_GETENV 1
/* Define to 1 if you have statement expressions. */
#define HAVE_STATEMENT_EXPR 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if typeof works with your compiler. */
#define HAVE_TYPEOF 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if using libuuid */
#define HAVE_UUID 1
/* Define to 1 if you have the `__secure_getenv' function. */
/* #undef HAVE___SECURE_GETENV */
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
/* Name of package */
#define PACKAGE "accel-config"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "linux-dsa@lists.01.org"
/* Define to the full name of this package. */
#define PACKAGE_NAME "accel-config"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "accel-config 3.5.2.gitf6605c41"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "accel-config"
/* Define to the home page for this package. */
#define PACKAGE_URL "https://github.com/xxx/accel-config"
/* Define to the version of this package. */
#define PACKAGE_VERSION "3.5.2.gitf6605c41"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Enable extensions on AIX 3, Interix. */
#ifndef _ALL_SOURCE
# define _ALL_SOURCE 1
#endif
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
/* Enable threading extensions on Solaris. */
#ifndef _POSIX_PTHREAD_SEMANTICS
# define _POSIX_PTHREAD_SEMANTICS 1
#endif
/* Enable extensions on HP NonStop. */
#ifndef _TANDEM_SOURCE
# define _TANDEM_SOURCE 1
#endif
/* Enable general extensions on Solaris. */
#ifndef __EXTENSIONS__
# define __EXTENSIONS__ 1
#endif
/* Version number of package */
#define VERSION "3.5.2.gitf6605c41"
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* # undef WORDS_BIGENDIAN */
# endif
#endif
/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
/* #undef _FILE_OFFSET_BITS */
/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
/* #undef _POSIX_1_SOURCE */
/* Define to 1 if you need to in order for `stat' and other things to work. */
/* #undef _POSIX_SOURCE */
/* Define to __typeof__ if your compiler spells it that way. */
/* #undef typeof */

@@ -1,6 +1,10 @@
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")
set (SNAPPY_IS_BIG_ENDIAN 0)
if (ARCH_S390X)
set (SNAPPY_IS_BIG_ENDIAN 1)
else ()
set (SNAPPY_IS_BIG_ENDIAN 0)
endif()
set (HAVE_BYTESWAP_H 1)
set (HAVE_SYS_MMAN_H 1)

@@ -21,5 +21,3 @@ RUN yarn config set registry https://registry.npmjs.org \
COPY run.sh /run.sh
ENTRYPOINT ["/run.sh"]
CMD ["yarn", "build"]

@@ -25,7 +25,8 @@ done
sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
export CI=true
export CI=true
yarn install
exec yarn build "$@"
fi

@@ -134,6 +134,14 @@
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
"docker/test/install/deb": {
"name": "clickhouse/install-deb-test",
"dependent": []
},
"docker/test/install/rpm": {
"name": "clickhouse/install-rpm-test",
"dependent": []
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
"dependent": [

@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.1.2.9"
ARG VERSION="23.1.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

@@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.1.2.9"
ARG VERSION="23.1.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

@@ -139,6 +139,7 @@ function clone_submodules
contrib/morton-nd
contrib/xxHash
contrib/simdjson
contrib/liburing
)
git submodule sync
@@ -161,6 +162,7 @@ function run_cmake
"-DENABLE_NURAFT=1"
"-DENABLE_SIMDJSON=1"
"-DENABLE_JEMALLOC=1"
"-DENABLE_LIBURING=1"
)
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
@@ -229,6 +231,7 @@ function run_tests
--hung-check
--fast-tests-only
--no-random-settings
--no-random-merge-tree-settings
--no-long
--testname
--shard

@@ -0,0 +1,64 @@
FROM ubuntu:22.04
# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
ENV \
DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
container=docker \
init=/lib/systemd/systemd
# install systemd packages
RUN apt-get update && \
apt-get install -y --no-install-recommends \
systemd \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists
# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
find \
/etc/systemd/system/*.wants/* \
/lib/systemd/system/multi-user.target.wants/* \
/lib/systemd/system/sockets.target.wants/*initctl* \
! -type d \
-delete && \
find \
/lib/systemd/system/sysinit.target.wants \
! -type d \
! -name '*systemd-tmpfiles-setup*' \
-delete && \
find \
/lib/systemd \
-name systemd-update-utmp-runlevel.service \
-delete && \
rm -vf /usr/share/systemd/tmp.mount && \
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
for MATCH in \
plymouth-start.service \
plymouth-quit-wait.service \
syslog.socket \
syslog.service \
display-manager.service \
systemd-sysusers.service \
tmp.mount \
systemd-udevd.service \
; do \
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
done && \
systemctl disable ondemand.service && \
systemctl set-default multi-user.target
VOLUME ["/run", "/run/lock"]
STOPSIGNAL SIGRTMIN+3
ENTRYPOINT ["/lib/systemd/systemd"]
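
Because this image boots systemd as PID 1 (note the ENTRYPOINT, STOPSIGNAL SIGRTMIN+3 and the /run volumes above), it has to be started the way systemd containers usually are. A plausible invocation, as a sketch only: the image name comes from the docker/images.json hunk earlier, while the privileged and cgroup flags are the usual systemd-in-Docker assumptions and are not taken from this commit.

# Run the systemd-based install-test image; adjust the cgroup mount for the host's cgroup version.
docker run -d --name install-deb-test \
    --privileged \
    --volume=/sys/fs/cgroup:/sys/fs/cgroup:rw \
    clickhouse/install-deb-test
# Once systemd is up, package installation can be exercised inside the booted container:
docker exec -it install-deb-test bash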

@@ -0,0 +1,55 @@
FROM centos:8
# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
ENV \
LANG=C.UTF-8 \
container=docker \
init=/lib/systemd/systemd
# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
find \
/etc/systemd/system/*.wants/ \
/lib/systemd/system/multi-user.target.wants/ \
/lib/systemd/system/local-fs.target.wants/ \
/lib/systemd/system/sockets.target.wants/*initctl* \
! -type d \
-delete && \
find \
/lib/systemd/system/sysinit.target.wants \
! -type d \
! -name '*systemd-tmpfiles-setup*' \
-delete && \
find \
/lib/systemd \
-name systemd-update-utmp-runlevel.service \
-delete && \
rm -vf /usr/share/systemd/tmp.mount && \
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
for MATCH in \
plymouth-start.service \
plymouth-quit-wait.service \
syslog.socket \
syslog.service \
display-manager.service \
systemd-sysusers.service \
tmp.mount \
systemd-udevd.service \
; do \
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
done && \
systemctl set-default multi-user.target
VOLUME ["/run", "/run/lock"]
STOPSIGNAL SIGRTMIN+3
ENTRYPOINT ["/lib/systemd/systemd"]

@@ -126,13 +126,16 @@ function run_tests()
fi
set +e
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
--skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 \
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --timeout 1200 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
else
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
fi
set -e
}
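
The new branch in run_tests() is keyed entirely off the USE_PARALLEL_REPLICAS environment variable. A sketch of how the two modes would be selected, assuming this is the stateful-test image's run.sh entrypoint (the script name is not visible in this view):

./run.sh                           # default: plain stateful run (the else-branch above)
USE_PARALLEL_REPLICAS=1 ./run.sh   # parallel-replicas client settings, with 00168_parallel_processing_on_replicas run in a separate pass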

@@ -134,9 +134,9 @@ function run_tests()
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
}

@@ -11,6 +11,31 @@ set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400
function escaped()
{
# That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
# Also limit lines width just in case (too long lines are not really useful usually)
clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
function unts()
{
grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
function install_packages()
{
@@ -33,7 +58,9 @@ function configure()
ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag
# avoid too slow startup
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
@@ -136,6 +163,7 @@ function stop()
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
kill -TERM "$(pidof gdb)" ||:
sleep 5
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
@@ -151,10 +179,11 @@ function start()
if [ "$counter" -gt ${1:-120} ]
then
echo "Cannot start clickhouse-server"
echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n1000
tail -n100 /var/log/clickhouse-server/stderr.log
tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
break
fi
# use root to match with current uid
@@ -252,9 +281,92 @@ start
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams 
Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String,
Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8,
FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2),
CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String,
IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8,
WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8,
SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32,
IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8,
IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8,
Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32,
RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2),
BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32,
DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32,
RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String,
RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16),
URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8,
FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16,
UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8,
MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16,
SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16,
ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32,
SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8,
FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8,
HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8,
GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32,
HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String,
HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32,
FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8,
VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32,
Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String,
EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String,
AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32,
SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32,
ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32,
SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16,
UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16,
FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8,
FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8,
Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8,
BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16),
Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32),
WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64,
ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32,
ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32,
ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32,
ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16,
ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32,
OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String,
UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime,
PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8,
PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16),
CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64,
StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64,
OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64,
UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32,
DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16))
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
@ -275,7 +387,9 @@ export ZOOKEEPER_FAULT_INJECTION=1
configure
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
@ -283,8 +397,12 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
start
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
&& echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
# NOTE Hung check is implemented in docker/tests/stress/stress
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
@ -295,9 +413,10 @@ unset "${!THREAD_@}"
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
>> /test_output/test_results.tsv)
stop
@ -310,49 +429,54 @@ stop
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
rg -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# No such key errors
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
&& echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
# Remove file no_such_key_errors.txt if it's empty
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
&& echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
rg -Fa "########################################" /test_output/* > /dev/null \
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
function get_gdb_log_context()
{
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
}
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo -e "Backward compatibility check\n"
@ -367,8 +491,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
for table in query_log trace_log
@ -381,13 +505,13 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
then
echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
else
echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Uninstall current packages
dpkg --remove clickhouse-client
@ -446,9 +570,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
mkdir tmp_stress_output
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
rm -rf tmp_stress_output
# We experienced deadlocks in this command in very rare cases. Let's debug it:
@ -470,9 +595,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
export ZOOKEEPER_FAULT_INJECTION=0
configure
start 500
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
clickhouse-client --query="SELECT 'Server version: ', version()"
@ -488,8 +613,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restart the server.
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected
# ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
@ -519,7 +642,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
-e "} <Error> TCPHandler: Code:" \
-e "} <Error> executeQuery: Code:" \
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
-e "This engine is deprecated and is not supported in transactions" \
-e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
@ -530,8 +652,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
-e "Session expired" \
-e "TOO_MANY_PARTS" \
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
@ -540,34 +663,36 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Logical errors
echo "Check for Logical errors in server log:"
rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
@ -575,7 +700,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
done
fi
fi
@ -584,13 +710,28 @@ dmesg -T > /test_output/dmesg.log
# OOM in dmesg -- those are real
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
&& echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
# Try to choose most specific error for the whole check status
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like 'Backward compatibility check%'), -- BC check goes last
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
(test ilike '%possible deadlock%') DESC,
(test like '%start%') DESC,
(test like '%dmesg%') DESC,
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from multiprocessing import cpu_count
from subprocess import Popen, call, check_output, STDOUT
from subprocess import Popen, call, check_output, STDOUT, PIPE
import os
import argparse
import logging
@ -299,14 +299,19 @@ if __name__ == "__main__":
"00001_select_1",
]
)
res = call(cmd, shell=True, stderr=STDOUT)
hung_check_status = "No queries hung\tOK\n"
hung_check_log = os.path.join(args.output_folder, "hung_check.log")
tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE)
res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT)
tee.stdin.close()
if res != 0 and have_long_running_queries:
logging.info("Hung check failed with exit code {}".format(res))
hung_check_status = "Hung check failed\tFAIL\n"
with open(
os.path.join(args.output_folder, "test_results.tsv"), "w+"
) as results:
results.write(hung_check_status)
else:
hung_check_status = "No queries hung\tOK\t\\N\t\n"
with open(
os.path.join(args.output_folder, "test_results.tsv"), "w+"
) as results:
results.write(hung_check_status)
os.remove(hung_check_log)
logging.info("Stress test finished")

View File

@ -48,6 +48,7 @@ RUN apt-get update \
gdb \
git \
gperf \
libclang-rt-${LLVM_VERSION}-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \

View File

@ -85,8 +85,16 @@ def process_test_log(log_path):
if DATABASE_SIGN in line:
test_end = True
# Python does not support TSV, so we have to escape '\t' and '\n' manually
# and hope that complex escape sequences will not break anything
test_results = [
(test[0], test[1], test[2], "".join(test[3])[:4096]) for test in test_results
(
test[0],
test[1],
test[2],
"".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"),
)
for test in test_results
]
return (

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.1.3.5-stable (548b494bcce) FIXME as compared to v23.1.2.9-stable (8dfb1700858)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#45896](https://github.com/ClickHouse/ClickHouse/issues/45896): Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -16,6 +16,11 @@ Tests are located in `queries` directory. There are two subdirectories: `statele
Each test can be one of two types: `.sql` and `.sh`. A `.sql` test is a simple SQL script that is piped to `clickhouse-client --multiquery`. A `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test a feature that cannot be exercised from pure SQL, such as piping input data into `clickhouse-client` or testing `clickhouse-local`.
:::note
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case: time zones in CI test runs
are deliberately randomized. The easiest workaround is to specify the time zone for test values explicitly, e.g. `toDateTime64(val, 3, 'Europe/Amsterdam')`.
:::
### Running a Test Locally {#functional-test-locally}
Start the ClickHouse server locally, listening on the default port (9000). To

View File

@ -77,9 +77,12 @@ Optional parameters:
- `rabbitmq_password` - RabbitMQ password.
- `rabbitmq_commit_on_select` - Commit messages when select query is made. Default: `false`.
- `rabbitmq_max_rows_per_message` — The maximum number of rows written in one RabbitMQ message for row-based formats. Default: `1`.
- `rabbitmq_empty_queue_backoff_start` — The starting backoff point for rescheduling reads when the RabbitMQ queue is empty.
- `rabbitmq_empty_queue_backoff_end` — The end backoff point for rescheduling reads when the RabbitMQ queue is empty.
SSL connection:
Use either `rabbitmq_secure = 1` or `amqps` in connection address: `rabbitmq_address = 'amqps://guest:guest@localhost/vhost'`.
The default behaviour of the used library is not to check if the created TLS connection is sufficiently secure. Whether the certificate is expired, self-signed, missing or invalid: the connection is simply permitted. More strict checking of certificates can possibly be implemented in the future.
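For illustration, a table reading from RabbitMQ over TLS might combine these options roughly as sketched below. This is not taken from the original documentation: the table name, exchange name and format are placeholders, so adjust them to your setup.
```sql
-- Hypothetical sketch: connect over amqps (TLS); names and credentials are placeholders.
CREATE TABLE rabbitmq_secure_queue
(
    key String,
    value UInt64
)
ENGINE = RabbitMQ
SETTINGS
    rabbitmq_address = 'amqps://guest:guest@localhost/vhost',
    rabbitmq_exchange_name = 'exchange1',
    rabbitmq_format = 'JSONEachRow';
```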

View File

@ -2,10 +2,10 @@
slug: /en/engines/table-engines/mergetree-family/invertedindexes
sidebar_label: Inverted Indexes
description: Quickly find search terms in text.
keywords: [full-text search, text search]
keywords: [full-text search, text search, inverted, index, indices]
---
# Inverted indexes [experimental]
# Full-text Search using Inverted Indexes [experimental]
Inverted indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search
capabilities for [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md)
@ -13,7 +13,7 @@ columns. The main idea of an inverted index is to store a mapping from "terms" t
tokenized cells of the string column. For example, the string cell "I will be a little late" is by default tokenized into six terms "I", "will",
"be", "a", "little" and "late". Another kind of tokenizer is n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w",
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
useful the resulting inverted index will be.
useful the resulting inverted index will be.
:::warning
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
@ -50,7 +50,7 @@ Being a type of skipping index, inverted indexes can be dropped or added to a co
``` sql
ALTER TABLE tab DROP INDEX inv_idx;
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1;
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2);
```
To use the index, no special functions or syntax are required. Typical string search predicates automatically leverage the index. As
@ -74,7 +74,120 @@ controls the amount of data read consumed from the underlying column before a ne
intermediate memory consumption for index construction but also improves lookup performance since fewer segments need to be checked on
average to evaluate a query.
## Full-text search of the Hacker News dataset
Let's look at the performance improvements of inverted indexes on a large dataset with lots of text. We will use 28.7M rows of comments on the popular Hacker News website. Here is the table without an inverted index:
```sql
CREATE TABLE hackernews (
id UInt64,
deleted UInt8,
type String,
author String,
timestamp DateTime,
comment String,
dead UInt8,
parent UInt64,
poll UInt64,
children Array(UInt32),
url String,
score UInt32,
title String,
parts Array(UInt32),
descendants UInt32
)
ENGINE = MergeTree
ORDER BY (type, author);
```
The 28.7M rows are in a Parquet file in S3 - let's insert them into the `hackernews` table:
```sql
INSERT INTO hackernews
SELECT * FROM s3Cluster(
'default',
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/hackernews/hacknernews.parquet',
'Parquet',
'
id UInt64,
deleted UInt8,
type String,
by String,
time DateTime,
text String,
dead UInt8,
parent UInt64,
poll UInt64,
kids Array(UInt32),
url String,
score UInt32,
title String,
parts Array(UInt32),
descendants UInt32');
```
Consider the following simple search for the term `ClickHouse` (and its varied upper and lower cases) in the `comment` column:
```sql
SELECT count()
FROM hackernews
WHERE hasToken(lower(comment), 'clickhouse');
```
Notice it takes 3 seconds to execute the query:
```response
┌─count()─┐
│ 1145 │
└─────────┘
1 row in set. Elapsed: 3.001 sec. Processed 28.74 million rows, 9.75 GB (9.58 million rows/s., 3.25 GB/s.)
```
We will use `ALTER TABLE` and add an inverted index on the lowercase of the `comment` column, then materialize it (which can take a while - wait for it to materialize):
```sql
ALTER TABLE hackernews
ADD INDEX comment_lowercase(lower(comment)) TYPE inverted;
ALTER TABLE hackernews MATERIALIZE INDEX comment_lowercase;
```
We run the same query...
```sql
SELECT count()
FROM hackernews
WHERE hasToken(lower(comment), 'clickhouse')
```
...and notice the query executes 4x faster:
```response
┌─count()─┐
│ 1145 │
└─────────┘
1 row in set. Elapsed: 0.747 sec. Processed 4.49 million rows, 1.77 GB (6.01 million rows/s., 2.37 GB/s.)
```
We can also search for one or all of multiple terms, i.e., disjunctions or conjunctions:
```sql
-- multiple OR'ed terms
SELECT count(*)
FROM hackernews
WHERE multiSearchAny(lower(comment), ['oltp', 'olap']);
-- multiple AND'ed terms
SELECT count(*)
FROM hackernews
WHERE hasToken(lower(comment), 'avx') AND hasToken(lower(comment), 'sve');
```
:::note
Unlike other secondary indices, inverted indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design
is performance. In practice, users often search for multiple terms at once. For example, filter predicate `WHERE s LIKE '%little%' OR s LIKE
'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also
means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
:::

View File

@ -0,0 +1,226 @@
---
slug: /en/engines/table-engines/special/executable
sidebar_position: 40
sidebar_label: Executable
---
# Executable and ExecutablePool Table Engines
The `Executable` and `ExecutablePool` table engines allow you to define a table whose rows are generated from a script that you define (by writing rows to **stdout**). The executable script is stored in the `user_scripts` directory and can read data from any source.
- `Executable` tables: the script is run on every query
- `ExecutablePool` tables: maintains a pool of persistent processes, and takes processes from the pool for reads
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
## Creating an Executable Table
The `Executable` table engine requires two parameters: the name of the script and the format of the incoming data. You can optionally pass in one or more input queries:
```sql
Executable(script_name, format, [input_query...])
```
Here are the relevant settings for an `Executable` table:
- `send_chunk_header`
- Description: Send the number of rows in each chunk before sending the chunk to the process. This setting can help you write your script more efficiently by preallocating some resources
- Default value: false
- `command_termination_timeout`
- Description: Command termination timeout in seconds
- Default value: 10
- `command_read_timeout`
- Description: Timeout for reading data from command stdout in milliseconds
- Default value: 10000
- `command_write_timeout`
- Description: Timeout for writing data to command stdin in milliseconds
- Default value: 10000
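As a minimal sketch of how these settings can be supplied in a table definition (the table name, script name and values below are only illustrative, not taken from the original text):
```sql
-- Illustrative only: an Executable table that sends a chunk header and uses a longer read timeout.
CREATE TABLE my_executable_with_settings (
    x UInt32,
    y String
)
ENGINE = Executable('some_script.py', TabSeparated, (SELECT 10))
SETTINGS
    send_chunk_header = 1,
    command_read_timeout = 20000;
```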
Let's look at an example. The following Python script is named `my_script.py` and is saved in the `user_scripts` folder. It reads a number `i` and prints `i` random strings, each preceded by a number and separated by a tab:
```python
#!/usr/bin/python3
import sys
import string
import random
def main():
# Read input value
for number in sys.stdin:
i = int(number)
# Generate some random rows
for id in range(0, i):
letters = string.ascii_letters
random_string = ''.join(random.choices(letters ,k=10))
print(str(id) + '\t' + random_string + '\n', end='')
# Flush results to stdout
sys.stdout.flush()
if __name__ == "__main__":
main()
```
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
```sql
CREATE TABLE my_executable_table (
x UInt32,
y String
)
ENGINE = Executable('my_script.py', TabSeparated, (SELECT 10))
```
Creating the table returns immediately and does not invoke the script. Querying `my_executable_table` causes the script to be invoked:
```sql
SELECT * FROM my_executable_table
```
```response
┌─x─┬─y──────────┐
│ 0 │ BsnKBsNGNH │
│ 1 │ mgHfBCUrWM │
│ 2 │ iDQAVhlygr │
│ 3 │ uNGwDuXyCk │
│ 4 │ GcFdQWvoLB │
│ 5 │ UkciuuOTVO │
│ 6 │ HoKeCdHkbs │
│ 7 │ xRvySxqAcR │
│ 8 │ LKbXPHpyDI │
│ 9 │ zxogHTzEVV │
└───┴────────────┘
```
## Passing Query Results to a Script
Users of the Hacker News website leave comments. Python contains a natural language processing toolkit (`nltk`) with a `SentimentIntensityAnalyzer` for determining if comments are positive, negative, or neutral - including assigning a value between -1 (a very negative comment) and 1 (a very positive comment). Let's create an `Executable` table that computes the sentiment of Hacker News comments using `nltk`.
This example uses the `hackernews` table described [here](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/invertedindexes/#full-text-search-of-the-hacker-news-dataset). The `hackernews` table includes an `id` column of type `UInt64` and a `String` column named `comment`. Let's start by defining the `Executable` table:
```sql
CREATE TABLE sentiment (
id UInt64,
sentiment Float32
)
ENGINE = Executable(
'sentiment.py',
TabSeparated,
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
```
Some comments about the `sentiment` table:
- The file `sentiment.py` is saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
Here is the definition of `sentiment.py`:
```python
#!/usr/local/bin/python3.9
import sys
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
def main():
sentiment_analyzer = SentimentIntensityAnalyzer()
while True:
try:
row = sys.stdin.readline()
if row == '':
break
split_line = row.split("\t")
id = str(split_line[0])
comment = split_line[1]
score = sentiment_analyzer.polarity_scores(comment)['compound']
print(id + '\t' + str(score) + '\n', end='')
sys.stdout.flush()
except BaseException as x:
break
if __name__ == "__main__":
main()
```
Some comments about our Python script:
- For this to work, you will need to run `nltk.downloader.download('vader_lexicon')`. This could have been placed in the script, but then it would have been downloaded every time a query was executed on the `sentiment` table - which is not efficient
- Each value of `row` is going to be a row in the result set of `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20`
- The incoming row is tab-separated, so we parse out the `id` and `comment` using the Python `split` function
- The result of `polarity_scores` is a JSON object with a handful of values. We decided to just grab the `compound` value of this JSON object
- Recall that the `sentiment` table in ClickHouse uses the `TabSeparated` format and contains two columns, so our `print` function separates those columns with a tab
Every time you write a query that selects rows from the `sentiment` table, the `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20` query is executed and the result is passed to `sentiment.py`. Let's test it out:
```sql
SELECT *
FROM sentiment
```
The response looks like:
```response
┌───────id─┬─sentiment─┐
│ 7398199 │ 0.4404 │
│ 21640317 │ 0.1779 │
│ 21462000 │ 0 │
│ 25168863 │ 0 │
│ 25168978 │ -0.1531 │
│ 25169359 │ 0 │
│ 25169394 │ -0.9231 │
│ 25169766 │ 0.4137 │
│ 25172570 │ 0.7469 │
│ 25173687 │ 0.6249 │
│ 28291534 │ 0 │
│ 28291669 │ -0.4767 │
│ 28291731 │ 0 │
│ 28291949 │ -0.4767 │
│ 28292004 │ 0.3612 │
│ 28292050 │ -0.296 │
│ 28292322 │ 0 │
│ 28295172 │ 0.7717 │
│ 28295288 │ 0.4404 │
│ 21465723 │ -0.6956 │
└──────────┴───────────┘
```
## Creating an ExecutablePool Table
The syntax for `ExecutablePool` is similar to `Executable`, but there are a couple of relevant settings unique to an `ExecutablePool` table:
- `pool_size`
- Description: Processes pool size. If size is 0, then there are no size restrictions
- Default value: 16
- `max_command_execution_time`
- Description: Max command execution time in seconds
- Default value: 10
We can easily convert the `sentiment` table above to use `ExecutablePool` instead of `Executable`:
```sql
CREATE TABLE sentiment_pooled (
id UInt64,
sentiment Float32
)
ENGINE = ExecutablePool(
'sentiment.py',
TabSeparated,
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20000)
)
SETTINGS
pool_size = 4;
```
ClickHouse will maintain 4 processes on demand when your client queries the `sentiment_pooled` table.
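Selecting from the pooled table works the same way as for the `Executable` version; each read is served by one of the pooled processes:
```sql
SELECT * FROM sentiment_pooled;
```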

View File

@ -22,6 +22,6 @@ Additional cache types:
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
- Schema inference cache.
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
- [(Experimental) Query result cache](query-result-cache.md).
- [(Experimental) Query cache](query-cache.md).
To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.

View File

@ -0,0 +1,112 @@
---
slug: /en/operations/query-cache
sidebar_position: 65
sidebar_label: Query Cache [experimental]
---
# Query Cache [experimental]
The query cache allows `SELECT` queries to be computed just once and further executions of the same query to be served directly from the cache.
Depending on the type of queries, this can dramatically reduce the latency and resource consumption of the ClickHouse server.
## Background, Design and Limitations
Query caches can generally be viewed as transactionally consistent or inconsistent.
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
[MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query cache after v8.0) and
[Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
served directly from the query cache. In this example, a reasonable validity period could be 30 min.
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::warning
The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
processing) where wrong results are returned.
:::
## Configuration Settings and Usage
As long as the query cache is experimental, it must be activated using the following configuration setting:
```sql
SET allow_experimental_query_cache = true;
```
Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
of the current session should utilize the query cache. For example, the first execution of query
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true;
```
will store the query result in the query cache. Subsequent executions of the same query (also with parameter `use_query_cache = true`) will
read the computed result from the cache and return it immediately.
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache)
and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting
controls whether query results are stored in the cache, whereas the latter setting determines if the database should try to retrieve query
results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but not store its
result in it:
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
```
For maximum control, it is generally recommended to provide the settings "use_query_cache", "enable_writes_to_query_cache" and
"enable_reads_from_query_cache" only with specific queries. It is also possible to enable caching at the user or profile level (e.g. via `SET
use_query_cache = true`), but keep in mind that all `SELECT` queries, including monitoring or debugging queries to system tables,
may then return cached results.
The query cache can be cleared using the statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in the system table
`system.query_cache`. The numbers of query cache hits and misses are shown as the events "QueryCacheHits" and "QueryCacheMisses" in the system table
`system.events`. Both counters are only updated for `SELECT` queries which run with the setting "use_query_cache = true"; other queries do not
affect the cache miss counter.
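For example, the table, events and statement mentioned above can be used to inspect and then reset the cache:
```sql
-- Show the current cache entries and the hit/miss counters, then clear the cache.
SELECT * FROM system.query_cache;
SELECT event, value FROM system.events WHERE event IN ('QueryCacheHits', 'QueryCacheMisses');
SYSTEM DROP QUERY CACHE;
```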
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
changed (see below) but doing so is not recommended for security reasons.
Query results are referenced in the query cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of
their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query. To
make the matching more natural, all query-level settings related to the query cache are removed from the AST.
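As a small illustration of this, the following two statements are treated as the same query by the cache:
```sql
-- Both statements map to the same cache entry (matching is based on the AST, not the query text).
SELECT 1 SETTINGS use_query_cache = true;
select 1 SETTINGS use_query_cache = true;
```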
If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
To define how long a query must run at least such that its result can be cached, you can use setting
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
``` sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_min_query_duration = 5000;
```
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run before its result is
cached; for that, use the setting [query_cache_min_query_runs](settings/settings.md#query-cache-min-query-runs).
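For example, to cache a result only once the same query has already been executed a few times (the threshold of 5 is an arbitrary illustration):
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_min_query_runs = 5;
```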
Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds, but a different
value can be specified at the session, profile or query level using the setting [query_cache_ttl](settings/settings.md#query-cache-ttl).
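For example, to keep entries for this query alive for 5 minutes instead of the default 60 seconds:
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_ttl = 300;
```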
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
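For example, to cache the result of a query that uses `now()` even though it contains a non-deterministic function (such results are normally excluded):
```sql
SELECT now(), some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true;
```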
Finally, entries in the query cache are not shared between users for security reasons. For example, user A must not be able to bypass a
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
be marked accessible by other users (i.e. shared) by supplying the setting
[query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).

View File

@ -1,112 +0,0 @@
---
slug: /en/operations/query-result-cache
sidebar_position: 65
sidebar_label: Query Result Cache [experimental]
---
# Query Result Cache [experimental]
The query result cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the
cache. Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
## Background, Design and Limitations
Query result caches can generally be viewed as transactionally consistent or inconsistent.
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
[MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query result cache after v8.0) and
[Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
served directly from the query result cache. In this example, a reasonable validity period could be 30 min.
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
the same caching logic and configuration is often duplicated. With ClickHouse's query result cache, the caching logic moves to the server
side. This reduces maintenance effort and avoids redundancy.
:::warning
The query result cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
processing) where wrong results are returned.
:::
## Configuration Settings and Usage
As long as the result cache is experimental it must be activated using the following configuration setting:
```sql
SET allow_experimental_query_result_cache = true;
```
Afterwards, setting [use_query_result_cache](settings/settings.md#use-query-result-cache) can be used to control whether a specific query or
all queries of the current session should utilize the query result cache. For example, the first execution of query
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true;
```
will store the query result in the query result cache. Subsequent executions of the same query (also with parameter `use_query_result_cache
= true`) will read the computed result from the cache and return it immediately.
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_result_cache](settings/settings.md#enable-writes-to-query-result-cache)
and [enable_reads_from_query_result_cache](settings/settings.md#enable-reads-from-query-result-cache) (both `true` by default). The first
settings controls whether query results are stored in the cache, whereas the second parameter determines if the database should try to
retrieve query results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but
not store its result in it:
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, enable_writes_to_query_result_cache = false;
```
For maximum control, it is generally recommended to provide settings "use_query_result_cache", "enable_writes_to_query_result_cache" and
"enable_reads_from_query_result_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via
`SET use_query_result_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to
system tables may return cached results then.
The query result cache can be cleared using the statement `SYSTEM DROP QUERY RESULT CACHE`. The content of the query result cache is displayed
in the system table `system.query_result_cache`. The number of query result cache hits and misses is shown as events `QueryResultCacheHits` and
`QueryResultCacheMisses` in the system table `system.events`. Both counters are only updated for `SELECT` queries which run with setting
`use_query_result_cache = true`. Other queries do not affect the cache miss counter.
The query result cache exists once per ClickHouse server process. However, cached results are by default not shared between users. This can
be changed (see below) but doing so is not recommended for security reasons.
Query results are referenced in the query result cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
of their query. This means that caching is agnostic to upper/lowercase; for example, `SELECT 1` and `select 1` are treated as the same query.
To make the matching more natural, all query-level settings related to the query result cache are removed from the AST.
If the query was aborted due to an exception or user cancellation, no entry is written into the query result cache.
The size of the query result cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-result-cache).
To define the minimum amount of time a query must run for its result to be cached, use setting
[query_result_cache_min_query_duration](settings/settings.md#query-result-cache-min-query-duration). For example, the result of the query
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, query_result_cache_min_query_duration = 5000;
```
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run before its result is
cached; for that, use setting [query_result_cache_min_query_runs](settings/settings.md#query-result-cache-min-query-runs).
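As a sketch, using the hypothetical query from the earlier examples, caching only after the third run could be requested like this:
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, query_result_cache_min_query_runs = 3;
```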
Entries in the query result cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a
different value can be specified at session, profile or query level using setting [query_result_cache_ttl](settings/settings.md#query-result-cache-ttl).
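For example, to keep a cache entry fresh for five minutes instead of the default 60 seconds, one could write (again reusing the hypothetical query from above):
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, query_result_cache_ttl = 300;
```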
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overridden using
setting [query_result_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-result-cache-store-results-of-queries-with-nondeterministic-functions).
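A sketch of forcing such a result into the cache anyway (the `now()` column is only there to make the query non-deterministic):
```sql
SELECT now(), some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true,
         query_result_cache_store_results_of_queries_with_nondeterministic_functions = true;
```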
Finally, entries in the query result cache are not shared between users for security reasons. For example, user A must not be able to bypass a
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
be marked as accessible by other users (i.e. shared) by supplying setting
[query_result_cache_share_between_users](settings/settings.md#query-result-cache-share-between-users).
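If sharing is nevertheless required, the setting can be supplied per query, for example:
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_result_cache = true, query_result_cache_share_between_users = true;
```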

View File

@ -1270,30 +1270,32 @@ If the table does not exist, ClickHouse will create it. If the structure of the
</query_log>
```
## query_result_cache {#server_configuration_parameters_query-result-cache}
## query_cache {#server_configuration_parameters_query-cache}
[Query result cache](../query-result-cache.md) configuration.
[Query cache](../query-cache.md) configuration.
The following settings are available:
- `size`: The maximum cache size in bytes. 0 means the query result cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of SELECT query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes SELECT query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_records`: The maximum number of records SELECT query results may have to be saved in the cache. Default value: `30000000` (30 mil).
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
Changed settings take effect immediately.
:::warning
Data for the query result cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query result cache altogether.
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
:::
**Example**
```xml
<query_result_cache>
<query_cache>
<size>1073741824</size>
<max_entries>1024</max_entries>
<max_entry_size>1048576</max_entry_size>
<max_entry_records>30000000</max_entry_records>
</query_result_cache>
<max_entry_rows>30000000</max_entry_rows>
</query_cache>
```
## query_thread_log {#server_configuration_parameters-query_thread_log}

View File

@ -233,7 +233,7 @@ Possible values:
Default value: 100.
Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time.
Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time.
## max_replicated_logs_to_keep

View File

@ -1301,10 +1301,10 @@ Possible values:
Default value: `3`.
## use_query_result_cache {#use-query-result-cache}
## use_query_cache {#use-query-cache}
If turned on, SELECT queries may utilize the [query result cache](../query-result-cache.md). Parameters [enable_reads_from_query_result_cache](#enable-reads-from-query-result-cache)
and [enable_writes_to_query_result_cache](#enable-writes-to-query-result-cache) control in more detail how the cache is used.
If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache)
and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used.
Possible values:
@ -1313,9 +1313,9 @@ Possible values:
Default value: `0`.
## enable_reads_from_query_result_cache {#enable-reads-from-query-result-cache}
## enable_reads_from_query_cache {#enable-reads-from-query-cache}
If turned on, results of SELECT queries are retrieved from the [query result cache](../query-result-cache.md).
If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md).
Possible values:
@ -1324,9 +1324,9 @@ Possible values:
Default value: `1`.
## enable_writes_to_query_result_cache {#enable-writes-to-query-result-cache}
## enable_writes_to_query_cache {#enable-writes-to-query-cache}
If turned on, results of SELECT queries are stored in the [query result cache](../query-result-cache.md).
If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md).
Possible values:
@ -1335,9 +1335,9 @@ Possible values:
Default value: `1`.
## query_result_cache_store_results_of_queries_with_nondeterministic_functions {#query-result-cache-store-results-of-queries-with-nondeterministic-functions}
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions}
If turned on, then results of SELECT queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query result cache](../query-result-cache.md).
If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md).
Possible values:
@ -1346,9 +1346,9 @@ Possible values:
Default value: `0`.
## query_result_cache_min_query_runs {#query-result-cache-min-query-runs}
## query_cache_min_query_runs {#query-cache-min-query-runs}
Minimum number of times a SELECT query must run before its result is stored in the [query result cache](../query-result-cache.md).
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
Possible values:
@ -1356,9 +1356,9 @@ Possible values:
Default value: `0`
## query_result_cache_min_query_duration {#query-result-cache-min-query-duration}
## query_cache_min_query_duration {#query-cache-min-query-duration}
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query result cache](../query-result-cache.md).
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md).
Possible values:
@ -1366,9 +1366,9 @@ Possible values:
Default value: `0`
## query_result_cache_ttl {#query-result-cache-ttl}
## query_cache_ttl {#query-cache-ttl}
After this time in seconds entries in the [query result cache](../query-result-cache.md) become stale.
After this time in seconds entries in the [query cache](../query-cache.md) become stale.
Possible values:
@ -1376,9 +1376,9 @@ Possible values:
Default value: `60`
## query_result_cache_share_between_users {#query-result-cache-share-between-users}
## query_cache_share_between_users {#query-cache-share-between-users}
If turned on, the result of SELECT queries cached in the [query result cache](../query-result-cache.md) can be read by other users.
If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users.
It is not recommended to enable this setting due to security reasons.
Possible values:
@ -3689,6 +3689,30 @@ Default value: `0`.
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## optimize_using_constraints
Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`.
Possible values:
- true, false
## optimize_append_index
Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`.
Possible values:
- true, false
## optimize_substitute_columns
Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`.
Possible values:
- true, false
## describe_include_subcolumns {#describe_include_subcolumns}
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type.

View File

@ -1,5 +1,5 @@
---
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
@ -7,9 +7,11 @@ sidebar_label: sparkbar
# sparkbar
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated.
Negative repetitions are ignored.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
Otherwise, values outside the interval are ignored.
**Syntax**
@ -37,29 +39,24 @@ sparkbar(width[, min_x, max_x])(x, y)
Query:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -5,7 +5,7 @@ sidebar_label: Storing Dictionaries in Memory
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Storing Dictionaries in Memory
# Storing Dictionaries in Memory
There are a variety of ways to store dictionaries in memory.
@ -25,7 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
<CloudDetails />
<CloudDetails />
The configuration looks like this:
@ -299,11 +299,11 @@ Example: The table contains discounts for each advertiser in the format:
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
:::warning
:::warning
Values of `range_min` and `range_max` should fit in `Int64` type.
:::
Example:
Example:
``` xml
<layout>
@ -459,7 +459,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
└─────┘
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
┌─res─┐
│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
└─────┘
@ -496,7 +496,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
└─────┘
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
┌─res─┐
│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
└─────┘
@ -588,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce
3. Assess memory consumption using the `system.dictionaries` table.
4. Increase or decrease the number of cells until the required memory consumption is reached.
:::warning
:::warning
Do not use ClickHouse as a source, because it is slow to process queries with random reads.
:::
@ -660,25 +660,30 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic
This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
Example: The table contains network prefixes and their corresponding AS number and country code:
**Example**
``` text
+-----------------+-------+------+
| prefix          | asn   | cca2 |
+=================+=======+======+
| 202.79.32.0/20  | 17501 | NP   |
+-----------------+-------+------+
| 2620:0:870::/48 | 3856  | US   |
+-----------------+-------+------+
| 2a02:6b8:1::/48 | 13238 | RU   |
+-----------------+-------+------+
| 2001:db8::/32   | 65536 | ZZ   |
+-----------------+-------+------+
Suppose we have a table in ClickHouse that contains our IP prefixes and mappings:
```sql
CREATE TABLE my_ip_addresses (
prefix String,
asn UInt32,
cca2 String
)
ENGINE = MergeTree
PRIMARY KEY prefix;
```
When using this type of layout, the structure must have a composite key.
```sql
INSERT INTO my_ip_addresses VALUES
('202.79.32.0/20', 17501, 'NP'),
('2620:0:870::/48', 3856, 'US'),
('2a02:6b8:1::/48', 13238, 'RU'),
('2001:db8::/32', 65536, 'ZZ')
;
```
Example:
Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key:
``` xml
<structure>
@ -712,26 +717,29 @@ Example:
or
``` sql
CREATE DICTIONARY somedict (
CREATE DICTIONARY my_ip_trie_dictionary (
prefix String,
asn UInt32,
cca2 String DEFAULT '??'
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses'))
LAYOUT(IP_TRIE)
LIFETIME(3600);
```
The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet.
The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
``` sql
dictGetT('dict_name', 'attr_name', tuple(ip))
```
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
``` sql
dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
```
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
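As an illustrative sketch against the sample data inserted above (IPv4 addresses are passed as `UInt32`, e.g. via `IPv4StringToNum`), the following lookup should return `NP`, because `202.79.32.10` falls into the prefix `202.79.32.0/20`:
```sql
SELECT dictGet('my_ip_trie_dictionary', 'cca2', tuple(IPv4StringToNum('202.79.32.10'))) AS cca2;
```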

View File

@ -21,14 +21,14 @@ For example, you can't compare a date with a string. You have to use a functio
Strings are compared by bytes. A shorter string is smaller than all strings that start with it and that contain at least one more character.
## equals, a = b and a == b operator
### equals, a `=` b and a `==` b operator
## notEquals, a != b and a \<\> b operator
### notEquals, a `!=` b and a `<>` b operator
## less, \< operator
### less, `<` operator
## greater, \> operator
### greater, `>` operator
## lessOrEquals, \<= operator
### lessOrEquals, `<=` operator
## greaterOrEquals, \>= operator
### greaterOrEquals, `>=` operator

View File

@ -304,7 +304,7 @@ Result:
└──────────────┘
```
## s2RectUinion
## s2RectUnion
Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.

View File

@ -6,6 +6,10 @@ sidebar_label: TTL
# Manipulations with Table TTL
:::note
If you are looking for details on using TTL for managing old data, check out the [Manage Data with TTL](/docs/en/guides/developer/ttl.md) user guide. The docs below demonstrate how to alter or remove an existing TTL rule.
:::
## MODIFY TTL
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:

View File

@ -3,6 +3,7 @@ slug: /en/sql-reference/statements/create/table
sidebar_position: 36
sidebar_label: TABLE
title: "CREATE TABLE"
keywords: [compression, codec, schema, DDL]
---
Creates a new table. This query can have various syntax forms depending on a use case.

View File

@ -2,15 +2,16 @@
slug: /en/sql-reference/statements/delete
sidebar_position: 36
sidebar_label: DELETE
description: Lightweight deletes simplify the process of deleting data from the database.
keywords: [delete]
title: DELETE Statement
---
# DELETE Statement
``` sql
DELETE FROM [db.]table [WHERE expr]
DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr]
```
`DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family.
`DELETE FROM` removes rows from the table `[db.]table` that match the expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in the background. This feature is only available for the MergeTree table engine family.
For example, the following query deletes all rows from the `hits` table where the `Title` column contains the text `hello`:
@ -32,7 +33,7 @@ SET allow_experimental_lightweight_delete = true;
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
:::warning
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Ligthweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
:::
:::note
@ -41,3 +42,34 @@ Even though deletes are becoming more lightweight in ClickHouse, they should sti
grant ALTER DELETE ON db.table to username;
```
:::
## Lightweight Delete Internals
The idea behind Lightweight Delete is that when a `DELETE FROM table ...` query is executed, ClickHouse only saves a mask where each row is marked as either “existing” or as “deleted”. Those “deleted” rows become invisible to subsequent queries, but physically the rows are removed only later by subsequent merges. Writing this mask is usually much more lightweight than what is done by an `ALTER TABLE ... DELETE` query.
### How it is implemented
The mask is implemented as a hidden `_row_exists` system column that stores True for all visible rows and False for deleted ones. This column is only present in a part if some rows in this part were deleted. In other words, the column is not persisted when all of its values are equal to True.
## SELECT query
When the column is present, the `SELECT ... FROM table WHERE condition` query is internally extended by an additional predicate on `_row_exists` and becomes similar to
```sql
SELECT ... FROM table PREWHERE _row_exists WHERE condition
```
At execution time, the column `_row_exists` is read to figure out which rows are not visible, and if there are many deleted rows ClickHouse can also figure out which granules can be fully skipped when reading the rest of the columns.
## DELETE query
`DELETE FROM table WHERE condition` is translated into an `ALTER table UPDATE _row_exists = 0 WHERE condition` mutation. Internally, this mutation is executed in two steps:
1. `SELECT count() FROM table WHERE condition` for each individual part to figure out if the part is affected.
2. Mutate the affected parts, and make hardlinks for the unaffected parts. For Wide parts, mutating a part only writes the `_row_exists` column and hardlinks all other columns' files. For Compact parts, all columns are rewritten because they are all stored together in one file.
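As a rough sketch, reusing the `hits` table from the example at the top of this page, the user-issued lightweight delete corresponds to a mutation roughly like the one in the comment below; the `ALTER` statement is shown only to illustrate the internal mechanism and is not meant to be run by users:
```sql
-- Lightweight delete issued by the user:
DELETE FROM hits WHERE Title LIKE '%hello%';

-- Internally executed as a mutation roughly equivalent to:
-- ALTER TABLE hits UPDATE _row_exists = 0 WHERE Title LIKE '%hello%';
```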
So if we compare Lightweight Delete to `ALTER DELETE`: in the first step they both do the same thing to figure out which parts are affected, but in the second step `ALTER DELETE` does much more work because it reads and rewrites all column files for the affected parts.
With the described implementation in mind, we can see what can negatively affect `DELETE FROM` execution time:
- A heavy `WHERE` condition in the `DELETE` query
- A mutations queue filled with other mutations, because all mutations on a table are executed sequentially
- A table with a very large number of data parts
- A lot of data in Compact parts, since in a Compact part all columns are stored in one file.
:::note
This implementation might change in the future.
:::

View File

@ -510,3 +510,15 @@ Result:
**See Also**
- [system.settings](../../operations/system-tables/settings.md) table
## SHOW ENGINES
``` sql
SHOW ENGINES [INTO OUTFILE filename] [FORMAT format]
```
Outputs the content of the [system.table_engines](../../operations/system-tables/table_engines.md) table, which contains descriptions of the table engines supported by the server and their feature support information.
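A minimal usage sketch (the output file name is just a placeholder):
```sql
-- Print the supported table engines in the default format.
SHOW ENGINES;

-- Or write them to a file in TSV format.
SHOW ENGINES INTO OUTFILE 'engines.tsv' FORMAT TSV;
```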
**See Also**
- [system.table_engines](../../operations/system-tables/table_engines.md) table

View File

@ -103,9 +103,9 @@ Its size can be configured using the server-level setting [uncompressed_cache_si
Reset the compiled expression cache.
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
## DROP QUERY RESULT CACHE
## DROP QUERY CACHE
Resets the [query result cache](../../operations/query-result-cache.md).
Resets the [query cache](../../operations/query-cache.md).
## FLUSH LOGS
@ -283,7 +283,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
Wait until a `ReplicatedMergeTree` table is synced with other replicas in a cluster. Will run until `receive_timeout` if fetches are currently disabled for the table.
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands.

View File

@ -0,0 +1,97 @@
---
slug: /en/engines/table-functions/executable
sidebar_position: 55
sidebar_label: executable
keywords: [udf, user defined function, clickhouse, executable, table, function]
---
# executable Table Function for UDFs
The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `user_scripts` directory and can read data from any source.
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
:::note
A key advantage of the `executable` table function and `Executable` table engine over ordinary UDF functions is that ordinary UDF functions cannot change the row count. For example, if the input is 100 rows, then the result must return 100 rows. When using the `executable` table function or `Executable` table engine, your script can make any data transformations you want, including complex aggregations.
:::
## Syntax
The `executable` table function requires three parameters and accepts an optional list of input queries:
```sql
executable(script_name, format, structure, [input_query...])
```
- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
- `format`: the format of the generated table
- `structure`: the table schema of the generated table
- `input_query`: an optional query (or collection of queries) whose results are passed to the script via **stdin**
:::note
If you are going to invoke the same script repeatedly with the same input queries, consider using the [`Executable` table engine](../../engines/table-engines/special/executable.md).
:::
The following Python script is named `generate_random.py` and is saved in the `user_scripts` folder. It reads in a number `i` and prints `i` random strings, with each string preceded by a number and separated from it by a tab:
```python
#!/usr/local/bin/python3.9
import sys
import string
import random
def main():
# Read input value
for number in sys.stdin:
i = int(number)
# Generate some random rows
for id in range(0, i):
letters = string.ascii_letters
random_string = ''.join(random.choices(letters, k=10))
print(str(id) + '\t' + random_string + '\n', end='')
# Flush results to stdout
sys.stdout.flush()
if __name__ == "__main__":
main()
```
Let's invoke the script and have it generate 10 random strings:
```sql
SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', (SELECT 10))
```
The response looks like:
```response
┌─id─┬─random─────┐
│ 0 │ xheXXCiSkH │
│ 1 │ AqxvHAoTrl │
│ 2 │ JYvPCEbIkY │
│ 3 │ sWgnqJwGRm │
│ 4 │ fTZGrjcLon │
│ 5 │ ZQINGktPnd │
│ 6 │ YFSvGGoezb │
│ 7 │ QyMJJZOOia │
│ 8 │ NfiyDDhmcI │
│ 9 │ REJRdJpWrg │
└────┴────────────┘
```
## Passing Query Results to a Script
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
```sql
SELECT * FROM executable(
'sentiment.py',
TabSeparated,
'id UInt64, sentiment Float32',
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
```

View File

@ -2,11 +2,12 @@
slug: /en/sql-reference/table-functions/s3
sidebar_position: 45
sidebar_label: s3
keywords: [s3, gcs, bucket]
---
# s3 Table Function
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
Provides a table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/) and [Google Cloud Storage](https://cloud.google.com/storage/). This table function is similar to the [hdfs function](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
**Syntax**
@ -14,9 +15,24 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```
:::tip GCS
The S3 Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs](https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_id` and `aws_secret_access_key`.
:::
**Arguments**
- `path` — Bucket URL with a path to the file. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
:::note GCS
The GCS path is in this format because the endpoint for the Google XML API is different from the JSON API:
```
https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
```
and not ~~https://storage.cloud.google.com~~.
:::
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Optional parameter. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, the compression method is autodetected from the file extension.
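As a hedged sketch, reading a CSV file from a GCS bucket with HMAC credentials could look like the following; the bucket, path, key, and secret are placeholders:
```sql
SELECT *
FROM s3(
    'https://storage.googleapis.com/my-bucket/folder/data.csv',
    'GCS_HMAC_KEY',
    'GCS_HMAC_SECRET',
    'CSV',
    'id UInt32, name String'
)
LIMIT 5;
```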

View File

@ -1,14 +1,15 @@
---
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
sidebar_position: 311
sidebar_label: sparkbar
---
# sparkbar {#sparkbar}
The function plots a frequency histogram for the given values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
The function plots a frequency histogram for the given values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. Repetitions for all `x` falling into the same bucket are averaged, so the data should be pre-aggregated. Negative repetitions are ignored.
If no interval is specified, the minimum `x` is used as the interval start, and the maximum `x` as the interval end.
Values of `x` outside the specified interval are ignored.
**Syntax**
@ -39,29 +40,23 @@ sparkbar(width[, min_x, max_x])(x, y)
Query:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
│ ▂▅▂▃▆█ ▂ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
│ ▂▅▂▃▇▆█ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -102,7 +102,8 @@ done
EOF
chmod +x "$PKG_PATH/install/doinst.sh"
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
tail +2 "$PKG_PATH/DEBIAN/postinst" >> "$PKG_PATH/install/doinst.sh"
# we don't need debconf source in doinst in any case
tail +2 "$PKG_PATH/DEBIAN/postinst" | grep -v debconf/confmodule >> "$PKG_PATH/install/doinst.sh"
fi
rm -rf "$PKG_PATH/DEBIAN"
if [ -f "/usr/bin/pigz" ]; then

View File

@ -0,0 +1,46 @@
#!/bin/sh
set -e
# set -x
PROGRAM=clickhouse-keeper
KEEPER_USER=${KEEPER_USER:=clickhouse}
KEEPER_GROUP=${KEEPER_GROUP:=clickhouse}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
KEEPER_CONFDIR=${KEEPER_CONFDIR:=/etc/$PROGRAM}
KEEPER_DATADIR=${KEEPER_DATADIR:=/var/lib/clickhouse}
KEEPER_LOGDIR=${KEEPER_LOGDIR:=/var/log/$PROGRAM}
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse-keeper ] && . /etc/default/clickhouse-keeper
if [ ! -f "/etc/debian_version" ]; then
not_deb_os=1
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
if ! getent group "${KEEPER_GROUP}" > /dev/null 2>&1 ; then
groupadd --system "${KEEPER_GROUP}"
fi
GID=$(getent group "${KEEPER_GROUP}" | cut -d: -f 3)
if ! id "${KEEPER_USER}" > /dev/null 2>&1 ; then
adduser --system --home /dev/null --no-create-home \
--gid "${GID}" --shell /bin/false \
"${KEEPER_USER}"
fi
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_CONFDIR}"
chmod 0755 "${KEEPER_CONFDIR}"
if ! [ -d "${KEEPER_DATADIR}" ]; then
mkdir -p "${KEEPER_DATADIR}"
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_DATADIR}"
chmod 0700 "${KEEPER_DATADIR}"
fi
if ! [ -d "${KEEPER_LOGDIR}" ]; then
mkdir -p "${KEEPER_LOGDIR}"
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_LOGDIR}"
chmod 0770 "${KEEPER_LOGDIR}"
fi
fi
# vim: ts=4: sw=4: sts=4: expandtab

View File

@ -0,0 +1,27 @@
[Unit]
Description=ClickHouse Keeper - zookeeper compatible distributed coordination server
Requires=network-online.target
# NOTE: After/Wants=time-sync.target is not enough, you need to ensure
# that the time has already been adjusted. If you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionally.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/%p
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

View File

@ -30,6 +30,8 @@ contents:
type: config|noreplace
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
- src: clickhouse-keeper.service
dst: /lib/systemd/system/clickhouse-keeper.service
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
@ -39,3 +41,6 @@ contents:
dst: /usr/share/doc/clickhouse-keeper/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-keeper/README.md
scripts:
postinstall: ./clickhouse-keeper.postinstall

View File

@ -11,8 +11,6 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule

View File

@ -17,10 +17,10 @@ User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
EnvironmentFile=-/etc/default/%p
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE

View File

@ -474,7 +474,7 @@ private:
executor.sendQuery(ClientInfo::QueryKind::INITIAL_QUERY);
ProfileInfo info;
while (Block block = executor.read())
while (Block block = executor.readBlock())
info.update(block);
executor.finish();

View File

@ -1,3 +1,4 @@
#include <boost/algorithm/string/join.hpp>
#include <cstdlib>
#include <fcntl.h>
#include <map>
@ -538,24 +539,28 @@ void Client::connect()
// Prints changed settings to stderr. Useful for debugging fuzzing failures.
void Client::printChangedSettings() const
{
const auto & changes = global_context->getSettingsRef().changes();
if (!changes.empty())
auto print_changes = [](const auto & changes, std::string_view settings_name)
{
fmt::print(stderr, "Changed settings: ");
for (size_t i = 0; i < changes.size(); ++i)
if (!changes.empty())
{
if (i)
fmt::print(stderr, "Changed {}: ", settings_name);
for (size_t i = 0; i < changes.size(); ++i)
{
fmt::print(stderr, ", ");
if (i)
fmt::print(stderr, ", ");
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
}
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
fmt::print(stderr, "\n");
}
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "No changed settings.\n");
}
else
{
fmt::print(stderr, "No changed {}.\n", settings_name);
}
};
print_changes(global_context->getSettingsRef().changes(), "settings");
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
}
@ -1352,6 +1357,8 @@ void Client::readArguments(
}
else if (arg == "--allow_repeated_settings")
allow_repeated_settings = true;
else if (arg == "--allow_merge_tree_settings")
allow_merge_tree_settings = true;
else
common_arguments.emplace_back(arg);
}

View File

@ -6,7 +6,6 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/setThreadName.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Parsers/ASTFunction.h>
@ -2041,7 +2040,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
while (true)
{
auto block = remote_query_executor->read();
auto block = remote_query_executor->readBlock();
if (!block)
break;
}

View File

@ -362,6 +362,7 @@ try
else
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
std::filesystem::create_directories(path);
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();

View File

@ -19,6 +19,9 @@ target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib)
if (TARGET ch_rust::skim)
target_link_libraries(clickhouse-local-lib PRIVATE ch_rust::skim)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries(clickhouse-local-lib PRIVATE ch_contrib::azure_sdk)
endif()
# Always use internal readpassphrase
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)

View File

@ -51,6 +51,10 @@
#include <Functions/getFuzzerData.h>
#endif
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
namespace fs = std::filesystem;
@ -115,6 +119,14 @@ void LocalServer::initialize(Poco::Util::Application & self)
config().getUInt("thread_pool_queue_size", 10000)
);
#if USE_AZURE_BLOB_STORAGE
/// See the explanation near the same line in Server.cpp
GlobalThreadPool::instance().addOnDestroyCallback([]
{
Azure::Storage::_internal::XmlGlobalDeinitialize();
});
#endif
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),

View File

@ -27,6 +27,9 @@ set (CLICKHOUSE_SERVER_LINK
if (TARGET ch_contrib::jemalloc)
list(APPEND CLICKHOUSE_SERVER_LINK PRIVATE ch_contrib::jemalloc)
endif()
if (TARGET ch_contrib::azure_sdk)
list(APPEND CLICKHOUSE_SERVER_LINK PRIVATE ch_contrib::azure_sdk)
endif()
clickhouse_program_add(server)

View File

@ -82,9 +82,7 @@
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/filesystemHelpers.h>
#if USE_BORINGSSL
#include <Compression/CompressionCodecEncrypted.h>
#endif
#include <Server/HTTP/HTTPServerConnectionFactory.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
@ -128,6 +126,10 @@
# include <jemalloc/jemalloc.h>
#endif
#if USE_AZURE_BLOB_STORAGE
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
namespace CurrentMetrics
{
extern const Metric Revision;
@ -750,6 +752,19 @@ try
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000));
#if USE_AZURE_BLOB_STORAGE
/// It makes sense to deinitialize libxml after joining of all threads
/// in global pool because libxml uses thread-local memory allocations via
/// 'pthread_key_create' and 'pthread_setspecific' which should be deallocated
/// at 'pthread_exit'. Deinitialization of libxml leads to call of 'pthread_key_delete'
/// and if it is done before joining of threads, allocated memory will not be freed
/// and there may be memory leaks in threads that used libxml.
GlobalThreadPool::instance().addOnDestroyCallback([]
{
Azure::Storage::_internal::XmlGlobalDeinitialize();
});
#endif
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
@ -1331,9 +1346,8 @@ try
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
#if USE_BORINGSSL
global_context->updateQueryCacheConfiguration(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#endif
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
@ -1517,13 +1531,7 @@ try
global_context->setMMappedFileCache(mmap_cache_size);
/// A cache for query results.
size_t query_result_cache_size = config().getUInt64("query_result_cache.size", 1_GiB);
if (query_result_cache_size)
global_context->setQueryResultCache(
query_result_cache_size,
config().getUInt64("query_result_cache.max_entries", 1024),
config().getUInt64("query_result_cache.max_entry_size", 1_MiB),
config().getUInt64("query_result_cache.max_entry_records", 30'000'000));
global_context->setQueryCache(config());
#if USE_EMBEDDED_COMPILER
/// 128 MB
@ -1547,10 +1555,8 @@ try
global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks);
global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks);
}
#if USE_BORINGSSL
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
#endif
SCOPE_EXIT({
async_metrics.stop();

View File

@ -854,6 +854,51 @@
</replica>
</shard-->
</test_cluster_one_shard_three_replicas_localhost>
<parallel_replicas>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.3</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.4</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.5</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.6</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.7</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.8</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.9</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.10</host>
<port>9000</port>
</replica>
</shard>
</parallel_replicas>
<test_cluster_two_shards_localhost>
<shard>
<replica>
@ -1466,13 +1511,13 @@
</rocksdb>
-->
<!-- Configuration for the query result cache -->
<!-- <query_result_cache> -->
<!-- Configuration for the query cache -->
<!-- <query_cache> -->
<!-- <size>1073741824</size> -->
<!-- <max_entries>1024</max_entries> -->
<!-- <max_entry_size>1048576</max_entry_size> -->
<!-- <max_entry_records>30000000</max_entry_records> -->
<!-- </query_result_cache> -->
<!-- <max_entry_rows>30000000</max_entry_rows> -->
<!-- </query_cache> -->
<!-- Uncomment if enable merge tree metadata cache -->
<!--merge_tree_metadata_cache>

View File

@ -76,7 +76,7 @@
#charts
{
height: 100%;
display: flex;
display: none;
flex-flow: row wrap;
gap: 1rem;
}
@ -170,6 +170,14 @@
background: var(--button-background-color);
}
#auth-error {
color: var(--error-color);
display: flex;
flex-flow: row nowrap;
justify-content: center;
}
form {
display: inline;
}
@ -293,6 +301,7 @@
</div>
</form>
</div>
<div id="auth-error"></div>
<div id="charts"></div>
<script>
@ -322,6 +331,11 @@ if (location.protocol != 'file:') {
user = 'default';
}
const errorCodeRegex = /Code: (\d+)/
const errorCodeMessageMap = {
516: 'Error authenticating with database. Please check your connection params and try again.'
}
/// This is just a demo configuration of the dashboard.
let queries = [
@ -597,6 +611,11 @@ function insertChart(i) {
query_editor_confirm.value = 'Ok';
query_editor_confirm.className = 'edit-confirm';
function getCurrentIndex() {
/// Indices may change after deletion of other element, hence captured "i" may become incorrect.
return [...charts.querySelectorAll('.chart')].findIndex(child => chart == child);
}
function editConfirm() {
query_editor.style.display = 'none';
query_error.style.display = 'none';
@ -605,7 +624,8 @@ function insertChart(i) {
title_text.data = '';
findParamsInQuery(q.query, params);
buildParams();
draw(i, chart, getParamsForURL(), q.query);
const idx = getCurrentIndex();
draw(idx, chart, getParamsForURL(), q.query);
saveState();
}
@ -649,8 +669,7 @@ function insertChart(i) {
let trash_text = document.createTextNode('✕');
trash.appendChild(trash_text);
trash.addEventListener('click', e => {
/// Indices may change after deletion of other element, hence captured "i" may become incorrect.
let idx = [...charts.querySelectorAll('.chart')].findIndex(child => chart == child);
const idx = getCurrentIndex();
if (plots[idx]) {
plots[idx].destroy();
plots[idx] = null;
@ -796,6 +815,18 @@ async function draw(idx, chart, url_params, query) {
error = e.toString();
}
if (error) {
const errorMatch = error.match(errorCodeRegex)
if (errorMatch && errorMatch[1]) {
const code = errorMatch[1]
if (errorCodeMessageMap[code]) {
const authError = new Error(errorCodeMessageMap[code])
authError.code = code
throw authError
}
}
}
if (!error) {
if (!Array.isArray(data)) {
error = "Query should return an array.";
@ -853,16 +884,50 @@ async function draw(idx, chart, url_params, query) {
sync.sub(plots[idx]);
/// Set title
const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
const title = queries[idx] && queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
chart.querySelector('.title').firstChild.data = title;
}
function showAuthError(message) {
const charts = document.querySelector('#charts');
charts.style.display = 'none';
const add = document.querySelector('#add');
add.style.display = 'none';
const authError = document.querySelector('#auth-error');
authError.textContent = message;
authError.style.display = 'flex';
}
function hideAuthError() {
const charts = document.querySelector('#charts');
charts.style.display = 'flex';
const add = document.querySelector('#add');
add.style.display = 'block';
const authError = document.querySelector('#auth-error');
authError.textContent = '';
authError.style.display = 'none';
}
let firstLoad = true;
async function drawAll() {
let params = getParamsForURL();
const charts = document.getElementsByClassName('chart');
for (let i = 0; i < queries.length; ++i) {
draw(i, charts[i], params, queries[i].query);
if (!firstLoad) {
hideAuthError();
}
await Promise.all([...Array(queries.length)].map(async (_, i) => {
return draw(i, charts[i], params, queries[i].query).catch((e) => {
if (!firstLoad) {
showAuthError(e.message);
}
});
})).then(() => {
firstLoad = false;
})
}
function resize() {

View File

@ -10,11 +10,29 @@ mod ffi {
}
struct Item {
text: String,
text_no_newlines: String,
orig_text: String,
}
impl Item {
fn new(text: String) -> Self {
return Self{
// Text that will be printed by skim, and will be used for matching.
//
// Text that will be shown should not contain new lines since in this case skim may
// leave some symbols on the screen, and this looks odd.
text_no_newlines: text.replace("\n", " "),
// This will be used when the match had been selected.
orig_text: text,
};
}
}
impl SkimItem for Item {
fn text(&self) -> Cow<str> {
return Cow::Borrowed(&self.text);
return Cow::Borrowed(&self.text_no_newlines);
}
fn output(&self) -> Cow<str> {
return Cow::Borrowed(&self.orig_text);
}
}
@ -29,12 +47,30 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
.query(Some(prefix.to_str().unwrap()))
.tac(true)
.tiebreak(Some("-score".to_string()))
// Exact mode performs better for SQL.
//
// Default fuzzy search is too smart for SQL, it even takes into account the case, which
// should not be taken into account (you don't want to type "SELECT" instead of "select" to find the
// query).
//
// Exact matching seems like a better algorithm for SQL, it is not 100% exact, it splits by space,
// and applies a separate matcher for each word.
// Note that if you think that "space is not enough" as the delimiter, then you should
// first know that this is the delimiter only for the input query, so to match
// "system.query_log" you can use "sy qu log"
// Also, it should be more familiar to users who do not know how to use fuzzy search.
// (also you can disable exact mode by prepending "'" char).
//
// Also it ignores the case correctly, i.e. it does not have penalty for case mismatch,
// like fuzzy algorithms (take a look at SkimScoreConfig::penalty_case_mismatch).
.exact(true)
.case(CaseMatching::Ignore)
.build()
.unwrap();
let (tx, rx): (SkimItemSender, SkimItemReceiver) = unbounded();
for word in words {
tx.send(Arc::new(Item{ text: word.to_string() })).unwrap();
tx.send(Arc::new(Item::new(word.to_string()))).unwrap();
}
// so that skim could know when to stop waiting for more items.
drop(tx);

View File

@ -142,10 +142,11 @@ enum class AccessType
M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_QUERY_RESULT_CACHE, "SYSTEM DROP QUERY RESULT, DROP QUERY RESULT CACHE, DROP QUERY RESULT", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \

View File

@ -247,15 +247,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
if (adam_rhs.average_gradient.empty())
return;
if (average_gradient.empty())
{
if (!average_squared_gradient.empty() ||
adam_rhs.average_gradient.size() != adam_rhs.average_squared_gradient.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
}
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
for (size_t i = 0; i < average_gradient.size(); ++i)
{
@ -268,14 +261,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
if (average_gradient.empty())
{
if (!average_squared_gradient.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
average_gradient.resize(batch_gradient.size(), Float64{0.0});
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
}
average_gradient.resize(batch_gradient.size(), Float64{0.0});
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i != average_gradient.size(); ++i)
{
@ -328,8 +315,7 @@ void Nesterov::write(WriteBuffer & buf) const
void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
{
const auto & nesterov_rhs = static_cast<const Nesterov &>(rhs);
if (accumulated_gradient.empty())
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
for (size_t i = 0; i < accumulated_gradient.size(); ++i)
{
@ -339,10 +325,7 @@ void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i < batch_gradient.size(); ++i)
{
@ -402,10 +385,7 @@ void Momentum::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
{
/// batch_size is already checked to be greater than 0
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
for (size_t i = 0; i < batch_gradient.size(); ++i)
{

View File

@ -149,9 +149,11 @@ public:
class Momentum : public IWeightsUpdater
{
public:
Momentum() = default;
explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
explicit Momentum(size_t num_params, Float64 alpha_ = 0.1) : alpha(alpha_)
{
accumulated_gradient.resize(num_params + 1, 0);
}
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
@ -170,9 +172,10 @@ private:
class Nesterov : public IWeightsUpdater
{
public:
Nesterov() = default;
explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
explicit Nesterov(size_t num_params, Float64 alpha_ = 0.9) : alpha(alpha_)
{
accumulated_gradient.resize(num_params + 1, 0);
}
void addToBatch(
std::vector<Float64> & batch_gradient,
@ -201,10 +204,14 @@ private:
class Adam : public IWeightsUpdater
{
public:
Adam()
Adam(size_t num_params)
{
beta1_powered = beta1;
beta2_powered = beta2;
average_gradient.resize(num_params + 1, 0);
average_squared_gradient.resize(num_params + 1, 0);
}
void addToBatch(
@ -338,11 +345,11 @@ public:
if (weights_updater_name == "SGD")
new_weights_updater = std::make_shared<StochasticGradientDescent>();
else if (weights_updater_name == "Momentum")
new_weights_updater = std::make_shared<Momentum>();
new_weights_updater = std::make_shared<Momentum>(param_num);
else if (weights_updater_name == "Nesterov")
new_weights_updater = std::make_shared<Nesterov>();
new_weights_updater = std::make_shared<Nesterov>(param_num);
else if (weights_updater_name == "Adam")
new_weights_updater = std::make_shared<Adam>();
new_weights_updater = std::make_shared<Adam>(param_num);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal name of weights updater (should have been checked earlier)");
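The hunks above and in the preceding .cpp file replace lazy resizing of the gradient buffers inside merge()/update() with constructors that size the state up front to num_params + 1 (the extra slot holds the bias term). A minimal sketch of the pattern, using an illustrative SimpleMomentum type rather than the actual ClickHouse classes, and assuming batch_gradient carries num_params weight gradients plus one bias gradient:
#include <cstddef>
#include <vector>
// Hedged sketch: pre-sizing the updater state in the constructor (new style)
// instead of lazily resizing it on first use (old style).
struct SimpleMomentum
{
    explicit SimpleMomentum(size_t num_params, double alpha_ = 0.1)
        : alpha(alpha_)
    {
        // One extra slot for the bias, mirroring "num_params + 1" in the diff.
        accumulated_gradient.resize(num_params + 1, 0.0);
    }
    void update(std::vector<double> & weights, double & bias,
                double learning_rate, const std::vector<double> & batch_gradient)
    {
        // No "if (accumulated_gradient.empty()) resize(...)" branch is needed:
        // the buffer already has the right size (num_params weights + 1 bias).
        for (size_t i = 0; i < batch_gradient.size(); ++i)
            accumulated_gradient[i] = accumulated_gradient[i] * alpha + batch_gradient[i];
        for (size_t i = 0; i < weights.size(); ++i)
            weights[i] += accumulated_gradient[i] * learning_rate;
        bias += accumulated_gradient.back() * learning_rate;
    }
    double alpha = 0.1;
    std::vector<double> accumulated_gradient;
};
Pre-sizing in the constructor removes the per-call emptiness checks and keeps the invariant that the gradient buffers always have matching sizes.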

View File

@ -50,11 +50,13 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
assertBinary(name, arguments);
if (params.size() != 1 && params.size() != 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The number of params does not match for aggregate function {}", name);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"The number of params does not match for aggregate function '{}', expected 1 or 3, got {}", name, params.size());
if (params.size() == 3)
{
if (params.at(1).getType() != arguments[0]->getDefault().getType() || params.at(2).getType() != arguments[0]->getDefault().getType())
if (params.at(1).getType() != arguments[0]->getDefault().getType() ||
params.at(2).getType() != arguments[0]->getDefault().getType())
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The second and third parameters are not the same type as the first arguments for aggregate function {}", name);
@ -63,7 +65,6 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
return createAggregateFunctionSparkbarImpl(name, *arguments[0], *arguments[1], arguments, params);
}
}
void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory)

View File

@ -1,22 +1,32 @@
#pragma once
#include <array>
#include <string_view>
#include <DataTypes/DataTypeString.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <base/range.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnString.h>
#include <Common/PODArray.h>
#include <Common/logger_useful.h>
#include <IO/ReadBufferFromString.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/IColumn.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
template<typename X, typename Y>
struct AggregateFunctionSparkbarData
{
/// TODO: calculate histogram instead of storing all points
using Points = HashMap<X, Y>;
Points points;
@ -26,20 +36,26 @@ struct AggregateFunctionSparkbarData
Y min_y = std::numeric_limits<Y>::max();
Y max_y = std::numeric_limits<Y>::lowest();
void insert(const X & x, const Y & y)
Y insert(const X & x, const Y & y)
{
auto result = points.insert({x, y});
if (!result.second)
result.first->getMapped() += y;
if (isNaN(y) || y <= 0)
return 0;
auto [it, inserted] = points.insert({x, y});
if (!inserted)
it->getMapped() += y;
return it->getMapped();
}
void add(X x, Y y)
{
insert(x, y);
auto new_y = insert(x, y);
min_x = std::min(x, min_x);
max_x = std::max(x, max_x);
min_y = std::min(y, min_y);
max_y = std::max(y, max_y);
max_y = std::max(new_y, max_y);
}
void merge(const AggregateFunctionSparkbarData & other)
@ -48,10 +64,14 @@ struct AggregateFunctionSparkbarData
return;
for (auto & point : other.points)
insert(point.getKey(), point.getMapped());
{
auto new_y = insert(point.getKey(), point.getMapped());
max_y = std::max(new_y, max_y);
}
min_x = std::min(other.min_x, min_x);
max_x = std::max(other.max_x, max_x);
min_y = std::min(other.min_y, min_y);
max_y = std::max(other.max_y, max_y);
}
@ -80,7 +100,6 @@ struct AggregateFunctionSparkbarData
size_t size;
readVarUInt(size, buf);
/// TODO Protection against huge size
X x;
Y y;
for (size_t i = 0; i < size; ++i)
@ -90,7 +109,6 @@ struct AggregateFunctionSparkbarData
insert(x, y);
}
}
};
template<typename X, typename Y>
@ -99,183 +117,127 @@ class AggregateFunctionSparkbar final
{
private:
size_t width;
X min_x;
X max_x;
bool specified_min_max_x;
const size_t width = 0;
template <class T>
String getBar(const T value) const
/// Range for x specified in parameters.
const bool is_specified_range_x = false;
const X begin_x = std::numeric_limits<X>::min();
const X end_x = std::numeric_limits<X>::max();
size_t updateFrame(ColumnString::Chars & frame, Y value) const
{
if (isNaN(value) || value > 8 || value < 1)
return " ";
// ▁▂▃▄▅▆▇█
switch (static_cast<UInt8>(value))
{
case 1: return "▁";
case 2: return "▂";
case 3: return "▃";
case 4: return "▄";
case 5: return "▅";
case 6: return "▆";
case 7: return "▇";
case 8: return "█";
}
return " ";
static constexpr std::array<std::string_view, 9> bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
const auto & bar = (isNaN(value) || value < 1 || 8 < value) ? bars[0] : bars[static_cast<UInt8>(value)];
frame.insert(bar.begin(), bar.end());
return bar.size();
}
/**
* The minimum value of y is rendered as the lowest height "▁",
* the maximum value of y is rendered as the highest height "█", and the middle value will be rendered proportionally.
* If a bucket has no y value, it will be rendered as " ".
* If the actual number of buckets is greater than the specified bucket, it will be compressed by width.
* For example, there are actually 11 buckets, specify 10 buckets, and divide the 11 buckets as follows (11/10):
* 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11.
* The y value of the first bucket will be calculated as follows:
* the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket.
* The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on.
*/
String render(const AggregateFunctionSparkbarData<X, Y> & data) const
void render(ColumnString & to_column, const AggregateFunctionSparkbarData<X, Y> & data) const
{
String value;
if (data.points.empty() || !width)
return value;
auto & values = to_column.getChars();
auto & offsets = to_column.getOffsets();
size_t diff_x;
X min_x_local;
if (specified_min_max_x)
if (data.points.empty())
{
diff_x = max_x - min_x;
min_x_local = min_x;
}
else
{
diff_x = data.max_x - data.min_x;
min_x_local = data.min_x;
values.push_back('\0');
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
return;
}
if ((diff_x + 1) <= width)
{
Y min_y = data.min_y;
Y max_y = data.max_y;
Float64 diff_y = max_y - min_y;
auto from_x = is_specified_range_x ? begin_x : data.min_x;
auto to_x = is_specified_range_x ? end_x : data.max_x;
if (diff_y != 0.0)
if (from_x >= to_x)
{
size_t sz = updateFrame(values, 8);
values.push_back('\0');
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
return;
}
PaddedPODArray<Y> histogram(width, 0);
PaddedPODArray<UInt64> fhistogram(width, 0);
for (const auto & point : data.points)
{
if (point.getKey() < from_x || to_x < point.getKey())
continue;
X delta = to_x - from_x;
if (delta < std::numeric_limits<X>::max())
delta = delta + 1;
X value = point.getKey() - from_x;
Float64 w = histogram.size();
size_t index = std::min<size_t>(static_cast<size_t>(w / delta * value), histogram.size() - 1);
if (std::numeric_limits<Y>::max() - histogram[index] > point.getMapped())
{
for (size_t i = 0; i <= diff_x; ++i)
{
auto it = data.points.find(static_cast<X>(min_x_local + i));
bool found = it != data.points.end();
value += getBar(found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
}
histogram[index] += point.getMapped();
fhistogram[index] += 1;
}
else
{
for (size_t i = 0; i <= diff_x; ++i)
value += getBar(data.points.has(min_x_local + static_cast<X>(i)) ? 1 : 0);
/// In case of overflow, just saturate
histogram[index] = std::numeric_limits<Y>::max();
}
}
else
for (size_t i = 0; i < histogram.size(); ++i)
{
// begin reshapes to width buckets
Float64 multiple_d = (diff_x + 1) / static_cast<Float64>(width);
std::optional<Float64> min_y;
std::optional<Float64> max_y;
std::optional<Float64> new_y;
std::vector<std::optional<Float64>> new_points;
new_points.reserve(width);
std::pair<size_t, Float64> bound{0, 0.0};
size_t cur_bucket_num = 0;
// upper bound for bucket
auto upper_bound = [&](size_t bucket_num)
{
bound.second = (bucket_num + 1) * multiple_d;
bound.first = static_cast<size_t>(std::floor(bound.second));
};
upper_bound(cur_bucket_num);
for (size_t i = 0; i <= (diff_x + 1); ++i)
{
if (i == bound.first) // is bound
{
Float64 proportion = bound.second - bound.first;
auto it = data.points.find(min_x_local + static_cast<X>(i));
bool found = (it != data.points.end());
if (found && proportion > 0)
new_y = new_y.value_or(0) + it->getMapped() * proportion;
if (new_y)
{
Float64 avg_y = new_y.value() / multiple_d;
new_points.emplace_back(avg_y);
// If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it.
if (!min_y || avg_y < min_y)
min_y = avg_y;
if (!max_y || avg_y > max_y)
max_y = avg_y;
}
else
{
new_points.emplace_back();
}
// next bucket
new_y = found ? ((1 - proportion) * it->getMapped()) : std::optional<Float64>();
upper_bound(++cur_bucket_num);
}
else
{
auto it = data.points.find(min_x_local + static_cast<X>(i));
if (it != data.points.end())
new_y = new_y.value_or(0) + it->getMapped();
}
}
if (!min_y || !max_y) // No value is set
return {};
Float64 diff_y = max_y.value() - min_y.value();
auto get_bars = [&] (const std::optional<Float64> & point_y)
{
value += getBar(point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
};
auto get_bars_for_constant = [&] (const std::optional<Float64> & point_y)
{
value += getBar(point_y ? 1 : 0);
};
if (diff_y != 0.0)
std::for_each(new_points.begin(), new_points.end(), get_bars);
else
std::for_each(new_points.begin(), new_points.end(), get_bars_for_constant);
if (fhistogram[i] > 0)
histogram[i] /= fhistogram[i];
}
return value;
Y y_max = 0;
for (auto & y : histogram)
{
if (isNaN(y) || y <= 0)
continue;
y_max = std::max(y_max, y);
}
if (y_max == 0)
{
values.push_back('\0');
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
return;
}
for (auto & y : histogram)
{
if (isNaN(y) || y <= 0)
y = 0;
else
y = y * 7 / y_max + 1;
}
size_t sz = 0;
for (const auto & y : histogram)
sz += updateFrame(values, y);
values.push_back('\0');
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
}
public:
AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(
arguments, params, std::make_shared<DataTypeString>())
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(arguments, params, std::make_shared<DataTypeString>())
, width(params.empty() ? 0 : params.at(0).safeGet<UInt64>())
, is_specified_range_x(params.size() >= 3)
, begin_x(is_specified_range_x ? static_cast<X>(params.at(1).safeGet<X>()) : std::numeric_limits<X>::min())
, end_x(is_specified_range_x ? static_cast<X>(params.at(2).safeGet<X>()) : std::numeric_limits<X>::max())
{
width = params.at(0).safeGet<UInt64>();
if (params.size() == 3)
{
specified_min_max_x = true;
min_x = static_cast<X>(params.at(1).safeGet<X>());
max_x = static_cast<X>(params.at(2).safeGet<X>());
}
else
{
specified_min_max_x = false;
min_x = std::numeric_limits<X>::min();
max_x = std::numeric_limits<X>::max();
}
if (width < 2 || 1024 < width)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter width must be in range [2, 1024]");
if (begin_x >= end_x)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter `min_x` must be less than `max_x`");
}
String getName() const override
@ -286,7 +248,7 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override
{
X x = assert_cast<const ColumnVector<X> *>(columns[0])->getData()[row_num];
if (min_x <= x && x <= max_x)
if (begin_x <= x && x <= end_x)
{
Y y = assert_cast<const ColumnVector<Y> *>(columns[1])->getData()[row_num];
this->data(place).add(x, y);
@ -314,8 +276,7 @@ public:
{
auto & to_column = assert_cast<ColumnString &>(to);
const auto & data = this->data(place);
const String & value = render(data);
to_column.insertData(value.data(), value.size());
render(to_column, data);
}
};
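The rewritten render() above collects the points into a fixed histogram of width buckets over [from_x, to_x], averages the y values per bucket, scales every non-empty bucket into the range 1..8 relative to the largest bucket, and emits one bar glyph per bucket. A self-contained sketch of that final scaling step, assuming a plain std::vector<double> instead of ClickHouse column types (renderSparkbar is an illustrative name):
#include <algorithm>
#include <array>
#include <cmath>
#include <string>
#include <string_view>
#include <vector>
// Hedged sketch of the bucket -> bar mapping: each non-empty bucket is scaled
// into 1..8 relative to the maximum bucket and rendered with one of the eight
// block characters; empty or non-positive buckets render as a space.
std::string renderSparkbar(const std::vector<double> & histogram)
{
    static constexpr std::array<std::string_view, 9> bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
    double y_max = 0;
    for (double y : histogram)
        if (!std::isnan(y) && y > 0)
            y_max = std::max(y_max, y);
    if (y_max == 0)
        return "";
    std::string frame;
    for (double y : histogram)
    {
        size_t level = (std::isnan(y) || y <= 0) ? 0 : static_cast<size_t>(y * 7 / y_max + 1);
        frame += bars[std::min<size_t>(level, 8)];
    }
    return frame;
}
// Example: a histogram of {1, 0, 4, 8} renders roughly as "▁ ▄█".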

View File

@ -0,0 +1,40 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/AggregateFunctionVarianceMatrix.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename FunctionTemplate>
AggregateFunctionPtr createAggregateFunctionVarianceMatrix(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
for (const auto & argument_type : argument_types)
if (!isNativeNumber(argument_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} only supports numerical types", name);
return std::make_shared<FunctionTemplate>(argument_types);
}
}
void registerAggregateFunctionsVarianceMatrix(AggregateFunctionFactory & factory)
{
factory.registerFunction("covarSampMatrix", createAggregateFunctionVarianceMatrix<AggregateFunctionCovarSampMatrix>);
factory.registerFunction("covarPopMatrix", createAggregateFunctionVarianceMatrix<AggregateFunctionCovarPopMatrix>);
factory.registerFunction("corrMatrix", createAggregateFunctionVarianceMatrix<AggregateFunctionCorrMatrix>);
}
}

View File

@ -0,0 +1,159 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Common/PODArray.h>
#include <Common/PODArray_fwd.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/Moments.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
struct Settings;
enum class StatisticsMatrixFunctionKind
{
covarPopMatrix,
covarSampMatrix,
corrMatrix
};
template <StatisticsMatrixFunctionKind _kind>
struct AggregateFunctionVarianceMatrixData
{
using DataType = std::conditional_t<_kind == StatisticsMatrixFunctionKind::corrMatrix, CorrMoments<Float64>, CovarMoments<Float64>>;
AggregateFunctionVarianceMatrixData() = default;
explicit AggregateFunctionVarianceMatrixData(const size_t _num_args)
: num_args(_num_args)
{
data_matrix.resize_fill(num_args * (num_args + 1) / 2, DataType());
}
void add(const IColumn ** column, const size_t row_num)
{
for (size_t i = 0; i < num_args; ++i)
for (size_t j = 0; j <= i; ++j)
data_matrix[i * (i + 1) / 2 + j].add(column[i]->getFloat64(row_num), column[j]->getFloat64(row_num));
}
void merge(const AggregateFunctionVarianceMatrixData & other)
{
for (size_t i = 0; i < num_args; ++i)
for (size_t j = 0; j <= i; ++j)
data_matrix[i * (i + 1) / 2 + j].merge(other.data_matrix[i * (i + 1) / 2 + j]);
}
void serialize(WriteBuffer & buf) const
{
for (size_t i = 0; i < num_args; ++i)
for (size_t j = 0; j <= i; ++j)
data_matrix[i * (i + 1) / 2 + j].write(buf);
}
void deserialize(ReadBuffer & buf)
{
for (size_t i = 0; i < num_args; ++i)
for (size_t j = 0; j <= i; ++j)
data_matrix[i * (i + 1) / 2 + j].read(buf);
}
void insertResultInto(IColumn & to) const
{
auto & data_to = assert_cast<ColumnFloat64 &>(assert_cast<ColumnArray &>(assert_cast<ColumnArray &>(to).getData()).getData()).getData();
auto & root_offsets_to = assert_cast<ColumnArray &>(to).getOffsets();
auto & nested_offsets_to = assert_cast<ColumnArray &>(assert_cast<ColumnArray &>(to).getData()).getOffsets();
for (size_t i = 0; i < num_args; ++i)
{
for (size_t j = 0; j < num_args; ++j)
{
auto & data = i < j ? data_matrix[j * (j + 1) / 2 + i] : data_matrix[i * (i + 1) / 2 + j];
if constexpr (kind == StatisticsMatrixFunctionKind::covarPopMatrix)
data_to.push_back(data.getPopulation());
if constexpr (kind == StatisticsMatrixFunctionKind::covarSampMatrix)
data_to.push_back(data.getSample());
if constexpr (kind == StatisticsMatrixFunctionKind::corrMatrix)
data_to.push_back(data.get());
}
nested_offsets_to.push_back(nested_offsets_to.back() + num_args);
}
root_offsets_to.push_back(root_offsets_to.back() + num_args);
}
static constexpr StatisticsMatrixFunctionKind kind = _kind;
PaddedPODArray<DataType> data_matrix;
size_t num_args;
};
template <typename Data>
class AggregateFunctionVarianceMatrix final
: public IAggregateFunctionDataHelper<Data, AggregateFunctionVarianceMatrix<Data>>
{
public:
explicit AggregateFunctionVarianceMatrix(const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionVarianceMatrix<Data>>(argument_types_, {}, createResultType())
{}
AggregateFunctionVarianceMatrix(const IDataType &, const DataTypes & argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionVarianceMatrix<Data>>(argument_types_, {}, createResultType())
{}
String getName() const override
{
if constexpr (Data::kind == StatisticsMatrixFunctionKind::covarPopMatrix)
return "covarPopMatrix";
if constexpr (Data::kind == StatisticsMatrixFunctionKind::covarSampMatrix)
return "covarSampMatrix";
if constexpr (Data::kind == StatisticsMatrixFunctionKind::corrMatrix)
return "corrMatrix";
UNREACHABLE();
}
void create(AggregateDataPtr __restrict place) const override
{
new (place) Data(this->argument_types.size());
}
static DataTypePtr createResultType()
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>()));
}
bool allocatesMemoryInArena() const override { return false; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
this->data(place).add(columns, row_num);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
this->data(place).insertResultInto(to);
}
};
using AggregateFunctionCovarPopMatrix = AggregateFunctionVarianceMatrix<AggregateFunctionVarianceMatrixData<StatisticsMatrixFunctionKind::covarPopMatrix>>;
using AggregateFunctionCovarSampMatrix = AggregateFunctionVarianceMatrix<AggregateFunctionVarianceMatrixData<StatisticsMatrixFunctionKind::covarSampMatrix>>;
using AggregateFunctionCorrMatrix = AggregateFunctionVarianceMatrix<AggregateFunctionVarianceMatrixData<StatisticsMatrixFunctionKind::corrMatrix>>;
}
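AggregateFunctionVarianceMatrixData above stores only the lower triangle of the symmetric covariance/correlation matrix in a flat array, addressing element (i, j) with j <= i at offset i * (i + 1) / 2 + j, and insertResultInto mirrors the values across the diagonal when producing the Array(Array(Float64)) result. A tiny sketch of that packed-triangle indexing, with plain doubles instead of the moments structs (PackedSymmetricMatrix is an illustrative name):
#include <cstddef>
#include <utility>
#include <vector>
// Hedged sketch: packed storage of a symmetric matrix's lower triangle,
// using the same i * (i + 1) / 2 + j offset as the diff above.
struct PackedSymmetricMatrix
{
    explicit PackedSymmetricMatrix(size_t n_) : n(n_), data(n_ * (n_ + 1) / 2, 0.0) {}
    double & at(size_t i, size_t j)
    {
        if (i < j)
            std::swap(i, j); // symmetric: (i, j) and (j, i) share one slot
        return data[i * (i + 1) / 2 + j];
    }
    size_t n;
    std::vector<double> data;
};
For n arguments this keeps n * (n + 1) / 2 accumulators instead of n * n, e.g. 6 instead of 9 for three columns, while the full square matrix is still reconstructed in the result.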

View File

@ -40,6 +40,7 @@ void registerAggregateFunctionsMax(AggregateFunctionFactory &);
void registerAggregateFunctionsAny(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
void registerAggregateFunctionsVarianceMatrix(AggregateFunctionFactory &);
void registerAggregateFunctionSum(AggregateFunctionFactory &);
void registerAggregateFunctionSumCount(AggregateFunctionFactory &);
void registerAggregateFunctionSumMap(AggregateFunctionFactory &);
@ -126,6 +127,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsAny(factory);
registerAggregateFunctionsStatisticsStable(factory);
registerAggregateFunctionsStatisticsSimple(factory);
registerAggregateFunctionsVarianceMatrix(factory);
registerAggregateFunctionSum(factory);
registerAggregateFunctionSumCount(factory);
registerAggregateFunctionSumMap(factory);

View File

@ -313,7 +313,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by_limit)
current_query_tree->getLimitByLimit() = buildExpression(select_limit_by_limit, current_context);
auto select_limit_by_offset = select_query_typed.limitOffset();
auto select_limit_by_offset = select_query_typed.limitByOffset();
if (select_limit_by_offset)
current_query_tree->getLimitByOffset() = buildExpression(select_limit_by_offset, current_context);

View File

@ -9,13 +9,12 @@
#include <IO/WriteBufferFromS3.h>
#include <IO/HTTPHeaderEntries.h>
#include <IO/S3/copyS3File.h>
#include <IO/S3/Client.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/DeleteObjectsRequest.h>
#include <aws/s3/model/ListObjectsRequest.h>
#include <filesystem>
@ -31,7 +30,7 @@ namespace ErrorCodes
namespace
{
std::shared_ptr<Aws::S3::S3Client>
std::shared_ptr<S3::Client>
makeS3Client(const S3::URI & s3_uri, const String & access_key_id, const String & secret_access_key, const ContextPtr & context)
{
auto settings = context->getStorageS3Settings().getSettings(s3_uri.uri.toString());
@ -71,9 +70,9 @@ namespace
context->getConfigRef().getBool("s3.use_insecure_imds_request", false)));
}
Aws::Vector<Aws::S3::Model::Object> listObjects(Aws::S3::S3Client & client, const S3::URI & s3_uri, const String & file_name)
Aws::Vector<Aws::S3::Model::Object> listObjects(S3::Client & client, const S3::URI & s3_uri, const String & file_name)
{
Aws::S3::Model::ListObjectsRequest request;
S3::ListObjectsRequest request;
request.SetBucket(s3_uri.bucket);
request.SetPrefix(fs::path{s3_uri.key} / file_name);
request.SetMaxKeys(1);
@ -228,7 +227,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
void BackupWriterS3::removeFile(const String & file_name)
{
Aws::S3::Model::DeleteObjectRequest request;
S3::DeleteObjectRequest request;
request.SetBucket(s3_uri.bucket);
request.SetKey(fs::path(s3_uri.key) / file_name);
auto outcome = client->DeleteObject(request);
@ -285,7 +284,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names)
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(current_chunk);
Aws::S3::Model::DeleteObjectsRequest request;
S3::DeleteObjectsRequest request;
request.SetBucket(s3_uri.bucket);
request.SetDelete(delkeys);

View File

@ -7,7 +7,6 @@
#include <IO/ReadSettings.h>
#include <IO/S3Common.h>
#include <Storages/StorageS3Settings.h>
#include <aws/s3/S3Client.h>
namespace DB
@ -27,7 +26,7 @@ public:
private:
S3::URI s3_uri;
std::shared_ptr<Aws::S3::S3Client> client;
std::shared_ptr<S3::Client> client;
ReadSettings read_settings;
S3Settings::RequestSettings request_settings;
};
@ -73,7 +72,7 @@ private:
void removeFilesBatch(const Strings & file_names);
S3::URI s3_uri;
std::shared_ptr<Aws::S3::S3Client> client;
std::shared_ptr<S3::Client> client;
ReadSettings read_settings;
S3Settings::RequestSettings request_settings;
Poco::Logger * log;

View File

@ -271,16 +271,22 @@ size_t BackupImpl::getNumFiles() const
return num_files;
}
size_t BackupImpl::getNumProcessedFiles() const
UInt64 BackupImpl::getTotalSize() const
{
std::lock_guard lock{mutex};
return num_processed_files;
return total_size;
}
UInt64 BackupImpl::getProcessedFilesSize() const
size_t BackupImpl::getNumEntries() const
{
std::lock_guard lock{mutex};
return processed_files_size;
return num_entries;
}
UInt64 BackupImpl::getSizeOfEntries() const
{
std::lock_guard lock{mutex};
return size_of_entries;
}
UInt64 BackupImpl::getUncompressedSize() const
@ -295,6 +301,18 @@ UInt64 BackupImpl::getCompressedSize() const
return compressed_size;
}
size_t BackupImpl::getNumReadFiles() const
{
std::lock_guard lock{mutex};
return num_read_files;
}
UInt64 BackupImpl::getNumReadBytes() const
{
std::lock_guard lock{mutex};
return num_read_bytes;
}
void BackupImpl::writeBackupMetadata()
{
assert(!is_internal_backup);
@ -323,12 +341,18 @@ void BackupImpl::writeBackupMetadata()
}
}
size_t index = 0;
for (const auto & info : all_file_infos)
num_files = all_file_infos.size();
total_size = 0;
num_entries = 0;
size_of_entries = 0;
for (size_t i = 0; i != all_file_infos.size(); ++i)
{
String prefix = index ? "contents.file[" + std::to_string(index) + "]." : "contents.file.";
const auto & info = all_file_infos[i];
String prefix = i ? "contents.file[" + std::to_string(i) + "]." : "contents.file.";
config->setString(prefix + "name", info.file_name);
config->setUInt64(prefix + "size", info.size);
if (info.size)
{
config->setString(prefix + "checksum", hexChecksum(info.checksum));
@ -348,8 +372,14 @@ void BackupImpl::writeBackupMetadata()
if (info.pos_in_archive != static_cast<size_t>(-1))
config->setUInt64(prefix + "pos_in_archive", info.pos_in_archive);
}
increaseUncompressedSize(info);
++index;
total_size += info.size;
bool has_entry = !deduplicate_files || (info.size && (info.size != info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name)));
if (has_entry)
{
++num_entries;
size_of_entries += info.size - info.base_size;
}
}
std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
@ -366,8 +396,7 @@ void BackupImpl::writeBackupMetadata()
out->write(str.data(), str.size());
out->finalize();
increaseUncompressedSize(str.size());
increaseProcessedSize(str.size());
uncompressed_size = size_of_entries + str.size();
}
@ -392,8 +421,6 @@ void BackupImpl::readBackupMetadata()
String str;
readStringUntilEOF(str, *in);
increaseUncompressedSize(str.size());
increaseProcessedSize(str.size());
Poco::XML::DOMParser dom_parser;
Poco::AutoPtr<Poco::XML::Document> config = dom_parser.parseMemory(str.data(), str.size());
const Poco::XML::Node * config_root = getRootNode(config);
@ -412,6 +439,11 @@ void BackupImpl::readBackupMetadata()
if (config_root->getNodeByPath("base_backup_uuid"))
base_backup_uuid = parse<UUID>(getString(config_root, "base_backup_uuid"));
num_files = 0;
total_size = 0;
num_entries = 0;
size_of_entries = 0;
const auto * contents = config_root->getNodeByPath("contents");
for (const Poco::XML::Node * child = contents->firstChild(); child; child = child->nextSibling())
{
@ -456,10 +488,20 @@ void BackupImpl::readBackupMetadata()
}
coordination->addFileInfo(info);
increaseUncompressedSize(info);
++num_files;
total_size += info.size;
bool has_entry = !deduplicate_files || (info.size && (info.size != info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name)));
if (has_entry)
{
++num_entries;
size_of_entries += info.size - info.base_size;
}
}
}
uncompressed_size = size_of_entries + str.size();
compressed_size = uncompressed_size;
if (!use_archives)
setCompressedSize();
}
@ -612,7 +654,8 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
if (open_mode != OpenMode::READ)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");
increaseProcessedSize(size_and_checksum.first);
++num_read_files;
num_read_bytes += size_and_checksum.first;
if (!size_and_checksum.first)
{
@ -780,7 +823,8 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
{
std::lock_guard lock{mutex};
increaseProcessedSize(info);
++num_files;
total_size += info.size;
}
/// Empty file, nothing to backup
@ -909,7 +953,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
{
LOG_TRACE(log, "Will copy file {}", adjusted_path);
if (!num_files_written)
bool has_entries = false;
{
std::lock_guard lock{mutex};
has_entries = num_entries > 0;
}
if (!has_entries)
checkLockFile(true);
if (use_archives)
@ -951,7 +1000,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
}
}
++num_files_written;
{
std::lock_guard lock{mutex};
++num_entries;
size_of_entries += info.size - info.base_size;
uncompressed_size += info.size - info.base_size;
}
}
@ -981,29 +1035,6 @@ void BackupImpl::finalizeWriting()
}
void BackupImpl::increaseUncompressedSize(UInt64 file_size)
{
uncompressed_size += file_size;
++num_files;
}
void BackupImpl::increaseUncompressedSize(const FileInfo & info)
{
if ((info.size > info.base_size) && (info.data_file_name.empty() || (info.data_file_name == info.file_name)))
increaseUncompressedSize(info.size - info.base_size);
}
void BackupImpl::increaseProcessedSize(UInt64 file_size) const
{
processed_files_size += file_size;
++num_processed_files;
}
void BackupImpl::increaseProcessedSize(const FileInfo & info)
{
increaseProcessedSize(info.size);
}
void BackupImpl::setCompressedSize()
{
if (use_archives)

View File

@ -59,10 +59,13 @@ public:
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
size_t getNumFiles() const override;
size_t getNumProcessedFiles() const override;
UInt64 getProcessedFilesSize() const override;
UInt64 getTotalSize() const override;
size_t getNumEntries() const override;
UInt64 getSizeOfEntries() const override;
UInt64 getUncompressedSize() const override;
UInt64 getCompressedSize() const override;
size_t getNumReadFiles() const override;
UInt64 getNumReadBytes() const override;
Strings listFiles(const String & directory, bool recursive) const override;
bool hasFiles(const String & directory) const override;
bool fileExists(const String & file_name) const override;
@ -103,16 +106,6 @@ private:
std::shared_ptr<IArchiveReader> getArchiveReader(const String & suffix) const;
std::shared_ptr<IArchiveWriter> getArchiveWriter(const String & suffix);
/// Increases `uncompressed_size` by a specific value,
/// also increases `num_files` by 1.
void increaseUncompressedSize(UInt64 file_size);
void increaseUncompressedSize(const FileInfo & info);
/// Increases `num_processed_files` by a specific value,
/// also increases `num_processed_files` by 1.
void increaseProcessedSize(UInt64 file_size) const;
void increaseProcessedSize(const FileInfo & info);
/// Calculates and sets `compressed_size`.
void setCompressedSize();
@ -129,10 +122,13 @@ private:
std::optional<UUID> uuid;
time_t timestamp = 0;
size_t num_files = 0;
mutable size_t num_processed_files = 0;
mutable UInt64 processed_files_size = 0;
UInt64 total_size = 0;
size_t num_entries = 0;
UInt64 size_of_entries = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
mutable size_t num_read_files = 0;
mutable UInt64 num_read_bytes = 0;
int version;
std::optional<BackupInfo> base_backup_info;
std::shared_ptr<const IBackup> base_backup;
@ -141,7 +137,6 @@ private:
std::pair<String, std::shared_ptr<IArchiveWriter>> archive_writers[2];
String current_archive_suffix;
String lock_file_name;
std::atomic<size_t> num_files_written = 0;
bool writing_finalized = false;
bool deduplicate_files = true;
const Poco::Logger * log;

View File

@ -338,20 +338,20 @@ void BackupsWorker::doBackup(
}
size_t num_files = 0;
size_t num_processed_files = 0;
UInt64 total_size = 0;
size_t num_entries = 0;
UInt64 uncompressed_size = 0;
UInt64 compressed_size = 0;
UInt64 processed_files_size = 0;
/// Finalize backup (write its metadata).
if (!backup_settings.internal)
{
backup->finalizeWriting();
num_files = backup->getNumFiles();
num_processed_files = backup->getNumProcessedFiles();
total_size = backup->getTotalSize();
num_entries = backup->getNumEntries();
uncompressed_size = backup->getUncompressedSize();
compressed_size = backup->getCompressedSize();
processed_files_size = backup->getProcessedFilesSize();
}
/// Close the backup.
@ -359,7 +359,7 @@ void BackupsWorker::doBackup(
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size);
setNumFilesAndSize(backup_id, num_files, total_size, num_entries, uncompressed_size, compressed_size, 0, 0);
}
catch (...)
{
@ -583,10 +583,12 @@ void BackupsWorker::doRestore(
setNumFilesAndSize(
restore_id,
backup->getNumFiles(),
backup->getNumProcessedFiles(),
backup->getProcessedFilesSize(),
backup->getTotalSize(),
backup->getNumEntries(),
backup->getUncompressedSize(),
backup->getCompressedSize());
backup->getCompressedSize(),
backup->getNumReadFiles(),
backup->getNumReadBytes());
}
catch (...)
{
@ -667,7 +669,9 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw
}
void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size)
void BackupsWorker::setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries,
UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes)
{
std::lock_guard lock{infos_mutex};
auto it = infos.find(id);
@ -676,10 +680,12 @@ void BackupsWorker::setNumFilesAndSize(const String & id, size_t num_files, size
auto & info = it->second;
info.num_files = num_files;
info.num_processed_files = num_processed_files;
info.processed_files_size = processed_files_size;
info.total_size = total_size;
info.num_entries = num_entries;
info.uncompressed_size = uncompressed_size;
info.compressed_size = compressed_size;
info.num_read_files = num_read_files;
info.num_read_bytes = num_read_bytes;
}

View File

@ -53,23 +53,27 @@ public:
/// Status of backup or restore operation.
BackupStatus status;
/// Number of files in the backup (including backup's metadata; only unique files are counted).
/// The number of files stored in the backup.
size_t num_files = 0;
/// Number of processed files during backup or restore process
/// For restore it includes files from base backups
size_t num_processed_files = 0;
/// The total size of files stored in the backup.
UInt64 total_size = 0;
/// Size of processed files during backup or restore
/// For restore in includes sizes from base backups
UInt64 processed_files_size = 0;
/// The number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder.
size_t num_entries = 0;
/// Size of all files in the backup (including backup's metadata; only unique files are counted).
/// The uncompressed size of the backup.
UInt64 uncompressed_size = 0;
/// Size of the backup if it's stored as an archive; or the same as `uncompressed_size` if the backup is stored as a folder.
/// The compressed size of the backup.
UInt64 compressed_size = 0;
/// Returns the number of files read during RESTORE from this backup.
size_t num_read_files = 0;
// Returns the total size of files read during RESTORE from this backup.
UInt64 num_read_bytes = 0;
/// Set only if there was an error.
std::exception_ptr exception;
String error_message;
@ -110,7 +114,9 @@ private:
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }
void setNumFilesAndSize(const OperationID & id, size_t num_files, size_t num_processed_files, UInt64 processed_files_size, UInt64 uncompressed_size, UInt64 compressed_size);
void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries,
UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes);
std::vector<Info> getAllActiveBackupInfos() const;
std::vector<Info> getAllActiveRestoreInfos() const;
bool hasConcurrentBackups(const BackupSettings & backup_settings) const;

View File

@ -37,21 +37,38 @@ public:
/// Returns UUID of the backup.
virtual UUID getUUID() const = 0;
/// Returns the number of unique files in the backup.
/// Returns the number of files stored in the backup. Compare with getNumEntries().
virtual size_t getNumFiles() const = 0;
/// Returns the number of files were processed for backup or restore
virtual size_t getNumProcessedFiles() const = 0;
/// Returns the total size of files stored in the backup. Compare with getSizeOfEntries().
virtual UInt64 getTotalSize() const = 0;
// Returns the total size of processed files for backup or restore
virtual UInt64 getProcessedFilesSize() const = 0;
/// Returns the number of entries in the backup, i.e. the number of files inside the folder if the backup is stored as a folder or
/// the number of files inside the archive if the backup is stored as an archive.
/// It's not the same as getNumFiles() if it's an incremental backup or if it contains empty files or duplicates.
/// The following is always true: `getNumEntries() <= getNumFiles()`.
virtual size_t getNumEntries() const = 0;
/// Returns the total size of unique files in the backup.
/// Returns the size of entries in the backup, i.e. the total size of files inside the folder if the backup is stored as a folder or
/// the total size of files inside the archive if the backup is stored as an archive.
/// It's not the same as getTotalSize() because it doesn't include the size of duplicates and the size of files from the base backup.
/// The following is always true: `getSizeOfEntries() <= getTotalSize()`.
virtual UInt64 getSizeOfEntries() const = 0;
/// Returns the uncompressed size of the backup. It equals to `getSizeOfEntries() + size_of_backup_metadata (.backup)`
virtual UInt64 getUncompressedSize() const = 0;
/// Returns the compressed size of the backup. If the backup is not stored as an archive it returns the same as getUncompressedSize().
/// Returns the compressed size of the backup. If the backup is not stored as an archive it's the same as getUncompressedSize().
virtual UInt64 getCompressedSize() const = 0;
/// Returns the number of files read during RESTORE from this backup.
/// The following is always true: `getNumReadFiles() <= getNumFiles()`.
virtual size_t getNumReadFiles() const = 0;
// Returns the total size of files read during RESTORE from this backup.
/// The following is always true: `getNumReadBytes() <= getTotalSize()`.
virtual UInt64 getNumReadBytes() const = 0;
/// Returns names of entries stored in a specified directory in the backup.
/// If `directory` is empty or '/' the functions returns entries in the backup's root.
virtual Strings listFiles(const String & directory, bool recursive = false) const = 0;
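The new counters above separate logical files (getNumFiles()/getTotalSize()) from the entries that are physically written into the backup folder or archive (getNumEntries()/getSizeOfEntries()). A small sketch of the rule that decides whether a file contributes an entry, mirroring the has_entry expression from the BackupImpl.cpp hunks earlier in this diff (FileInfoLite is a trimmed, illustrative struct):
#include <cstdint>
#include <string>
// Hedged sketch of the "does this file add an entry?" rule used when the new
// counters are computed. Field names follow the diff; the struct is simplified.
struct FileInfoLite
{
    std::string file_name;
    std::string data_file_name; /// empty, or equal to file_name, when the data is stored under its own name
    uint64_t size = 0;
    uint64_t base_size = 0;     /// part of the file already present in the base backup
};
bool hasEntry(const FileInfoLite & info, bool deduplicate_files)
{
    if (!deduplicate_files)
        return true;
    /// Empty files, files fully covered by the base backup, and duplicates
    /// pointing at another file's data do not produce their own entry.
    return info.size != 0
        && info.size != info.base_size
        && (info.data_file_name.empty() || info.data_file_name == info.file_name);
}
// Consequently getNumEntries() <= getNumFiles() and getSizeOfEntries() <= getTotalSize().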

View File

@ -11,7 +11,7 @@
namespace DB
{
class CatBoostLibraryBridgeHelper : public LibraryBridgeHelper
class CatBoostLibraryBridgeHelper final : public LibraryBridgeHelper
{
public:
static constexpr inline auto PING_HANDLER = "/catboost_ping";

View File

@ -14,7 +14,7 @@ namespace DB
class Pipe;
// Class to access the external dictionary part of the clickhouse-library-bridge.
class ExternalDictionaryLibraryBridgeHelper : public LibraryBridgeHelper
class ExternalDictionaryLibraryBridgeHelper final : public LibraryBridgeHelper
{
public:

View File

@ -6,7 +6,6 @@
#include <Poco/Net/HTTPRequest.h>
#include <Common/ShellCommand.h>
#include <Common/logger_useful.h>
#include <IO/ConnectionTimeoutsContext.h>
namespace DB

View File

@ -1,5 +1,7 @@
#include "LibraryBridgeHelper.h"
#include <IO/ConnectionTimeoutsContext.h>
namespace DB
{

View File

@ -226,13 +226,7 @@ add_object_library(clickhouse_access Access)
add_object_library(clickhouse_backups Backups)
add_object_library(clickhouse_core Core)
add_object_library(clickhouse_core_mysql Core/MySQL)
if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
add_headers_and_sources(dbms Compression)
list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h)
list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp)
else ()
add_object_library(clickhouse_compression Compression)
endif ()
add_object_library(clickhouse_compression Compression)
add_object_library(clickhouse_querypipeline QueryPipeline)
add_object_library(clickhouse_datatypes DataTypes)
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
@ -348,8 +342,8 @@ set_source_files_properties(
PROPERTIES COMPILE_FLAGS "-mwaitpkg")
endif ()
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2)
target_link_libraries(common PUBLIC ch_contrib::re2_st)
target_link_libraries(common PUBLIC ch_contrib::re2)
target_link_libraries(clickhouse_common_io
PUBLIC
@ -518,6 +512,11 @@ if (TARGET ch_contrib::msgpack)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::msgpack)
endif()
if (TARGET ch_contrib::liburing)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::liburing)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_INCLUDE_DIR})
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)
if (USE_ORC)

View File

@ -41,6 +41,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Processors/Formats/Impl/NullFormat.h>
@ -816,17 +817,15 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query)
{
if (fake_drop)
{
if (parsed_query->as<ASTDropQuery>())
return;
}
if (fake_drop && parsed_query->as<ASTDropQuery>())
return;
auto query = query_to_execute;
/// Rewrite query only when we have query parameters.
/// Note that if query is rewritten, comments in query are lost.
/// But the user often wants to see comments in server logs, query log, processlist, etc.
/// For recent versions of the server query parameters will be transferred by network and applied on the server side.
auto query = query_to_execute;
if (!query_parameters.empty()
&& connection->getServerRevision(connection_parameters.timeouts) < DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS)
{
@ -838,6 +837,22 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
query = serializeAST(*parsed_query);
}
if (allow_merge_tree_settings && parsed_query->as<ASTCreateQuery>())
{
/// Rewrite query if new settings were added.
if (addMergeTreeSettings(*parsed_query->as<ASTCreateQuery>()))
{
/// Replace query parameters because AST cannot be serialized otherwise.
if (!query_parameters.empty())
{
ReplaceQueryParameterVisitor visitor(query_parameters);
visitor.visit(parsed_query);
}
query = serializeAST(*parsed_query);
}
}
int retries_left = 10;
while (retries_left)
{
@ -2065,6 +2080,41 @@ void ClientBase::initQueryIdFormats()
}
bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create)
{
if (ast_create.attach
|| !ast_create.storage
|| !ast_create.storage->isExtendedStorageDefinition()
|| !ast_create.storage->engine
|| ast_create.storage->engine->name.find("MergeTree") == std::string::npos)
return false;
auto all_changed = cmd_merge_tree_settings.allChanged();
if (all_changed.begin() == all_changed.end())
return false;
if (!ast_create.storage->settings)
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
ast_create.storage->set(ast_create.storage->settings, settings_ast);
}
auto & storage_settings = *ast_create.storage->settings;
bool added_new_setting = false;
for (const auto & setting : all_changed)
{
if (!storage_settings.changes.tryGet(setting.getName()))
{
storage_settings.changes.emplace_back(setting.getName(), setting.getValue());
added_new_setting = true;
}
}
return added_new_setting;
}
void ClientBase::runInteractive()
{
if (config().has("query_id"))
@ -2302,6 +2352,30 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description,
cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value());
else
cmd_settings.addProgramOptions(options_description.main_description.value());
if (allow_merge_tree_settings)
{
/// Add merge tree settings manually, because names of some settings
/// may clash. Query settings have higher priority and we just
/// skip ambiguous merge tree settings.
auto & main_options = options_description.main_description.value();
NameSet main_option_names;
for (const auto & option : main_options.options())
main_option_names.insert(option->long_name());
for (const auto & setting : cmd_merge_tree_settings.all())
{
if (main_option_names.contains(setting.getName()))
continue;
if (allow_repeated_settings)
cmd_merge_tree_settings.addProgramOptionAsMultitoken(main_options, setting);
else
cmd_merge_tree_settings.addProgramOption(main_options, setting);
}
}
/// Parse main commandline options.
auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered();
po::parsed_options parsed = parser.run();

View File

@ -1,6 +1,7 @@
#pragma once
#include "Common/NamePrompter.h"
#include <Parsers/ASTCreateQuery.h>
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
@ -14,6 +15,7 @@
#include <boost/program_options.hpp>
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
namespace po = boost::program_options;
@ -164,6 +166,7 @@ private:
void updateSuggest(const ASTPtr & ast);
void initQueryIdFormats();
bool addMergeTreeSettings(ASTCreateQuery & ast_create);
protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
@ -212,6 +215,7 @@ protected:
/// Settings specified via command line args
Settings cmd_settings;
MergeTreeSettings cmd_merge_tree_settings;
/// thread status should be destructed before shared context because it relies on process list.
std::optional<ThreadStatus> thread_status;
@ -298,6 +302,7 @@ protected:
std::vector<HostAndPort> hosts_and_ports{};
bool allow_repeated_settings = false;
bool allow_merge_tree_settings = false;
bool cancelled = false;

View File

@ -686,7 +686,7 @@ void Connection::sendReadTaskResponse(const String & response)
}
void Connection::sendMergeTreeReadTaskResponse(const PartitionReadResponse & response)
void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{
writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out);
response.serialize(*out);
@ -960,8 +960,12 @@ Packet Connection::receivePacket()
case Protocol::Server::ReadTaskRequest:
return res;
case Protocol::Server::MergeTreeAllRangesAnnounecement:
res.announcement = receiveInitialParallelReadAnnounecement();
return res;
case Protocol::Server::MergeTreeReadTaskRequest:
res.request = receivePartitionReadRequest();
res.request = receiveParallelReadRequest();
return res;
case Protocol::Server::ProfileEvents:
@ -1114,13 +1118,20 @@ ProfileInfo Connection::receiveProfileInfo() const
return profile_info;
}
PartitionReadRequest Connection::receivePartitionReadRequest() const
ParallelReadRequest Connection::receiveParallelReadRequest() const
{
PartitionReadRequest request;
ParallelReadRequest request;
request.deserialize(*in);
return request;
}
InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnounecement() const
{
InitialAllRangesAnnouncement announcement;
announcement.deserialize(*in);
return announcement;
}
void Connection::throwUnexpectedPacket(UInt64 packet_type, const char * expected) const
{

View File

@ -110,7 +110,7 @@ public:
void sendData(const Block & block, const String & name/* = "" */, bool scalar/* = false */) override;
void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) override;
void sendMergeTreeReadTaskResponse(const ParallelReadResponse & response) override;
void sendExternalTablesData(ExternalTablesData & data) override;
@ -265,7 +265,8 @@ private:
std::vector<String> receiveMultistringMessage(UInt64 msg_type) const;
std::unique_ptr<Exception> receiveException() const;
Progress receiveProgress() const;
PartitionReadRequest receivePartitionReadRequest() const;
ParallelReadRequest receiveParallelReadRequest() const;
InitialAllRangesAnnouncement receiveInitialParallelReadAnnounecement() const;
ProfileInfo receiveProfileInfo() const;
void initInputBuffers();

Some files were not shown because too many files have changed in this diff