Merge branch 'master' into memory-overcommit

mergify[bot] authored 2021-12-16 12:44:50 +00:00, committed by GitHub
commit cc3176d437
89 changed files with 3556 additions and 1716 deletions

View File

@ -8,18 +8,21 @@ jobs:
CherryPick:
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY=${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
REPO_OWNER=ClickHouse
REPO_NAME=ClickHouse
REPO_TEAM=core
EOF
- name: Check out repository code
uses: actions/checkout@v2
with:
token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}}
fetch-depth: 0
- name: Cherry pick
env:
TEMP_PATH: ${{runner.temp}}/cherry_pick
ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
REPO_OWNER: "ClickHouse"
REPO_NAME: "ClickHouse"
REPO_TEAM: "core"
run: |
sudo pip install GitPython
cd $GITHUB_WORKSPACE/tests/ci

View File

@ -7,6 +7,9 @@ jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
@ -22,17 +25,23 @@ jobs:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/compatibility_check
REPO_COPY=${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH=${{runner.temp}}/reports_dir
EOF
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: CompatibilityCheck
env:
TEMP_PATH: ${{runner.temp}}/compatibility_check
REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH: ${{runner.temp}}/reports_dir
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -51,24 +60,30 @@ jobs:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_release
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_release'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -78,35 +93,41 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
sudo rm -fr $TEMP_PATH $CACHES_PATH
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_asan
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_asan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -116,35 +137,41 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
sudo rm -fr $TEMP_PATH $CACHES_PATH
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_tsan
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_tsan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -154,35 +181,41 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
sudo rm -fr $TEMP_PATH $CACHES_PATH
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=package_debug
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_debug'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -192,13 +225,13 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
sudo rm -fr $TEMP_PATH $CACHES_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -210,17 +243,23 @@ jobs:
- BuilderDebDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse build check (actions)
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -239,19 +278,25 @@ jobs:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (address, actions)
REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT=10800
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (address, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -271,19 +316,25 @@ jobs:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateful_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateful tests (debug, actions)
REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT=3600
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (debug, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -301,20 +352,30 @@ jobs:
##############################################################################################
StressTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
# func testers have 16 cores + 128 GB memory
# while stress testers have 36 cores + 72 GB memory.
# It would be better to have something like 32 cores + 128 GB,
# but such servers are almost unavailable as spot instances.
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stress_thread
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stress test (thread, actions)
REPO_COPY=${{runner.temp}}/stress_thread/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_thread
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (thread, actions)'
REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -334,18 +395,24 @@ jobs:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (release, actions)
REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (release, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
@ -369,6 +436,9 @@ jobs:
- CompatibilityCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label

View File

@ -14,6 +14,9 @@ jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
@ -24,6 +27,9 @@ jobs:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
@ -39,17 +45,23 @@ jobs:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/docs_check
REPO_COPY=${{runner.temp}}/docs_check/ClickHouse
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docs_check
path: ${{ env.TEMP_PATH }}
- name: Clear repository
run: |
sudo rm -rf $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check
env:
TEMP_PATH: ${{runner.temp}}/docs_check
REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
run: |
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -11,10 +11,15 @@ on: # yamllint disable-line rule:truthy
- 'website/**'
- 'benchmark/**'
- 'docker/**'
- '.github/**'
workflow_dispatch:
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
@ -30,13 +35,16 @@ jobs:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Clear repository
run: |
sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{runner.temp}}/docs_release
path: ${{ env.TEMP_PATH }}
- name: Docs Release
env:
TEMP_PATH: ${{runner.temp}}/docs_release
@ -44,6 +52,8 @@ jobs:
CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 docs_release.py

File diff suppressed because it is too large

View File

@ -71,8 +71,8 @@
* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)).
* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)).
* ClickHouse dictionary source: support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to use named collections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
* ClickHouse dictionary source: support predefined connections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow to use predefined connections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird).
* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)).
* Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)).

View File

@ -1,6 +1,6 @@
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
${ENABLE_LIBRARIES})
if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
set(USE_AZURE_BLOB_STORAGE 1)

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit d10351f312c1ae1ca3fdda433693dfbef3acfece
Subproject commit bb69d48e0ee35c87a0f19e509a09a914f71f0cff

View File

@ -268,7 +268,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
*
* Whether iconv support is available
*/
#if 1
#if 0
#define LIBXML_ICONV_ENABLED
#endif

View File

@ -55,7 +55,7 @@ function find_reference_sha
)
for path in "${urls_to_try[@]}"
do
if curl --fail --head "$path"
if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$path"
then
found="$path"
break
@ -76,7 +76,7 @@ chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test.
if curl --fail --head "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
then
right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz"
fi

View File

@ -1687,18 +1687,17 @@ Quorum writes
`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
All the replicas in the quorum are consistent, i.e., they contain data from all previous `INSERT` queries. The `INSERT` sequence is linearized.
When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#settings-select_sequential_consistency).
When reading the data written from the `insert_quorum`, you can use the [select_sequential_consistency](#settings-select_sequential_consistency) option.
ClickHouse generates an exception
ClickHouse generates an exception:
- If the number of available replicas at the time of the query is less than the `insert_quorum`.
- At an attempt to write data when the previous block has not yet been inserted in the `insert_quorum` of replicas. This situation may occur if the user tries to perform an `INSERT` before the previous one with the `insert_quorum` is completed.
- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed.
See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
@ -1710,11 +1709,29 @@ Default value: 600 000 milliseconds (ten minutes).
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_parallel {#settings-insert_quorum_parallel}
Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## select_sequential_consistency {#settings-select_sequential_consistency}
Enables or disables sequential consistency for `SELECT` queries:
Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
Possible values:
@ -1727,10 +1744,13 @@ Usage
When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas.
When `insert_quorum_parallel` is enabled (the default), `select_sequential_consistency` does not work. This is because parallel `INSERT` queries can be written to different sets of quorum replicas, so there is no guarantee that a single replica will have received all writes.
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
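For illustration only (not part of this patch), a minimal sketch of how these settings might be passed per query from Python, assuming the third-party `clickhouse-driver` client, a reachable server, and a hypothetical replicated table `test.quorum_table`:

```python
# Hedged sketch: clickhouse-driver, the server address and the table name
# are assumptions for illustration, not part of the change above.
from clickhouse_driver import Client

client = Client("localhost")

# Write with a quorum of 2 replicas; disable parallel quorum INSERTs so that
# select_sequential_consistency below has an effect.
client.execute(
    "INSERT INTO test.quorum_table (id) VALUES",
    [(1,), (2,), (3,)],
    settings={"insert_quorum": 2, "insert_quorum_parallel": 0},
)

# Read only from replicas that already contain all preceding quorum writes.
rows = client.execute(
    "SELECT count() FROM test.quorum_table",
    settings={"select_sequential_consistency": 1},
)
print(rows)
```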
## insert_deduplicate {#settings-insert-deduplicate}

View File

@ -22,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = engine
```
Creates a table named `name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
Creates a table named `table_name` in the `db` database or the current database if `db` is not set, with the structure specified in brackets and the `engine` engine.
The structure of the table is a list of column descriptions, secondary indexes, and constraints. If a [primary key](#primary-key) is supported by the engine, it will be indicated as a parameter for the table engine.
A column description is `name type` in the simplest case. Example: `RegionID UInt32`.
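As a rough illustration of how `name type` column descriptions fit into a full statement (same hypothetical `clickhouse-driver` setup as in the earlier sketch; database, table, and column names are invented):

```python
# Hedged sketch: the client, database and column names are assumptions made
# up for illustration; only the CREATE TABLE shape mirrors the docs above.
from clickhouse_driver import Client

client = Client("localhost")
client.execute("""
    CREATE TABLE IF NOT EXISTS db.visits
    (
        RegionID UInt32,   -- simplest column description: name type
        EventDate Date,
        UserID UInt64
    )
    ENGINE = MergeTree
    ORDER BY (EventDate, UserID)
""")
```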

View File

@ -206,6 +206,9 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma
- In `Pretty*` formats, the row is output as a separate table after the main result.
- In the other formats it is not available.
!!! note "Note"
totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.
`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting.
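To make the placement of that extra row concrete, a small hedged sketch that reads it back over the HTTP interface with `FORMAT JSON` (the server address and table `t` are assumptions; not part of this patch):

```python
# Hedged sketch: assumes a ClickHouse server on localhost:8123 and a
# hypothetical table t(year, month, day); the point being illustrated is
# the separate "totals" field in FORMAT JSON output.
import requests

query = """
    SELECT year, month, count(*) AS cnt
    FROM t
    GROUP BY year, month WITH TOTALS
    FORMAT JSON
"""
resp = requests.post("http://localhost:8123/", data=query)
resp.raise_for_status()
result = resp.json()

print(result["data"])    # the regular aggregated rows
print(result["totals"])  # the extra totals row described above
```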
### Configuring Totals Processing {#configuring-totals-processing}

View File

@ -203,6 +203,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
- In `Pretty*` formats, the row is output as a separate table after the main result.
- In other formats it is not available.
!!! note "Note"
totals is output only in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.
When the [HAVING](having.md) clause is used, the behavior of `WITH TOTALS` is controlled by the `totals_mode` setting.
### Configuring Totals Processing {#configuring-totals-processing}

View File

@ -727,7 +727,6 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o
void LocalServer::addOptions(OptionsDescription & options_description)
{
options_description.main_description->add_options()
("database,d", po::value<std::string>(), "database")
("table,N", po::value<std::string>(), "name of the initial table")
/// If structure argument is omitted then initial query is not generated

View File

@ -0,0 +1,17 @@
#include <Common/getRandomASCIIString.h>
#include <Common/thread_local_rng.h>
#include <random>
namespace DB
{
String getRandomASCIIString(size_t len, char first, char last)
{
std::uniform_int_distribution<int> distribution(first, last);
String res(len, ' ');
for (auto & c : res)
c = distribution(thread_local_rng);
return res;
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <Core/Types.h>
namespace DB
{
/// Slow random string. Useful for random names and things like this. Not for
/// generating data.
String getRandomASCIIString(size_t len = 32, char first = 'a', char last = 'z');
}

View File

@ -7,6 +7,7 @@
#include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
#include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
#include <Common/getRandomASCIIString.h>
namespace DB
@ -93,7 +94,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskBlobStorage::writeFile(
WriteMode mode)
{
auto metadata = readOrCreateMetaForWriting(path, mode);
auto blob_path = path + "_" + getRandomName(8); /// NOTE: path contains the tmp_* prefix in the blob name
auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name
LOG_TRACE(log, "{} to file by path: {}. Blob Storage path: {}",
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), blob_path);

View File

@ -1,4 +1,5 @@
#include <Disks/RemoteDisksCommon.h>
#include <Common/getRandomASCIIString.h>
namespace DB
{
@ -8,17 +9,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
String getRandomName(size_t len, char first, char last)
{
std::uniform_int_distribution<int> distribution(first, last);
String res(len, ' ');
for (auto & c : res)
c = distribution(thread_local_rng);
return res;
}
std::shared_ptr<DiskCacheWrapper> wrapWithCache(
std::shared_ptr<IDisk> disk, String cache_name, String cache_path, String metadata_path)
{

View File

@ -6,13 +6,12 @@
#include <Common/thread_local_rng.h>
#include <Disks/IDisk.h>
#include <Disks/DiskCacheWrapper.h>
#include <Common/getRandomASCIIString.h>
namespace DB
{
String getRandomName(size_t len = 32, char first = 'a', char last = 'z');
std::shared_ptr<DiskCacheWrapper> wrapWithCache(
std::shared_ptr<IDisk> disk, String cache_name, String cache_path, String metadata_path);

View File

@ -16,6 +16,7 @@
#include <Common/createHardLink.h>
#include <Common/quoteString.h>
#include <Common/thread_local_rng.h>
#include <Common/getRandomASCIIString.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromS3.h>
@ -246,7 +247,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
auto metadata = readOrCreateMetaForWriting(path, mode);
/// Path to store new S3 object.
auto s3_path = getRandomName();
auto s3_path = getRandomASCIIString();
std::optional<ObjectMetadata> object_metadata;
if (settings->send_metadata)

View File

@ -2307,10 +2307,9 @@ namespace
if (parent_field_descriptor)
out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ")";
for (size_t i = 0; i != field_infos.size(); ++i)
for (const auto & field_info : field_infos)
{
out << "\n";
const auto & field_info = field_infos[i];
writeIndent(out, indent + 1) << "Columns #";
for (size_t j = 0; j != field_info.column_indices.size(); ++j)
{
@ -3017,8 +3016,11 @@ namespace
if (nested_message_serializer)
{
std::vector<std::string_view> column_names_used;
column_names_used.reserve(used_column_indices_in_nested.size());
for (size_t i : used_column_indices_in_nested)
column_names_used.emplace_back(nested_column_names[i]);
auto field_serializer = std::make_unique<ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages>(
std::move(column_names_used), field_descriptor, std::move(nested_message_serializer), get_root_desc_function);
transformColumnIndices(used_column_indices_in_nested, nested_column_indices);

View File

@ -8,7 +8,7 @@ namespace DB
{
void registerFunctionBase64Decode(FunctionFactory & factory)
{
tb64ini(0, 1);
tb64ini(0, 0);
factory.registerFunction<FunctionBase64Conversion<Base64Decode>>();
/// MysQL compatibility alias.

View File

@ -10,7 +10,7 @@ namespace DB
{
void registerFunctionBase64Encode(FunctionFactory & factory)
{
tb64ini(0, 1);
tb64ini(0, 0);
factory.registerFunction<FunctionBase64Conversion<Base64Encode>>();
/// MysQL compatibility alias.

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int DECIMAL_OVERFLOW;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
}
@ -142,6 +143,7 @@ public:
else if (const ColumnFixedString * col_in_fixed = checkAndGetColumn<ColumnFixedString>(col_in_untyped.get()))
{
const auto n = col_in_fixed->getN();
const auto col_in_rows = col_in_fixed->size();
auto col_to = ColumnFixedString::create(n);
ColumnFixedString::Chars & chars_to = col_to->getChars();
@ -153,7 +155,16 @@ public:
const auto * ptr_in = col_in_fixed->getChars().data();
auto * ptr_to = chars_to.data();
fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability);
if (col_in_rows >= input_rows_count)
fuzzBits(ptr_in, ptr_to, chars_to.size(), inverse_probability);
else if (col_in_rows != 1)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"1 != col_in_rows {} < input_rows_count {}", col_in_rows, input_rows_count);
else
for (size_t i = 0; i < input_rows_count; ++i)
fuzzBits(ptr_in, ptr_to + i * n, n, inverse_probability);
return col_to;
}

View File

@ -68,7 +68,7 @@ bool ReadBufferFromBlobStorage::nextImpl()
data_capacity = internal_buffer.size();
}
size_t to_read_bytes = std::min(total_size - offset, data_capacity);
size_t to_read_bytes = std::min(static_cast<size_t>(total_size - offset), data_capacity);
size_t bytes_read = 0;
size_t sleep_time_with_backoff_milliseconds = 100;

View File

@ -427,7 +427,7 @@ namespace detail
LOG_ERROR(log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(), i, settings.http_max_tries,
uri.toString(), i + 1, settings.http_max_tries,
getOffset(), read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait, settings.http_retry_max_backoff_ms);

View File

@ -6,6 +6,7 @@
#include <IO/WriteBufferFromBlobStorage.h>
#include <Disks/RemoteDisksCommon.h>
#include <Common/getRandomASCIIString.h>
namespace DB
@ -42,7 +43,7 @@ void WriteBufferFromBlobStorage::nextImpl()
{
auto part_len = std::min(len - read, max_single_part_upload_size);
auto block_id = getRandomName(64);
auto block_id = getRandomASCIIString(64);
block_ids.push_back(block_id);
Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast<uint8_t *>(buffer_begin + read), part_len);

View File

@ -57,9 +57,14 @@ BlockIO InterpreterCreateFunctionQuery::execute()
void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const String & name)
{
const auto * args_tuple = function->as<ASTFunction>()->arguments->children.at(0)->as<ASTFunction>();
auto & lambda_function = function->as<ASTFunction &>();
auto & lambda_function_expression_list = lambda_function.arguments->children;
const auto & tuple_function_arguments = lambda_function_expression_list.at(0)->as<ASTFunction &>();
std::unordered_set<String> arguments;
for (const auto & argument : args_tuple->arguments->children)
for (const auto & argument : tuple_function_arguments.arguments->children)
{
const auto & argument_name = argument->as<ASTIdentifier>()->name();
auto [_, inserted] = arguments.insert(argument_name);
@ -67,7 +72,7 @@ void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const Str
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name);
}
ASTPtr function_body = function->as<ASTFunction>()->children.at(0)->children.at(1);
ASTPtr function_body = lambda_function_expression_list.at(1);
validateFunctionRecursiveness(function_body, name);
}
@ -82,5 +87,4 @@ void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node,
validateFunctionRecursiveness(child, function_to_create);
}
}
}

View File

@ -196,6 +196,9 @@ Chain InterpreterInsertQuery::buildChainImpl(
/// We create a pipeline of several streams, into which we will write data.
Chain out;
/// Keep a reference to the context to make sure it stays alive until the chain is executed and destroyed
out.addInterpreterContext(context_ptr);
/// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage.
/// Otherwise we'll get duplicates when MV reads same rows again from Kafka.
if (table->noPushingToViews() && !no_destination)

View File

@ -113,8 +113,10 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
auto expr_list = select_ast->select();
// The first column is our filter expression.
expr_list->children.push_back(row_policy_filter);
/// The first column is our filter expression.
/// The row_policy_filter should be cloned, because it may be changed by TreeRewriter,
/// which can turn it into an invalid expression, even though it may be valid in the whole select.
expr_list->children.push_back(row_policy_filter->clone());
/// Keep columns that are required after the filter actions.
for (const auto & column_str : prerequisite_columns)
@ -386,7 +388,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
query.setFinal();
/// Save scalar sub queries's results in the query context
if (!options.only_analyze && context->hasQueryContext())
/// But discard them if the Storage has been modified
/// In an ideal situation we would only discard the scalars affected by the storage change
if (!options.only_analyze && context->hasQueryContext() && !context->getViewSource())
for (const auto & it : syntax_analyzer_result->getScalars())
context->getQueryContext()->addScalar(it.first, it.second);

View File

@ -440,7 +440,7 @@ static ASTPtr tryGetTableOverride(const String & mapped_database, const String &
if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database))
{
auto create_query = database_ptr->getCreateDatabaseQuery();
if (auto create_database_query = create_query->as<ASTCreateQuery>())
if (auto * create_database_query = create_query->as<ASTCreateQuery>())
{
if (create_database_query->table_overrides)
{
@ -537,8 +537,8 @@ ASTs InterpreterCreateImpl::getRewrittenQueries(
if (auto table_override = tryGetTableOverride(mapped_to_database, create_query.table))
{
auto override = table_override->as<ASTTableOverride>();
override->applyToCreateTableQuery(rewritten_query.get());
auto * override_ast = table_override->as<ASTTableOverride>();
override_ast->applyToCreateTableQuery(rewritten_query.get());
}
return ASTs{rewritten_query};

View File

@ -635,7 +635,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
std::unique_ptr<OpenTelemetrySpanHolder> span;
if (context->query_trace_context.trace_id != UUID())
{
auto raw_interpreter_ptr = interpreter.get();
auto * raw_interpreter_ptr = interpreter.get();
std::string class_name(abi::__cxa_demangle(typeid(*raw_interpreter_ptr).name(), nullptr, nullptr, nullptr));
span = std::make_unique<OpenTelemetrySpanHolder>(class_name + "::execute()");
}

View File

@ -427,8 +427,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
if (select)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << settings.nl_or_ws << (settings.hilite ? hilite_none : "");
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS"
<< (comment ? "(" : "")
<< settings.nl_or_ws << (settings.hilite ? hilite_none : "");
select->formatImpl(settings, state, frame);
settings.ostr << (comment ? ")" : "");
}
if (tables)

View File

@ -40,7 +40,7 @@ public:
String getID(char) const override { return "TableOverrideList"; }
ASTPtr clone() const override;
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void setTableOverride(const String & name, ASTPtr override);
void setTableOverride(const String & name, const ASTPtr ast);
void removeTableOverride(const String & name);
ASTPtr tryGetTableOverride(const String & name) const;
bool hasOverride(const String & name) const;

View File

@ -747,6 +747,7 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
if (!select_p.parse(pos, select, expected))
return false;
auto comment = parseComment(pos, expected);
auto query = std::make_shared<ASTCreateQuery>();
node = query;
@ -781,6 +782,9 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
if (live_view_periodic_refresh)
query->live_view_periodic_refresh.emplace(live_view_periodic_refresh->as<ASTLiteral &>().value.safeGet<UInt64>());
if (comment)
query->set(query->comment, comment);
return true;
}

View File

@ -83,16 +83,16 @@ TEST_P(TableOverrideTest, applyOverrides)
ASSERT_NE(nullptr, database);
ASTPtr table_ast;
ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0));
auto table = table_ast->as<ASTCreateQuery>();
auto * table = table_ast->as<ASTCreateQuery>();
ASSERT_NE(nullptr, table);
auto table_name = table->table->as<ASTIdentifier>()->name();
if (database->table_overrides)
{
auto override_ast = database->table_overrides->tryGetTableOverride(table_name);
ASSERT_NE(nullptr, override_ast);
auto override = override_ast->as<ASTTableOverride>();
ASSERT_NE(nullptr, override);
override->applyToCreateTableQuery(table);
auto * override_table_ast = override_ast->as<ASTTableOverride>();
ASSERT_NE(nullptr, override_table_ast);
override_table_ast->applyToCreateTableQuery(table);
}
EXPECT_EQ(expected_query, serializeAST(*table));
}

View File

@ -59,11 +59,12 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int DUPLICATE_COLUMN;
extern const int THERE_IS_NO_COLUMN;
extern const int UNKNOWN_EXCEPTION;
extern const int UNKNOWN_TYPE;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int THERE_IS_NO_COLUMN;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_EXCEPTION;
}
@ -519,9 +520,11 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn(
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
{
NameToColumnPtr name_to_column_ptr;
for (const auto& column_name : table->ColumnNames())
for (const auto & column_name : table->ColumnNames())
{
std::shared_ptr<arrow::ChunkedArray> arrow_column = table->GetColumnByName(column_name);
if (!arrow_column)
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name);
name_to_column_ptr[column_name] = arrow_column;
}

View File

@ -24,14 +24,14 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings)
CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
const Block & header_,
ReadBuffer & in_,
ReadBuffer & in_buf_,
const Params & params_,
bool with_names_,
bool with_types_,
bool ignore_spaces_,
const FormatSettings & format_settings_)
: CustomSeparatedRowInputFormat(
header_, std::make_unique<PeekableReadBuffer>(in_), params_, with_names_, with_types_, ignore_spaces_, format_settings_)
header_, std::make_unique<PeekableReadBuffer>(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_)
{
}

View File

@ -25,7 +25,7 @@ public:
private:
CustomSeparatedRowInputFormat(
const Block & header_,
std::unique_ptr<PeekableReadBuffer> in_,
std::unique_ptr<PeekableReadBuffer> in_buf_,
const Params & params_,
bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
using EscapingRule = FormatSettings::EscapingRule;

View File

@ -1,5 +1,6 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Processors/IProcessor.h>
#include <QueryPipeline/PipelineResourcesHolder.h>
@ -42,6 +43,7 @@ public:
void addTableLock(TableLockHolder lock) { holder.table_locks.emplace_back(std::move(lock)); }
void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); }
void attachResources(PipelineResourcesHolder holder_) { holder = std::move(holder_); }
void addInterpreterContext(ContextPtr context) { holder.interpreter_context.emplace_back(std::move(context)); }
PipelineResourcesHolder detachResources() { return std::move(holder); }
void reset();

View File

@ -280,7 +280,8 @@ StorageLiveView::StorageLiveView(
const StorageID & table_id_,
ContextPtr context_,
const ASTCreateQuery & query,
const ColumnsDescription & columns_)
const ColumnsDescription & columns_,
const String & comment)
: IStorage(table_id_)
, WithContext(context_->getGlobalContext())
{
@ -291,6 +292,9 @@ StorageLiveView::StorageLiveView(
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
if (!comment.empty())
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
if (!query.select)
@ -621,7 +625,7 @@ void registerStorageLiveView(StorageFactory & factory)
"Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')",
ErrorCodes::SUPPORT_IS_DISABLED);
return StorageLiveView::create(args.table_id, args.getLocalContext(), args.query, args.columns);
return StorageLiveView::create(args.table_id, args.getLocalContext(), args.query, args.columns, args.comment);
});
}

View File

@ -232,8 +232,8 @@ private:
const StorageID & table_id_,
ContextPtr context_,
const ASTCreateQuery & query,
const ColumnsDescription & columns
);
const ColumnsDescription & columns,
const String & comment);
};
}

View File

@ -146,10 +146,15 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
values.push_back(granule->met);
}
if (const auto it = answer_cache.find(values); it != std::end(answer_cache))
return it->second;
const ComparisonGraph * graph = nullptr;
const auto & graph = getGraph(values);
{
std::lock_guard lock(cache_mutex);
if (const auto it = answer_cache.find(values); it != std::end(answer_cache))
return it->second;
graph = getGraph(values);
}
bool always_false = false;
expression_cnf->iterateGroups(
@ -166,7 +171,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
if (func && func->arguments->children.size() == 2)
{
const auto expected = ComparisonGraph::atomToCompareResult(atom);
if (graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]))
if (graph->isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]))
{
/// If graph failed use matching.
/// We don't need to check constraints.
@ -177,6 +182,8 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree
always_false = true;
});
std::lock_guard lock(cache_mutex);
answer_cache[values] = !always_false;
return !always_false;
}
@ -195,11 +202,13 @@ std::unique_ptr<ComparisonGraph> MergeTreeIndexhypothesisMergedCondition::buildG
return std::make_unique<ComparisonGraph>(active_atomic_formulas);
}
const ComparisonGraph & MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector<bool> & values) const
const ComparisonGraph * MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector<bool> & values) const
{
if (!graph_cache.contains(values))
graph_cache[values] = buildGraph(values);
return *graph_cache.at(values);
auto [it, inserted] = graph_cache.try_emplace(values);
if (inserted)
it->second = buildGraph(values);
return it->second.get();
}
}

View File

@ -21,11 +21,14 @@ public:
private:
void addConstraints(const ConstraintsDescription & constraints_description);
std::unique_ptr<ComparisonGraph> buildGraph(const std::vector<bool> & values) const;
const ComparisonGraph & getGraph(const std::vector<bool> & values) const;
const ComparisonGraph * getGraph(const std::vector<bool> & values) const;
ASTPtr expression_ast;
std::unique_ptr<CNFQuery> expression_cnf;
/// Part analysis can be done in parallel.
/// So, we have shared answer and graph cache.
mutable std::mutex cache_mutex;
mutable std::unordered_map<std::vector<bool>, std::unique_ptr<ComparisonGraph>> graph_cache;
mutable std::unordered_map<std::vector<bool>, bool> answer_cache;

View File

@ -156,9 +156,6 @@ StoragePtr StorageFactory::get(
throw Exception("Unknown table engine " + name, ErrorCodes::UNKNOWN_STORAGE);
}
if (query.comment)
comment = query.comment->as<ASTLiteral &>().value.get<String>();
auto check_feature = [&](String feature_description, FeatureMatcherFn feature_matcher_fn)
{
if (!feature_matcher_fn(it->second.features))
@ -204,6 +201,9 @@ StoragePtr StorageFactory::get(
}
}
if (query.comment)
comment = query.comment->as<ASTLiteral &>().value.get<String>();
ASTs empty_engine_args;
Arguments arguments{
.engine_name = name,

View File

@ -60,7 +60,8 @@ StorageMaterializedView::StorageMaterializedView(
ContextPtr local_context,
const ASTCreateQuery & query,
const ColumnsDescription & columns_,
bool attach_)
bool attach_,
const String & comment)
: IStorage(table_id_), WithMutableContext(local_context->getGlobalContext())
{
StorageInMemoryMetadata storage_metadata;
@ -81,6 +82,9 @@ StorageMaterializedView::StorageMaterializedView(
auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context);
storage_metadata.setSelectQuery(select);
if (!comment.empty())
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil
@ -432,7 +436,7 @@ void registerStorageMaterializedView(StorageFactory & factory)
/// Pass local_context here to convey setting for inner table
return StorageMaterializedView::create(
args.table_id, args.getLocalContext(), args.query,
args.columns, args.attach);
args.columns, args.attach, args.comment);
});
}

View File

@ -108,7 +108,8 @@ protected:
ContextPtr local_context,
const ASTCreateQuery & query,
const ColumnsDescription & columns_,
bool attach_);
bool attach_,
const String & comment);
};
}

View File

@ -76,15 +76,23 @@ def get_image_name(build_config):
return 'clickhouse/deb-builder'
def build_clickhouse(packager_cmd, logs_path):
def build_clickhouse(packager_cmd, logs_path, build_output_path):
build_log_path = os.path.join(logs_path, 'build_log.log')
with TeePopen(packager_cmd, build_log_path) as process:
retcode = process.wait()
if os.path.exists(build_output_path):
build_results = os.listdir(build_output_path)
else:
build_results = []
if retcode == 0:
logging.info("Built successfully")
if len(build_results) != 0:
logging.info("Built successfully")
else:
logging.info("Success exit code, but no build artifacts => build failed")
else:
logging.info("Build failed")
return build_log_path, retcode == 0
return build_log_path, retcode == 0 and len(build_results) > 0
def get_build_results_if_exists(s3_helper, s3_prefix):
@ -136,8 +144,10 @@ if __name__ == "__main__":
if 'release' in pr_info.labels or 'release-lts' in pr_info.labels:
# for release pull requests we use branch names prefixes, not pr numbers
release_or_pr = pr_info.head_ref
elif pr_info.number == 0:
# for pushes to master - major version
elif pr_info.number == 0 and build_config['package_type'] != "performance":
# for pushes to master - major version, but not for performance builds
# they heavily rely on a fixed path for the build package and nobody is going
# to deploy them anywhere, so it's ok.
release_or_pr = ".".join(version.as_tuple()[:2])
else:
# PR number for anything else
@ -157,7 +167,7 @@ if __name__ == "__main__":
log_url = 'https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20')
else:
build_urls.append('https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20'))
create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, 0, True)
create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, 0, len(build_urls) > 0)
sys.exit(0)
image_name = get_image_name(build_config)
@ -201,7 +211,7 @@ if __name__ == "__main__":
os.makedirs(build_clickhouse_log)
start = time.time()
log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log)
log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log, build_output_path)
elapsed = int(time.time() - start)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True)
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True)

View File

@ -25,7 +25,7 @@ class BuildResult():
self.with_coverage = with_coverage
def group_by_artifacts(build_urls):
groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'preformance': []}
groups = {'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
for url in build_urls:
if url.endswith('performance.tgz'):
groups['performance'].append(url)

View File

@ -179,9 +179,10 @@ CI_CONFIG = {
"binary_tidy",
"binary_splitted",
"binary_darwin",
"binary_arrach64",
"binary_aarch64",
"binary_freebsd",
"binary_darwin_aarch64"
"binary_darwin_aarch64",
"binary_ppc64le",
],
},
"tests_config": {

View File

@ -2,7 +2,6 @@
import logging
import subprocess
import os
import sys
from github import Github
@ -25,13 +24,6 @@ if __name__ == "__main__":
pr_info = PRInfo(get_event(), need_changed_files=True)
gh = Github(get_best_robot_token())
if not pr_info.has_changes_in_documentation():
logging.info ("No changes in documentation")
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description="No changes in docs", state="success")
sys.exit(0)
logging.info("Has changes in docs")
if not os.path.exists(temp_path):
os.makedirs(temp_path)

View File

@ -0,0 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9
# Install the function's dependencies using file requirements.txt
# from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]

View File

@ -0,0 +1,108 @@
#!/usr/bin/env python3
import requests
import argparse
import json
from threading import Thread
from queue import Queue
def get_org_team_members(token: str, org: str, team_slug: str) -> tuple:
headers = {
"Authorization": f"token {token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.get(
f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", headers=headers
)
response.raise_for_status()
data = response.json()
return tuple(m["login"] for m in data)
def get_members_keys(members: tuple) -> str:
class Worker(Thread):
def __init__(self, request_queue):
Thread.__init__(self)
self.queue = request_queue
self.results = []
def run(self):
while True:
m = self.queue.get()
if m == "":
break
response = requests.get(f"https://github.com/{m}.keys")
self.results.append(f"# {m}\n{response.text}")
self.queue.task_done()
q = Queue()
workers = []
for m in members:
q.put(m)
# Create workers and add to the queue
worker = Worker(q)
worker.start()
workers.append(worker)
# Workers keep working till they receive an empty string
for _ in workers:
q.put("")
# Join workers to wait till they finished
for worker in workers:
worker.join()
responses = []
for worker in workers:
responses.extend(worker.results)
return "".join(responses)
def get_token_from_aws() -> str:
import boto3
secret_name = "clickhouse_robot_token"
session = boto3.session.Session()
client = session.client(
service_name="secretsmanager",
)
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
data = json.loads(get_secret_value_response["SecretString"])
return data["clickhouse_robot_token"]
def main(token: str, org: str, team_slug: str) -> str:
members = get_org_team_members(token, org, team_slug)
keys = get_members_keys(members)
return keys
def handler(event, context):
token = get_token_from_aws()
result = {
"statusCode": 200,
"headers": {
"Content-Type": "text/html",
},
"body": main(token, "ClickHouse", "core"),
}
return result
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Get the public SSH keys for members of given org and team"
)
parser.add_argument("--token", required=True, help="Github PAT")
parser.add_argument(
"--organization", help="GitHub organization name", default="ClickHouse"
)
parser.add_argument("--team", help="GitHub team name", default="core")
args = parser.parse_args()
keys = main(args.token, args.organization, args.team)
print(f"Just shoing off the keys:\n{keys}")

View File

@ -0,0 +1 @@
requests

View File

@ -1,34 +0,0 @@
#!/usr/bin/env bash
set -uo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
while true; do
runner_pid=`pgrep run.sh`
echo "Got runner pid $runner_pid"
cd $RUNNER_HOME
if [ -z "$runner_pid" ]; then
echo "Receiving token"
RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
echo "Will try to remove runner"
sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||:
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,builder' --work _work
echo "Run"
sudo -u ubuntu ./run.sh &
sleep 15
else
echo "Runner is working with pid $runner_pid, nothing to do"
sleep 10
fi
done

View File

@ -1,34 +0,0 @@
#!/usr/bin/env bash
set -uo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
while true; do
runner_pid=`pgrep run.sh`
echo "Got runner pid $runner_pid"
cd $RUNNER_HOME
if [ -z "$runner_pid" ]; then
echo "Receiving token"
RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
echo "Will try to remove runner"
sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||:
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,func-tester' --work _work
echo "Run"
sudo -u ubuntu ./run.sh &
sleep 15
else
echo "Runner is working with pid $runner_pid, nothing to do"
sleep 10
fi
done

View File

@ -1,34 +0,0 @@
#!/usr/bin/env bash
set -uo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
while true; do
runner_pid=`pgrep run.sh`
echo "Got runner pid $runner_pid"
cd $RUNNER_HOME
if [ -z "$runner_pid" ]; then
echo "Receiving token"
RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
echo "Will try to remove runner"
sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||:
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,fuzzer-unit-tester' --work _work
echo "Run"
sudo -u ubuntu ./run.sh &
sleep 15
else
echo "Runner is working with pid $runner_pid, nothing to do"
sleep 10
fi
done

View File

@ -0,0 +1,47 @@
#!/usr/bin/env bash
set -uo pipefail
####################################
# IMPORTANT! #
# EC2 instance should have #
# `github:runner-type` tag #
# set accordingly to a runner role #
####################################
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
export INSTANCE_ID
# combine labels
RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" | jq '.Tags[] | select(."Key" == "github:runner-type") | .Value' -r)
LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE"
export LABELS
while true; do
runner_pid=$(pgrep run.sh)
echo "Got runner pid $runner_pid"
cd $RUNNER_HOME || exit 1
if [ -z "$runner_pid" ]; then
echo "Receiving token"
RUNNER_TOKEN=$(/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value)
echo "Will try to remove runner"
sudo -u ubuntu ./config.sh remove --token "$RUNNER_TOKEN" ||:
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$RUNNER_TOKEN" --name "$INSTANCE_ID" --runnergroup Default --labels "$LABELS" --work _work
echo "Run"
sudo -u ubuntu ./run.sh &
sleep 15
else
echo "Runner is working with pid $runner_pid, nothing to do"
sleep 10
fi
done

View File

@ -1,34 +0,0 @@
#!/usr/bin/env bash
set -uo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
while true; do
runner_pid=`pgrep run.sh`
echo "Got runner pid $runner_pid"
cd $RUNNER_HOME
if [ -z "$runner_pid" ]; then
echo "Receiving token"
RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
echo "Will try to remove runner"
sudo -u ubuntu ./config.sh remove --token $RUNNER_TOKEN ||:
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,stress-tester' --work _work
echo "Run"
sudo -u ubuntu ./run.sh &
sleep 15
else
echo "Runner is working with pid $runner_pid, nothing to do"
sleep 10
fi
done

View File

@ -1,20 +0,0 @@
#!/usr/bin/bash
set -euo pipefail
echo "Running init script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
echo "Receiving token"
export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
export RUNNER_URL="https://github.com/ClickHouse"
# Funny fact, but metadata service has fixed IP
export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
cd $RUNNER_HOME
echo "Going to configure runner"
sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64,style-checker' --work _work
echo "Run"
sudo -u ubuntu ./run.sh

View File

@ -1,25 +1,47 @@
#!/usr/bin/env bash
set -euo pipefail
set -xeuo pipefail
echo "Running prepare script"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_VERSION=2.283.1
export RUNNER_VERSION=2.285.1
export RUNNER_HOME=/home/ubuntu/actions-runner
deb_arch() {
case $(uname -m) in
x86_64 )
echo amd64;;
aarch64 )
echo arm64;;
esac
}
runner_arch() {
case $(uname -m) in
x86_64 )
echo x64;;
aarch64 )
echo arm64;;
esac
}
apt-get update
apt-get install --yes --no-install-recommends \
apt-transport-https \
build-essential \
ca-certificates \
curl \
gnupg \
jq \
lsb-release \
pigz \
python3-dev \
python3-pip \
unzip
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
apt-get update
@ -37,21 +59,32 @@ EOT
systemctl restart docker
pip install boto3 pygithub requests urllib3 unidiff
pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory
mkdir -p $RUNNER_HOME && cd $RUNNER_HOME
curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz
RUNNER_ARCHIVE="actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz"
tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz
rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz
curl -O -L "https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/$RUNNER_ARCHIVE"
tar xzf "./$RUNNER_ARCHIVE"
rm -f "./$RUNNER_ARCHIVE"
./bin/installdependencies.sh
chown -R ubuntu:ubuntu $RUNNER_HOME
cd /home/ubuntu
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip"
unzip awscliv2.zip
./aws/install
rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws
# SSH keys of core team
mkdir -p /home/ubuntu/.ssh
# ~/.ssh/authorized_keys is cleaned out, so we use deprecated but working ~/.ssh/authorized_keys2
aws lambda invoke --region us-east-1 --function-name team-keys-lambda /tmp/core.keys
jq < /tmp/core.keys -r '.body' > /home/ubuntu/.ssh/authorized_keys2
chown ubuntu: /home/ubuntu/.ssh -R
chmod 0700 /home/ubuntu/.ssh

View File

@ -0,0 +1,3 @@
live_view_comment_test LiveView live view
materialized_view_comment_test MaterializedView materialized view
view_comment_test View simple view

View File

@ -0,0 +1,12 @@
-- Make sure that any kind of `VIEW` can be created with a `COMMENT` clause
-- and value of that clause is visible as `comment` column of `system.tables` table.
CREATE VIEW view_comment_test AS (SELECT 1) COMMENT 'simple view';
CREATE MATERIALIZED VIEW materialized_view_comment_test TO test1 (a UInt64) AS (SELECT 1) COMMENT 'materialized view';
SET allow_experimental_live_view=1;
CREATE LIVE VIEW live_view_comment_test AS (SELECT 1) COMMENT 'live view';
SYSTEM FLUSH LOGS;
SELECT name, engine, comment FROM system.tables WHERE database == currentDatabase() ORDER BY name;

View File

@ -0,0 +1,2 @@
SELECT * FROM tabl_1 SETTINGS log_comment = ?;
SELECT * FROM tabl_2 SETTINGS log_comment = ?;

View File

@ -0,0 +1,20 @@
-- Tags: no-fasttest
SET log_queries=1;
DROP TABLE IF EXISTS tabl_1;
DROP TABLE IF EXISTS tabl_2;
CREATE TABLE tabl_1 (key String) ENGINE MergeTree ORDER BY key;
CREATE TABLE tabl_2 (key String) ENGINE MergeTree ORDER BY key;
SELECT * FROM tabl_1 SETTINGS log_comment = 'ad15a651';
SELECT * FROM tabl_2 SETTINGS log_comment = 'ad15a651';
SYSTEM FLUSH LOGS;
SELECT base64Decode(base64Encode(normalizeQuery(query)))
FROM system.query_log
WHERE type = 'QueryFinish' AND log_comment = 'ad15a651' AND current_database = currentDatabase()
GROUP BY normalizeQuery(query)
ORDER BY normalizeQuery(query);
DROP TABLE tabl_1;
DROP TABLE tabl_2;

View File

@ -0,0 +1,8 @@
4
1
2
3
3
3
3
4

View File

@ -0,0 +1,30 @@
DROP TABLE IF EXISTS 02131_multiply_row_policies_on_same_column;
CREATE TABLE 02131_multiply_row_policies_on_same_column (x UInt8) ENGINE = MergeTree ORDER BY x;
INSERT INTO 02131_multiply_row_policies_on_same_column VALUES (1), (2), (3), (4);
DROP ROW POLICY IF EXISTS 02131_filter_1 ON 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY IF EXISTS 02131_filter_2 ON 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY IF EXISTS 02131_filter_3 ON 02131_multiply_row_policies_on_same_column;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
CREATE ROW POLICY 02131_filter_1 ON 02131_multiply_row_policies_on_same_column USING x=1 TO ALL;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
CREATE ROW POLICY 02131_filter_2 ON 02131_multiply_row_policies_on_same_column USING x=2 TO ALL;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
CREATE ROW POLICY 02131_filter_3 ON 02131_multiply_row_policies_on_same_column USING x=3 TO ALL;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
CREATE ROW POLICY 02131_filter_4 ON 02131_multiply_row_policies_on_same_column USING x<4 AS RESTRICTIVE TO ALL;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY 02131_filter_1 ON 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY 02131_filter_2 ON 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY 02131_filter_3 ON 02131_multiply_row_policies_on_same_column;
DROP ROW POLICY 02131_filter_4 ON 02131_multiply_row_policies_on_same_column;
SELECT count() FROM 02131_multiply_row_policies_on_same_column;
DROP TABLE 02131_multiply_row_policies_on_same_column;

View File

@ -0,0 +1,3 @@
sku_0001 black women nice shirt
sku_0001_black sku_0001 black women nice shirt
sku_0001_black sku_0001 black women nice shirt

View File

@ -0,0 +1,17 @@
CREATE TABLE main ( `id` String, `color` String, `section` String, `description` String) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE destination_join ( `key` String, `id` String, `color` String, `section` String, `description` String) ENGINE = Join(ANY, LEFT, key);
CREATE TABLE destination_set (`key` String) ENGINE = Set;
CREATE MATERIALIZED VIEW mv_to_join TO `destination_join` AS SELECT concat(id, '_', color) AS key, * FROM main;
CREATE MATERIALIZED VIEW mv_to_set TO `destination_set` AS SELECT key FROM destination_join;
INSERT INTO main VALUES ('sku_0001','black','women','nice shirt');
SELECT * FROM main;
SELECT * FROM destination_join;
SELECT * FROM destination_join WHERE key in destination_set;
DROP TABLE mv_to_set;
DROP TABLE destination_set;
DROP TABLE mv_to_join;
DROP TABLE destination_join;
DROP TABLE main;

View File

@ -0,0 +1,4 @@
2000
2
1500 0 1499 1500 0 1499
500 1500 1999 500 1500 1999

View File

@ -0,0 +1,24 @@
-- https://github.com/ClickHouse/ClickHouse/issues/9587#issuecomment-944431385
CREATE TABLE source (a Int32) ENGINE=MergeTree() ORDER BY tuple();
CREATE TABLE source_null AS source ENGINE=Null;
CREATE TABLE dest_a (count UInt32, min Int32, max Int32, count_subquery Int32, min_subquery Int32, max_subquery Int32) ENGINE=MergeTree() ORDER BY tuple();
CREATE MATERIALIZED VIEW mv_null TO source_null AS SELECT * FROM source;
CREATE MATERIALIZED VIEW mv_a to dest_a AS
SELECT
count() AS count,
min(a) AS min,
max(a) AS max,
(SELECT count() FROM source_null) AS count_subquery,
(SELECT min(a) FROM source_null) AS min_subquery,
(SELECT max(a) FROM source_null) AS max_subquery
FROM source_null
GROUP BY count_subquery, min_subquery, max_subquery;
INSERT INTO source SELECT number FROM numbers(2000) SETTINGS min_insert_block_size_rows=1500, max_insert_block_size=1500;
SELECT count() FROM source;
SELECT count() FROM dest_a;
SELECT * from dest_a ORDER BY count DESC;

View File

@ -0,0 +1 @@
22 [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 22

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
echo '
SELECT
sum(toUInt8(1) ? toUInt8(1) : toUInt8(1)) AS metric,
groupArray(toUInt8(1) ? toUInt8(1) : toUInt8(1)),
groupArray(toUInt8(1) ? toUInt8(1) : 1),
sum(toUInt8(1) ? toUInt8(1) : 1)
FROM (SELECT materialize(toUInt64(1)) as key FROM numbers(22))
WHERE key = {b1:Int64}' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&param_b1=1" -d @-

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Tags: no-fasttest
set -e
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# Reproduce GZDATA:
#
# ```python
# import pyarrow as pa
# data = [pa.array([1]), pa.array([2]), pa.array([3])]
# batch = pa.record_batch(data, names=['x', 'y', 'x'])
# with pa.ipc.new_file('data.arrow', batch.schema) as writer:
# writer.write_batch(batch)
# ```
#
# ```bash
# cat data.arrow | gzip | base64
# ```
GZDATA="H4sIAHTzuWEAA9VTuw3CMBB9+RCsyIULhFIwAC0SJQWZACkNi1CAxCCMwCCMQMEIKdkgPJ8PJbIIEiVPujuf73yfp6Rumt1+BXTEA4CDRwmLAhMYnogkpw96hjpXDWSUA2Wt/pU1mJz6GjO9k+eUI+UicSRbqvuX3BPlNsh1zDCcZypTOJ0xvF186GOYZ5ht9NrX8Pu12svDYq4bWqmKLEdFU+GNkmcr23oOzspNgh4FxmEiO3bvoriL4jJa1Bc/+OmghkcXeJU+lmwUwoALHHDbDfUSgVNfo9V3T7U9Pz3++bswDNbyD7wAxr434AoDAAA="
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t1"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE t1 ( x Int64, y Int64, z Int64 ) ENGINE = Memory"
echo ${GZDATA} | base64 --decode | gunzip | ${CLICKHOUSE_CLIENT} -q "INSERT INTO t1 FORMAT Arrow" 2>&1 | grep -qF "DUPLICATE_COLUMN" && echo 'OK' || echo 'FAIL' ||:
${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t1"

View File

@ -0,0 +1 @@
[(1,'Hello'),(2,'World')]

View File

@ -0,0 +1 @@
SELECT CAST([(1, 'Hello'), (2, 'World')] AS Array(Tuple(a UInt64, b String)));

View File

@ -0,0 +1,3 @@
SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(1) FORMAT Null;
SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(128) FORMAT Null;
SELECT fuzzBits(toFixedString('', 200), 0.99) from numbers(60000) FORMAT Null;

View File

@ -0,0 +1,6 @@
1
2
2
4
(0,'Value')
Value

View File

@ -0,0 +1,35 @@
-- Tags: no-parallel
DROP FUNCTION IF EXISTS 02148_test_function;
CREATE FUNCTION 02148_test_function AS () -> (SELECT 1);
SELECT 02148_test_function();
CREATE OR REPLACE FUNCTION 02148_test_function AS () -> (SELECT 2);
SELECT 02148_test_function();
DROP FUNCTION 02148_test_function;
CREATE FUNCTION 02148_test_function AS (x) -> (SELECT x + 1);
SELECT 02148_test_function(1);
DROP FUNCTION IF EXISTS 02148_test_function_nested;
CREATE FUNCTION 02148_test_function_nested AS (x) -> 02148_test_function(x + 2);
SELECT 02148_test_function_nested(1);
DROP FUNCTION 02148_test_function;
DROP FUNCTION 02148_test_function_nested;
DROP TABLE IF EXISTS 02148_test_table;
CREATE TABLE 02148_test_table (id UInt64, value String) ENGINE=TinyLog;
INSERT INTO 02148_test_table VALUES (0, 'Value');
CREATE FUNCTION 02148_test_function AS () -> (SELECT * FROM 02148_test_table LIMIT 1);
SELECT 02148_test_function();
CREATE OR REPLACE FUNCTION 02148_test_function AS () -> (SELECT value FROM 02148_test_table LIMIT 1);
SELECT 02148_test_function();
DROP FUNCTION 02148_test_function;
DROP TABLE 02148_test_table;

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_index_hypothesis"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_index_hypothesis (a UInt32, b UInt32, INDEX t a != b TYPE hypothesis GRANULARITY 1) ENGINE = MergeTree ORDER BY a"
$CLICKHOUSE_CLIENT -q "INSERT INTO t_index_hypothesis SELECT number, number + 1 FROM numbers(10000000)"
for _ in {0..30}; do
output=`$CLICKHOUSE_CLIENT -q "SELECT count() FROM t_index_hypothesis WHERE a = b"`
if [[ $output != "0" ]]; then
echo "output: $output, expected: 0"
exit 1
fi
done
echo OK
$CLICKHOUSE_CLIENT -q "DROP TABLE t_index_hypothesis"

View File

@ -0,0 +1,285 @@
---
title: 'What''s New in ClickHouse 21.12'
image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-12/featured.jpg'
date: '2021-12-16'
author: '[Alexey Milovidov](https://github.com/alexey-milovidov), [Christoph Wurm](https://github.com/cwurm)'
tags: ['company', 'community']
---
We're continuing our monthly release cadence. The 21.12 Christmas release includes 2460 new commits from 125 contributors, including 42 new contributors:
> Alex Cao, Amr Alaa, Andrey Torsunov, Constantine Peresypkin, Dmitriy Dorofeev, Egor O'Sten, Elykov Alexandr, Evgeny, Frank Chen, LB, Natasha Murashkina, Peignon Melvyn, Rich Raposa, Roman Chyrva, Roman, SuperDJY, Thom O'Connor, Timur Magomedov, Tom Risse, Tomáš Hromada, cfcz48, cgp, cms, cmsxbc, congbaoyangrou, dongyifeng, frank chen, freedomDR, jus1096, khamadiev, laurieliyang, leosunli, liyang830, loneylee, michael1589, msaf1980, p0ny, qieqieplus, spume, sunlisheng, yandd, zhanghuajie.
If you are wondering, this list is generated by the following command:
```
clickhouse-local --query "
SELECT arrayStringConcat(groupArray(s), ', ')
FROM file('contributors-21.12.txt', LineAsString, 's String')
WHERE s NOT IN (
SELECT *
FROM file('contributors-21.11.txt', LineAsString, 's String'))
FORMAT TSVRaw"
```
And to list the contributors, you can always run the
```
SELECT * FROM system.contributors
```
query on your production server.
Let's highlight some of the new capabilities in 21.12:
## ClickHouse Keeper is Feature Complete
In 21.12 `clickhouse-keeper` started to support ["four letter commands"](https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands) for status and monitoring. This feature is contributed by **JackyWoo** and reviewed by **Alexander Sapin** (the author of ClickHouse Keeper).
It was the only missing feature to implement. In this release, clickhouse-keeper is still considered to be in a pre-production stage, but many companies have already started to evaluate and use it as a replacement for ZooKeeper. You can also start using clickhouse-keeper in your testing environments and we will appreciate your feedback.
ClickHouse Keeper development started in Sep 2020, more than a year ago. It was a long road, and most of the effort went into ensuring correctness and stability in unusual and exceptional scenarios. It is covered by [Jepsen](https://jepsen.io/) tests (including the ZooKeeper tests and [newly introduced tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/jepsen.clickhouse-keeper)) as well as continuous randomized stress testing with the ClickHouse functional and integration tests. It is being tested in Yandex Cloud and among our best friends. If you aspire to be one of our best friends, you can do it too.
**How does this help you?**
ClickHouse Keeper is a drop-in replacement for ZooKeeper. It implements the ZooKeeper wire protocol and data model, but does it better.
In contrast to ZooKeeper, there are no issues with zxid overflow or packet sizes. It has better memory usage and does not require JVM tuning (because it does not use the JVM). Logs and snapshots are compressed (typically about 10x) and checksummed. It can run as a separate process or directly inside clickhouse-server. You can use it with ClickHouse or with your Kafkas and Hadoops as well.
[More info](http://presentations.clickhouse.tech/meetup54/keeper.pdf).
## Partitions For INSERT INTO File, URL And HDFS Storages
When using the table engines `File`, `URL`, and `HDFS`, ClickHouse now supports partitions. When creating a table you can specify the partition key using the `PARTITION BY` clause, e.g. `CREATE TABLE hits_files (...) ENGINE = File(TabSeparated) PARTITION BY toYYYYMM(EventDate)`.
Similarly, when exporting data from ClickHouse using the `file`, `url`, and `hdfs` table functions, you can now specify that the data is to be partitioned into multiple files using a `PARTITION BY` clause. For example, `INSERT INTO TABLE FUNCTION file('path/hits_{_partition_id}', 'TSV', 'columns...') PARTITION BY toYYYYMM(EventDate) VALUES ...` will create as many files as there are unique months in the dataset.
The `s3` table function has supported partitioned writes since ClickHouse 21.10.
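Putting both together, here is a minimal sketch; the table and column names are illustrative:
```
CREATE TABLE hits_files (EventDate Date, UserID UInt64)
ENGINE = File(TabSeparated)
PARTITION BY toYYYYMM(EventDate);

INSERT INTO TABLE FUNCTION file('hits_{_partition_id}.tsv', 'TSV', 'EventDate Date, UserID UInt64')
PARTITION BY toYYYYMM(EventDate)
SELECT today() - number, number FROM numbers(100);
```
Each distinct month gets its own file, and the `{_partition_id}` placeholder in the path is replaced with the partition value.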
**How does this help you?**
If data is split into multiple files, then a `SELECT` query will be automatically parallelized. Example:
```
SELECT user_id, count() FROM s3(
'https://s3.us-east-2.amazonaws.com/.../*.csv.zstd',
'...', '...',
CSV,
'user_id UInt64, ...')
```
You can even parallelize data processing across a distributed compute cluster if you use the `s3Cluster` table function:
```
SELECT user_id, count() FROM s3Cluster(
my_cluster,
'https://s3.us-east-2.amazonaws.com/.../*.csv.zstd',
'...',
'...', CSV,
'user_id UInt64, ...')
```
It can also be used for integrations with external data processing tools that consume data from `s3`.
## FROM INFILE in clickhouse-client now supports glob patterns and parallel reading
Just write:
```
INSERT INTO my_table FROM INFILE '*.csv.gz' FORMAT CSV
```
Glob patterns support `*`, `?`, and `{n..m}`, where the range can be written as `{1..10}` or (zero-padded) `{01..10}`.
This query will be automatically parallelized; it will also detect the compression format from the file extension and decompress transparently.
This improvement is done by **Arthur Filatenkov**.
**How does this help you?**
Now you don't have to recall how to write a parallel for loop in your command-line shell. clickhouse-client will do everything for you; it works intuitively and fast.
## Support for INTERVAL operator inside WITH FILL modifier for ORDER BY clause
What's the `WITH FILL` modifier in the `ORDER BY` clause? Just look at the example.
```
:) SELECT EventDate, count() FROM test.hits WHERE CounterID = 2841673 GROUP BY EventDate ORDER BY EventDate
┌──EventDate─┬─count()─┐
│ 2014-03-17 │ 3 │
│ 2014-03-19 │ 6 │
│ 2014-03-21 │ 7 │
│ 2014-03-22 │ 6 │
└────────────┴─────────┘
```
We have the report with Mar 17th, 19th, 21st, and 22nd. But Mar 18th and 20th are missing, because there is no data for these dates.
And this is how it works in all SQL databases.
But ClickHouse also has a quite unique and neat `WITH FILL` modifier for the `ORDER BY` clause.
You just write:
```
SELECT EventDate, count() FROM test.hits WHERE CounterID = 2841673 GROUP BY EventDate
ORDER BY EventDate WITH FILL STEP 1
┌──EventDate─┬─count()─┐
│ 2014-03-17 │ 3 │
│ 2014-03-18 │ 0 │
│ 2014-03-19 │ 6 │
│ 2014-03-20 │ 0 │
│ 2014-03-21 │ 7 │
│ 2014-03-22 │ 6 │
└────────────┴─────────┘
```
And missing data is automatically filled.
You can also add `FROM` and `TO`:
```
ORDER BY date WITH FILL FROM '2014-03-01'::Date TO '2014-03-31'::Date STEP 1;
```
And it will automatically fill missing rows in the report.
The STEP can be an arbitrary number. But what do you do if you want to fill missing dates in a report by month? You cannot just write STEP 30 or STEP 31, because months contain a different number of days...
Since ClickHouse version 21.12 you can do it like this:
```
ORDER BY EventDate WITH FILL STEP INTERVAL 1 MONTH
```
`INTERVAL` is a standard SQL operator; you can use SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, and YEAR.
This is implemented by **Anton Popov**, who is the author of the "WITH FILL" feature.
**How does this help you?**
It allows you to avoid a postprocessing step for your reports.
## Add Support For "Identifier" Table and Database Query Parameters
ClickHouse has support for parameterized queries.
```
SELECT uniq(user_id) FROM table WHERE website = {name:String}
```
It allows you to safely substitute parameters without the risk of SQL injection:
```
curl https://clickhouse-server:8443/?param_name=upyachka -d 'SELECT uniq(user_id) FROM table WHERE website = {name:String}'
```
You can even create customized API handlers for clickhouse-server based on prepared queries.
Since version 21.12 we introduce support for using parameters for table and database names in your queries. This is implemented with the `Identifier` parameter type:
```
SELECT uniq(user_id) FROM {tbl:Identifier}
```
Identifier parameters also work for CREATE, DROP, and all other DDL queries. This is implemented by **Nikolai Degterinskiy**.
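A quick sketch of the DDL case (the table name here is illustrative; the parameter value is passed the same way as in the curl example above, e.g. `param_tbl=hits`):
```
SELECT count() FROM {tbl:Identifier};
DROP TABLE IF EXISTS {tbl:Identifier};
```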
**How does this help you?**
Let ClickHouse do the heavy-lifting and keep your scripts safe and secure.
## Bool Data Type
This feature is experimental in version 21.12. It is implemented by **Kevin Wan (MaxWk)** on top of initial work by **hczhcz** and reviewed by **Pavel Kruglov**.
ClickHouse now natively supports a `Bool` data type. It allows values to be represented as "true"/"false" during data import and export in text formats. The representation can also be adjusted to anything else using the settings `bool_true_representation` and `bool_false_representation` (for example, "yes" and "no").
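A minimal sketch (the table name and values are illustrative):
```
CREATE TABLE bool_demo (id UInt64, flag Bool) ENGINE = MergeTree ORDER BY id;
INSERT INTO bool_demo VALUES (1, true), (2, false);

SELECT * FROM bool_demo
SETTINGS bool_true_representation = 'yes', bool_false_representation = 'no'
FORMAT CSV;
```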
**How does this help you?**
Native boolean data types exist today in other databases that are often integrated with ClickHouse, such as PostgreSQL. The `Bool` data type in ClickHouse will make it more compatible with existing code and ease migration from other databases.
It also simplifies data ingestion from various text sources.
## Query Optimizations With Table Constraints
This feature is [contributed](https://github.com/ClickHouse/ClickHouse/pull/18787) by **Nikita Vasilev**. Nikita is one of the most notable ClickHouse contributors. He started in 2019 by introducing data skipping indices into ClickHouse, then continued in 2020 with SSD-optimized key-value dictionaries and now contributed the new advancements in the query optimizer. This feature is reviewed by **Anton Popov**.
So, what optimizations? ClickHouse already allows you to specify constraints for tables:
```
CREATE TABLE
(
URL String,
Domain String,
CONSTRAINT validate CHECK isValidUTF8(URL) AND length(URL) BETWEEN 10 AND 10000,
CONSTRAINT my_constraint CHECK Domain = domainWithoutWWW(URL)
) ...
```
Constraints are checked on INSERT. In this example we validate the URL and check that the Domain column actually contains the domain of the URL.
Since version 21.12 constraints can also automatically optimize your queries! For example, if you write:
```
SELECT count() FROM hits WHERE domainWithoutWWW(URL) = 'ghe.clickhouse.tech'
```
The query can be automatically rewritten to:
```
SELECT count() FROM hits WHERE Domain = 'ghe.clickhouse.tech'
```
because the `Domain` column is smaller and more compressible, it will be faster to read, and it does not require calculating the domain from the URL.
The only thing you need is to enable the `optimize_using_constraints` and `optimize_substitute_columns` settings.
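For example, with the table above (a sketch):
```
SET optimize_using_constraints = 1, optimize_substitute_columns = 1;

-- thanks to my_constraint, this condition can be rewritten to use the Domain column
SELECT count() FROM hits WHERE domainWithoutWWW(URL) = 'ghe.clickhouse.tech';
```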
As a bonus, a new type of constraint is introduced: `ASSUME`.
```
CONSTRAINT my_constraint ASSUME Domain = domainWithoutWWW(URL)
```
This type of constraint will not check anything on INSERT, but will still use the assumption to optimize queries.
It can also do logical inference, simplify conditions, and remove conditions that are proven to be satisfied by constraints.
This is controlled by the `convert_query_to_cnf` setting. You can also enable the `optimize_append_index` setting; with it, ClickHouse will derive more conditions on the table primary key.
The idea is so powerful that we cannot resist adding one more feature: *hypothesis indices*.
```
INDEX my_index (a < b) TYPE hypothesis GRANULARITY 1
```
The expression is checked and the result (true/false) is written as an index for query optimization.
**How does this help you?**
Especially in large ClickHouse deployments with many complex tables it can be hard for users to always be up to date on the best way to query a given dataset. Constraints can help optimize queries without having to change the query structure itself. They can also make it easier to make changes to tables.
For example, let's say you have a table containing web requests and it includes a URL column that contains the full URL of each request. Many times, users will want to know the top level domain (.com, .co.uk, etc.), something ClickHouse provides the `topLevelDomain` function to calculate. If you discover that many people are using this function you might decide to create a new materialized column that pre-calculates the top level domain for each record.
Rather than tell all your users to change their queries you can use a table constraint to tell ClickHouse that each time a user tries to call the `topLevelDomain` function the request should be rewritten to use the new materialized column.
## Read Large Remote Files In Chunks
ClickHouse combines a fast query engine with efficient data storage. It also allows you to integrate external data sources for data import and export, or even to process external datasets on the fly without the need for import or preprocessing.
When reading large files in `Parquet`, `ORC`, and `Arrow` format using the `s3`, `url`, and `hdfs` table functions, ClickHouse will now automatically choose whether to read the entire file at once or read parts of it incrementally. This is now enabled by default and the setting `remote_read_min_bytes_for_seek` controls when to switch from reading it all to reading in chunks. The default is 1MiB.
`Parquet`, `ORC`, and `Arrow` are column-oriented formats (quite similar to the ClickHouse Native format), and now we can read only the requested columns even when they are read from a remote HTTP server with the `url` table function (range requests are performed to skip unneeded data).
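A sketch of adjusting that threshold for a remote Parquet file (the URL and schema are illustrative):
```
SELECT count()
FROM url('https://example.com/hits.parquet', Parquet, 'UserID UInt64, EventDate Date')
SETTINGS remote_read_min_bytes_for_seek = 4194304; -- 4 MiB instead of the default 1 MiB
```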
This feature is implemented by **Kseniia Sumarokova**.
**How does this help our ClickHouse Users?**
In previous versions, when reading files in Arrow-based formats from remote locations with the `s3`, `url`, and `hdfs` table functions, ClickHouse would always read the entire file into memory. This works well when the files are small but will cause excessive memory usage or not work at all when the files are large. With this change, ClickHouse will read large files in chunks to keep memory usage in check and is now able to read even very large files.
## ... And Many More
Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) of the 21.12 "Christmas" release for the full list of gifts from the [ClickHouse Team](https://clickhouse.com/careers/).

View File

@ -0,0 +1,16 @@
---
title: 'How to Enable Predictive Capabilities in Clickhouse Databases'
image: 'https://blog-images.clickhouse.com/en/2021/mindsdb-enables-predictive-capabilities-in-clickHouse/featured.png'
date: '2021-12-14'
author: '[Ilya Yatsishin](https://github.com/qoega)'
tags: ['company', 'how-to', 'MindsDB']
---
ClickHouse is a fast, open-source, column-oriented SQL database that is very useful for data analysis and real-time analytics. With MindsDB, it can be turned into a powerful machine learning platform for business forecasting.
In this article, we will
- Guide you through the machine learning workflow and how to use ClickHouse's powerful tools, like materialized views, to handle data cleaning and preparation more effectively, especially for large datasets with billions of rows,
- Explore the concept of AI Tables from MindsDB and how they can be used within ClickHouse to automatically build predictive models and make forecasts using simple SQL statements, and
- Share how MindsDB automates really complex machine learning tasks, like multivariate time-series analysis with high cardinality, show how to detect anomalies, and visualize these predictions.
[Read Further](https://mindsdb.com/blog/enabling-predictive-capabilities-in-clickhouse-database/?utm_medium=referral&utm_source=clickhouse&utm_campaign=clickhouse-ml-article-2021-12)

View File

@ -1,23 +1,17 @@
<div class="hero bg-white">
<div class="hero-bg index-hero"></div>
<div class="container pt-5 pt-lg-7 pt-xl-15 pb-5 pb-lg-7">
<h1 class="display-1 mb-2 mb-xl-3 mx-auto text-center">
ClickHouse <span class="text-orange">v21.11 Released</span>
ClickHouse <span class="text-orange">v21.12 Released</span>
</h1>
<p class="lead mb-3 mb-lg-5 mb-xl-7 mx-auto text-muted text-center" style="max-width:780px;">
{{ _('ClickHouse® is an open-source, high performance columnar OLAP database management system for real-time analytics using SQL.') }}
</p>
<div class="btns btns-lg mx-auto mb-3 mb-xl-5" role="group" style="max-width:520px;">
<div class="d-flex flex-column">
<a href="https://youtu.be/xb64zoPYvqQ?t=958" class="btn btn-primary" role="button" rel="external nofollow" target="_blank">Watch on YouTube</a>
</div>
</div>
<p class="d-flex justify-content-center mb-0">
<a href="/blog/en/2021/clickhouse-v21.11-released/" class="trailing-link">Read the Blog Post</a>
<a href="/blog/en/2021/clickhouse-v21.12-released/" class="btn btn-primary trailing-link">Read the Blog Post</a>
</p>
</div>
@ -28,15 +22,15 @@
<div class="card is-large has-highlight">
<div class="card-body">
<h4 class="text-blue text-center">ClickHouse Announces $250 Million in Funding</h4>
<p class="font-lg text-center mb-6 mx-auto">Raising the Companys Valuation to $2B</p>
<p class="font-lg text-center mb-6 mx-auto">Raising the Companys Valuation to $2B</p>
<div class="btns is-3 mx-auto" role="group" style="max-width:740px;">
<a href="/blog/en/2021/clickhouse-raises-250m-series-b/" class="btn btn-secondary" role="button">Read the Blog Post</a>
<a href="https://www.bloomberg.com/news/articles/2021-10-28/clickhouse-valued-at-2-billion-in-round-after-yandex-spinout" class="btn btn-outline-secondary" role="button" rel="external nofollow" target="_blank">Read the News</a>
<a href="https://www.businesswire.com/news/home/20211028005287/en" class="btn btn-outline-secondary" role="button" rel="external nofollow" target="_blank">Read the Press Release</a>
<a href="https://www.businesswire.com/news/home/20211028005287/en" class="btn btn-outline-secondary" role="button" rel="external nofollow" target="_blank">Read the Press Release</a>
</div>
</div>
</div>