Merge branch 'master' into update-mongo-contrb

This commit is contained in:
mergify[bot] 2021-12-10 08:57:16 +00:00 committed by GitHub
commit a07061ac84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
723 changed files with 31840 additions and 4214 deletions

377
.github/workflows/backport_branches.yml vendored Normal file
View File

@ -0,0 +1,377 @@
name: BackportPR
on: # yamllint disable-line rule:truthy
push:
branches:
- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: CompatibilityCheck
env:
TEMP_PATH: ${{runner.temp}}/compatibility_check
REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH: ${{runner.temp}}/reports_dir
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 compatibility_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_release'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_asan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_tsan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_debug'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
needs:
- BuilderDebRelease
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
FunctionalStatelessTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (address, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
FunctionalStatefulTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (debug, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_thread
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (thread, actions)'
REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (release, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs:
- DockerHubPush
- BuilderReport
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
- StressTestTsan
- IntegrationTestsRelease
- CompatibilityCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 finish_check.py

View File

@ -1,7 +1,7 @@
name: Cancel
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions", "ReleaseCI"]
workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
types:
- requested
jobs:
@ -10,4 +10,5 @@ jobs:
steps:
- uses: styfle/cancel-workflow-action@0.9.1
with:
all_but_latest: true
workflow_id: ${{ github.event.workflow.id }}

62
.github/workflows/docs_check.yml vendored Normal file
View File

@ -0,0 +1,62 @@
name: DocsCheck
on: # yamllint disable-line rule:truthy
pull_request:
types:
- synchronize
- reopened
- opened
branches:
- master
paths:
- 'docs/**'
- 'website/**'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 run_check.py
DockerHubPush:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
DocsCheck:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docs_check
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check
env:
TEMP_PATH: ${{runner.temp}}/docs_check
REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
run: |
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 docs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH

View File

@ -2,13 +2,14 @@ name: CIGithubActions
on: # yamllint disable-line rule:truthy
pull_request:
types:
- labeled
- unlabeled
- synchronize
- reopened
- opened
branches:
- master
paths-ignore:
- 'docs/**'
- 'website/**'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@ -60,34 +61,8 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
DocsCheck:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docs_check
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check
env:
TEMP_PATH: ${{runner.temp}}/docs_check
REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
run: |
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 docs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FastTest:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Check out repository code
@ -109,8 +84,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
PVSCheck:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, func-tester]
steps:
- name: Check out repository code
@ -134,7 +108,6 @@ jobs:
sudo rm -fr $TEMP_PATH
CompatibilityCheck:
needs: [BuilderDebRelease]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -161,7 +134,6 @@ jobs:
sudo rm -fr $TEMP_PATH
SplitBuildSmokeTest:
needs: [BuilderDebSplitted]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@ -191,7 +163,6 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -230,7 +201,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderBinRelease:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -269,7 +239,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -308,7 +277,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebUBsan:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -347,7 +315,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebTsan:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -386,7 +353,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebMsan:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -425,7 +391,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -467,7 +432,6 @@ jobs:
##########################################################################################
BuilderDebSplitted:
needs: [DockerHubPush, FastTest]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@ -504,6 +468,234 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinTidy:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_tidy'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwin:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_freebsd'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinPPC64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_ppc64le'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -516,7 +708,6 @@ jobs:
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
- BuilderDebSplitted
runs-on: [self-hosted, style-checker]
if: always()
steps:
@ -542,6 +733,40 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderSpecialReport:
needs:
- BuilderDebSplitted
- BuilderBinTidy
- BuilderBinDarwin
- BuilderBinAarch64
- BuilderBinFreeBSD
- BuilderBinDarwinAarch64
- BuilderBinPPC64
runs-on: [self-hosted, style-checker]
if: always()
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse special build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@ -557,10 +782,68 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
TEMP_PATH: ${{runner.temp}}/stateless_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
REPO_COPY: ${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseDatabaseReplicated:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_database_replicated
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, DatabaseReplicated, actions)'
REPO_COPY: ${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseWideParts:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_wide_parts
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, wide parts enabled, actions)'
REPO_COPY: ${{runner.temp}}/stateless_wide_parts/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
@ -605,9 +888,7 @@ jobs:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan:
needs: [BuilderDebTsan]
# tests can consume more than 60GB of memory,
# so use bigger server
runs-on: [self-hosted, stress-tester]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -765,10 +1046,10 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
TEMP_PATH: ${{runner.temp}}/stateful_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
REPO_COPY: ${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
@ -1075,7 +1356,7 @@ jobs:
##############################################################################################
ASTFuzzerTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1103,7 +1384,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1131,7 +1412,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestUBSan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1159,7 +1440,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestMSan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1187,7 +1468,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1217,7 +1498,7 @@ jobs:
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAsan:
needs: [BuilderDebAsan, FunctionalStatelessTestAsan]
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -1245,7 +1526,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan:
needs: [BuilderDebTsan, FunctionalStatelessTestTsan]
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -1273,7 +1554,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease:
needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -1333,7 +1614,7 @@ jobs:
#############################################################################################
UnitTestsAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1361,7 +1642,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsReleaseClang:
needs: [BuilderBinRelease]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1389,7 +1670,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1417,7 +1698,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1445,7 +1726,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsUBsan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1480,6 +1761,8 @@ jobs:
- FastTest
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
- FunctionalStatelessTestReleaseDatabaseReplicated
- FunctionalStatelessTestReleaseWideParts
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
@ -1490,7 +1773,6 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- DocsCheck
- StressTestDebug
- StressTestAsan
- StressTestTsan

View File

@ -41,30 +41,6 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
PVSCheck:
needs: DockerHubPush
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, func-tester]
steps:
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: PVS Check
env:
TEMP_PATH: ${{runner.temp}}/pvs_check
REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 pvs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
@ -430,6 +406,240 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinTidy:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_tidy'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwin:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinFreeBSD:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_freebsd'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinPPC64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_ppc64le'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -442,7 +652,6 @@ jobs:
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
- BuilderDebSplitted
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
@ -467,6 +676,39 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderSpecialReport:
needs:
- BuilderBinTidy
- BuilderDebSplitted
- BuilderBinDarwin
- BuilderBinAarch64
- BuilderBinFreeBSD
- BuilderBinDarwinAarch64
- BuilderBinPPC64
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse special build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@ -482,10 +724,39 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
TEMP_PATH: ${{runner.temp}}/stateless_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
REPO_COPY: ${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseDatabaseOrdinary:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_release_database_ordinary
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, DatabaseOrdinary, actions)'
REPO_COPY: ${{runner.temp}}/stateless_release_database_ordinary/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
@ -659,10 +930,39 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
TEMP_PATH: ${{runner.temp}}/stateful_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
REPO_COPY: ${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestReleaseDatabaseOrdinary:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, DatabaseOrdinary, actions)'
REPO_COPY: ${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
@ -968,7 +1268,7 @@ jobs:
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAsan:
needs: [BuilderDebAsan, FunctionalStatelessTestAsan]
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -996,7 +1296,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan:
needs: [BuilderDebTsan, FunctionalStatelessTestTsan]
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -1024,7 +1324,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease:
needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
needs: [BuilderDebRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
@ -1056,7 +1356,7 @@ jobs:
##############################################################################################
ASTFuzzerTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1084,7 +1384,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1112,7 +1412,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestUBSan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1140,7 +1440,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestMSan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1168,7 +1468,7 @@ jobs:
sudo rm -fr $TEMP_PATH
ASTFuzzerTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1199,7 +1499,7 @@ jobs:
#############################################################################################
UnitTestsAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1227,7 +1527,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsReleaseClang:
needs: [BuilderBinRelease]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1255,7 +1555,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1283,7 +1583,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1311,7 +1611,7 @@ jobs:
sudo rm -fr $TEMP_PATH
UnitTestsUBsan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@ -1323,7 +1623,7 @@ jobs:
env:
TEMP_PATH: ${{runner.temp}}/unit_tests_ubsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Unit tests (msan, actions)'
CHECK_NAME: 'Unit tests (ubsan, actions)'
REPO_COPY: ${{runner.temp}}/unit_tests_ubsan/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
@ -1343,12 +1643,14 @@ jobs:
- BuilderReport
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
- FunctionalStatelessTestReleaseDatabaseOrdinary
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestReleaseDatabaseOrdinary
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
@ -1373,7 +1675,6 @@ jobs:
- UnitTestsUBsan
- UnitTestsReleaseClang
- SplitBuildSmokeTest
- PVSCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code

View File

@ -6,7 +6,6 @@ on: # yamllint disable-line rule:truthy
- '22.**'
- '23.**'
- '24.**'
- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]

5
.gitmodules vendored
View File

@ -17,7 +17,7 @@
[submodule "contrib/zlib-ng"]
path = contrib/zlib-ng
url = https://github.com/ClickHouse-Extras/zlib-ng.git
branch = clickhouse-new
branch = clickhouse-2.0.x
[submodule "contrib/googletest"]
path = contrib/googletest
url = https://github.com/google/googletest.git
@ -135,9 +135,6 @@
[submodule "contrib/flatbuffers"]
path = contrib/flatbuffers
url = https://github.com/ClickHouse-Extras/flatbuffers.git
[submodule "contrib/libc-headers"]
path = contrib/libc-headers
url = https://github.com/ClickHouse-Extras/libc-headers.git
[submodule "contrib/replxx"]
path = contrib/replxx
url = https://github.com/ClickHouse-Extras/replxx.git

View File

@ -149,6 +149,10 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
# For codegen_select_fuzzer
set (ENABLE_PROTOBUF 1)
set (USE_INTERNAL_PROTOBUF_LIBRARY 1)
endif()
# Global libraries
@ -223,7 +227,7 @@ if (OS_DARWIN)
# from a _specific_ library, which is what we need.
set(WHOLE_ARCHIVE -force_load)
# The `-noall_load` flag is the default and now obsolete.
set(NO_WHOLE_ARCHIVE "")
set(NO_WHOLE_ARCHIVE "-undefined,error") # Effectively, a no-op. Here to avoid empty "-Wl, " sequence to be generated in the command line.
else ()
set(WHOLE_ARCHIVE --whole-archive)
set(NO_WHOLE_ARCHIVE --no-whole-archive)

View File

@ -53,7 +53,6 @@ protected:
String input;
private:
bool multiline;
Patterns extenders;

View File

@ -22,7 +22,14 @@ namespace
/// Trim ending whitespace inplace
void trim(String & s)
{
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end());
}
/// Check if string ends with given character after skipping whitespaces.
bool ends_with(const std::string_view & s, const std::string_view & p)
{
auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }));
return ss.ends_with(p);
}
std::string getEditor()
@ -189,8 +196,28 @@ ReplxxLineReader::ReplxxLineReader(
rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); });
auto commit_action = [this](char32_t code)
{
std::string_view str = rx.get_state().text();
/// Always commit line when we see extender at the end. It will start a new prompt.
for (const auto * extender : extenders)
if (ends_with(str, extender))
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
/// If we see an delimiter at the end, commit right away.
for (const auto * delimiter : delimiters)
if (ends_with(str, delimiter))
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
/// If we allow multiline and there is already something in the input, start a newline.
if (multiline && !input.empty())
return rx.invoke(Replxx::ACTION::NEW_LINE, code);
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
/// bind C-j to ENTER action.
rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });
rx.bind_key(Replxx::KEY::control('J'), commit_action);
rx.bind_key(Replxx::KEY::ENTER, commit_action);
/// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind
/// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but

View File

@ -104,7 +104,7 @@ static void writeSignalIDtoSignalPipe(int sig)
errno = saved_errno;
}
/** Signal handler for HUP / USR1 */
/** Signal handler for HUP */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
@ -161,7 +161,7 @@ __attribute__((__weak__)) void collectCrashLog(
/** The thread that read info about signal or std::terminate from pipe.
* On HUP / USR1, close log files (for new files to be opened later).
* On HUP, close log files (for new files to be opened later).
* On information about std::terminate, write it to log.
* On other signals, write info to log.
*/
@ -201,7 +201,7 @@ public:
LOG_INFO(log, "Stop SignalListener thread");
break;
}
else if (sig == SIGHUP || sig == SIGUSR1)
else if (sig == SIGHUP)
{
LOG_DEBUG(log, "Received signal to close logs.");
BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger());
@ -832,7 +832,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals);
addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals);
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
#if defined(SANITIZER)
@ -1006,7 +1006,7 @@ void BaseDaemon::setupWatchdog()
/// Forward signals to the child process.
addSignalHandler(
{SIGHUP, SIGUSR1, SIGINT, SIGQUIT, SIGTERM},
{SIGHUP, SIGINT, SIGQUIT, SIGTERM},
[](int sig, siginfo_t *, void *)
{
/// Forward all signals except INT as it can be send by terminal to the process group when user press Ctrl+C,

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54457)
SET(VERSION_REVISION 54458)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 12)
SET(VERSION_MINOR 13)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 503a418dedf0011e9040c3a1b6913e0b5488be4c)
SET(VERSION_DESCRIBE v21.12.1.1-prestable)
SET(VERSION_STRING 21.12.1.1)
SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a)
SET(VERSION_DESCRIBE v21.13.1.1-prestable)
SET(VERSION_STRING 21.13.1.1)
# end of autochange

View File

@ -29,14 +29,6 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# glibc-compatibility library relies to constant version of libc headers
# (because minor changes in function attributes between different glibc versions will introduce incompatibilities)
# This is for x86_64. For other architectures we have separate toolchains.
if (ARCH_AMD64 AND NOT CMAKE_CROSSCOMPILING)
set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
endif ()
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)

View File

@ -22,9 +22,10 @@ if (COMPILER_GCC)
elseif (COMPILER_CLANG)
# Require minimum version of clang/apple-clang
if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
# If you are developer you can figure out what exact versions of AppleClang are Ok,
# simply remove the following line.
message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew. See the instruction: https://clickhouse.com/docs/en/development/build-osx/")
# (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it.
if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG})
message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew. See the instruction: https://clickhouse.com/docs/en/development/build-osx/")
endif ()
# AppleClang 10.0.1 (Xcode 10.2) corresponds to LLVM/Clang upstream version 7.0.0
# AppleClang 11.0.0 (Xcode 11.0) corresponds to LLVM/Clang upstream version 8.0.0
@ -90,6 +91,9 @@ endif ()
if (LINKER_NAME)
if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0))
find_program (LLD_PATH NAMES ${LINKER_NAME})
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
else ()

View File

@ -301,9 +301,10 @@ endif()
# instead of controlling it via CMAKE_FOLDER.
function (ensure_target_rooted_in _target _folder)
# Skip INTERFACE library targets, since FOLDER property is not available for them.
# Skip aliases and INTERFACE library targets, since FOLDER property is not available/writable for them.
get_target_property (_target_aliased "${_target}" ALIASED_TARGET)
get_target_property (_target_type "${_target}" TYPE)
if (_target_type STREQUAL "INTERFACE_LIBRARY")
if (_target_aliased OR _target_type STREQUAL "INTERFACE_LIBRARY")
return ()
endif ()

View File

@ -1,6 +1,6 @@
if (SANITIZE OR NOT (
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE OR ARCH_RISCV64)) OR
(OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug"))
(OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}

@ -1 +0,0 @@
Subproject commit aa5429bf67a346e48ad60efd88bcefc286644bf3

View File

@ -73,6 +73,11 @@ target_compile_options(cxx PRIVATE -w)
target_link_libraries(cxx PUBLIC cxxabi)
# For __udivmodti4, __divmodti4.
if (OS_DARWIN AND COMPILER_GCC)
target_link_libraries(cxx PRIVATE gcc)
endif ()
install(
TARGETS cxx
EXPORT global

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77
Subproject commit 63e20f9485b8cbeabf99008123248fc9f033e766

View File

@ -28,11 +28,16 @@ set (SRCS
${SRC_DIR}/src/sentry_unix_pageallocator.c
${SRC_DIR}/src/path/sentry_path_unix.c
${SRC_DIR}/src/symbolizer/sentry_symbolizer_unix.c
${SRC_DIR}/src/modulefinder/sentry_modulefinder_linux.c
${SRC_DIR}/src/transports/sentry_transport_curl.c
${SRC_DIR}/src/backends/sentry_backend_none.c
)
if(APPLE)
list(APPEND SRCS ${SRC_DIR}/src/modulefinder/sentry_modulefinder_apple.c)
else()
list(APPEND SRCS ${SRC_DIR}/src/modulefinder/sentry_modulefinder_linux.c)
endif()
add_library(sentry ${SRCS})
add_library(sentry::sentry ALIAS sentry)

View File

@ -6,4 +6,6 @@ add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
# simdjson is using its own CPU dispatching and get confused if we enable AVX/AVX2 flags.
target_compile_options(simdjson PRIVATE -mno-avx -mno-avx2)
if(ARCH_AMD64)
target_compile_options(simdjson PRIVATE -mno-avx -mno-avx2)
endif()

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 4ef348b7f30f2ad5b02b266268b3c948e51ad457
Subproject commit 410845187f582c5e6692b53dddbe43efbb728734

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb
Subproject commit bffad6f6fe74d6a2f92e2668390664a926c68733

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (21.12.1.1) unstable; urgency=low
clickhouse (21.13.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 02 Nov 2021 00:56:42 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 09 Dec 2021 00:32:58 +0300

View File

@ -1,8 +1,10 @@
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \

View File

@ -1,9 +1,11 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.12.1.*
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.13.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -1,9 +1,11 @@
# docker build -t clickhouse/docs-build .
FROM ubuntu:20.04
ENV LANG=C.UTF-8
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV LANG=C.UTF-8
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \

View File

@ -1,9 +1,11 @@
# docker build -t clickhouse/binary-builder .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install \

View File

@ -1,9 +1,11 @@
# docker build -t clickhouse/deb-builder .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \

View File

@ -31,5 +31,6 @@ do
mv "$FUZZER_PATH" /output/fuzzers
done
tar -zcvf /output/fuzzers.tar.gz /output/fuzzers
rm -rf /output/fuzzers

View File

@ -1,7 +1,11 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.12.1.*
ARG version=21.13.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image
@ -26,8 +30,6 @@ ARG DEBIAN_FRONTEND=noninteractive
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.12.1.*
ARG version=21.13.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -1,9 +1,11 @@
# docker build -t clickhouse/test-base .
FROM clickhouse/test-util
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \

View File

@ -2,7 +2,9 @@
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser
FROM clickhouse/binary-builder
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev

View File

@ -1,9 +1,11 @@
# docker build -t clickhouse/fasttest .
FROM clickhouse/test-util
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \

View File

@ -174,7 +174,6 @@ function clone_submodules
contrib/double-conversion
contrib/libcxx
contrib/libcxxabi
contrib/libc-headers
contrib/lz4
contrib/zstd
contrib/fastops
@ -235,6 +234,9 @@ function build
time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
strip programs/clickhouse -o "$FASTTEST_OUTPUT/clickhouse-stripped"
gzip "$FASTTEST_OUTPUT/clickhouse-stripped"
fi
ccache --show-stats ||:
)

View File

@ -1,12 +1,14 @@
# docker build -t clickhouse/fuzzer .
FROM clickhouse/test-base
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
ca-certificates \

View File

@ -1,5 +1,5 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2001,SC2046
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
set -eux
set -o pipefail
@ -35,7 +35,7 @@ function clone
fi
git diff --name-only master HEAD | tee ci-changed-files.txt
else
if [ -v COMMIT_SHA ]; then
if [ -v SHA_TO_TEST ]; then
git fetch --depth 2 origin "$SHA_TO_TEST"
git checkout "$SHA_TO_TEST"
echo "Checked out nominal SHA $SHA_TO_TEST for master"
@ -165,7 +165,7 @@ thread apply all backtrace
continue
" > script.gdb
gdb -batch -command script.gdb -p $server_pid &
sudo gdb -batch -command script.gdb -p $server_pid &
# Check connectivity after we attach gdb, because it might cause the server
# to freeze and the fuzzer will fail.
@ -189,6 +189,7 @@ continue
--receive_data_timeout_ms=10000 \
--stacktrace \
--query-fuzzer-runs=1000 \
--testmode \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \

View File

@ -1,7 +1,9 @@
# docker build -t clickhouse/integration-tests-runner .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \

View File

@ -1,12 +1,14 @@
# docker build -t clickhouse/performance-comparison .
FROM ubuntu:18.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \

View File

@ -61,7 +61,7 @@ function configure
cp -rv right/config left ||:
# Start a temporary server to rename the tables
while killall clickhouse-server; do echo . ; sleep 1 ; done
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn temporary in its own process groups
@ -88,7 +88,7 @@ function configure
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while killall clickhouse-server; do echo . ; sleep 1 ; done
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
echo all killed
# Make copies of the original db for both servers. Use hardlinks instead
@ -106,7 +106,7 @@ function configure
function restart
{
while killall clickhouse-server; do echo . ; sleep 1 ; done
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
echo all killed
# Change the jemalloc settings here.
@ -291,7 +291,7 @@ function get_profiles_watchdog
for pid in $(pgrep -f clickhouse)
do
gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
done
wait
@ -1409,7 +1409,7 @@ case "$stage" in
while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis.
while killall clickhouse; do echo . ; sleep 1 ; done
while pkill clickhouse-serv; do echo . ; sleep 1 ; done
echo Servers stopped.
;&
"analyze_queries")

View File

@ -1,7 +1,9 @@
# docker build -t clickhouse/sqlancer-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git default-jdk maven python3 --yes --no-install-recommends
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip

View File

@ -61,6 +61,7 @@ chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
service clickhouse-server restart
# Wait for server to start accepting connections
@ -109,8 +110,13 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
--skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --timeout 1200 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
set -e
}

View File

@ -1,6 +1,7 @@
#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024
set -x
@ -55,9 +56,41 @@ function configure()
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
local total_mem
total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
total_mem=$(( total_mem*1024 )) # bytes
# Set maximum memory usage as half of total memory (less chance of OOM).
echo "<clickhouse><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></clickhouse>" \
> /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml
#
# But not via max_server_memory_usage but via max_memory_usage_for_user,
# so that we can override this setting and execute service queries, like:
# - hung check
# - show/drop database
# - ...
#
# So max_memory_usage_for_user will be a soft limit, and
# max_server_memory_usage will be hard limit, and queries that should be
# executed regardless memory limits will use max_memory_usage_for_user=0,
# instead of relying on max_untracked_memory
local max_server_mem
max_server_mem=$((total_mem*75/100)) # 75%
echo "Setting max_server_memory_usage=$max_server_mem"
cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
<max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
</clickhouse>
EOL
local max_users_mem
max_users_mem=$((total_mem*50/100)) # 50%
echo "Setting max_memory_usage_for_user=$max_users_mem"
cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
<profiles>
<default>
<max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
</default>
</profiles>
</clickhouse>
EOL
}
function stop()
@ -110,7 +143,7 @@ quit
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log &
sudo gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log &
}
configure
@ -151,8 +184,8 @@ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors
# Sanitizer asserts
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
grep -Fa "==================" /var/log/clickhouse-server/stderr.log | grep -v "in query:" >> /test_output/tmp
grep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv

View File

@ -67,57 +67,85 @@ def compress_stress_logs(output_path, files_prefix):
cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*"
check_output(cmd, shell=True)
def call_with_retry(query, timeout=30, retry_count=5):
for i in range(retry_count):
code = call(query, shell=True, stderr=STDOUT, timeout=timeout)
if code != 0:
time.sleep(i)
else:
break
def make_query_command(query):
return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0"""
def prepare_for_hung_check(drop_databases):
# FIXME this function should not exist, but...
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'", shell=True, stderr=STDOUT, timeout=30)
call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
# We attach gdb to clickhouse-server before running tests
# to print stacktraces of all crashes even if clickhouse cannot print it for some reason.
# However, it obstruct checking for hung queries.
logging.info("Will terminate gdb (if any)")
call("kill -TERM $(pidof gdb)", shell=True, stderr=STDOUT, timeout=30)
call_with_retry("kill -TERM $(pidof gdb)")
# Some tests set too low memory limit for default user and forget to reset in back.
# It may cause SYSTEM queries to fail, let's disable memory limit.
call("clickhouse client --max_memory_usage_for_user=0 -q 'SELECT 1 FORMAT Null'", shell=True, stderr=STDOUT, timeout=30)
call_with_retry(make_query_command('SELECT 1 FORMAT Null'))
# Some tests execute SYSTEM STOP MERGES or similar queries.
# It may cause some ALTERs to hang.
# Possibly we should fix tests and forbid to use such queries without specifying table.
call("clickhouse client -q 'SYSTEM START MERGES'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START DISTRIBUTED SENDS'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START TTL MERGES'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START MOVES'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START FETCHES'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START REPLICATED SENDS'", shell=True, stderr=STDOUT, timeout=30)
call("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'", shell=True, stderr=STDOUT, timeout=30)
call_with_retry(make_query_command('SYSTEM START MERGES'))
call_with_retry(make_query_command('SYSTEM START DISTRIBUTED SENDS'))
call_with_retry(make_query_command('SYSTEM START TTL MERGES'))
call_with_retry(make_query_command('SYSTEM START MOVES'))
call_with_retry(make_query_command('SYSTEM START FETCHES'))
call_with_retry(make_query_command('SYSTEM START REPLICATED SENDS'))
call_with_retry(make_query_command('SYSTEM START REPLICATION QUEUES'))
call_with_retry(make_query_command('SYSTEM DROP MARK CACHE'))
# Issue #21004, live views are experimental, so let's just suppress it
call("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """, shell=True, stderr=STDOUT, timeout=30)
call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'"))
# Kill other queries which known to be slow
# It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """, shell=True, stderr=STDOUT, timeout=30)
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'"))
# Long query from 00084_external_agregation
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """, shell=True, stderr=STDOUT, timeout=30)
call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'"))
if drop_databases:
# Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too.
# Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
databases = check_output('clickhouse client -q "SHOW DATABASES"', shell=True, timeout=30).decode('utf-8').strip().split()
for db in databases:
if db == "system":
continue
command = f'clickhouse client -q "DROP DATABASE {db}"'
# we don't wait for drop
Popen(command, shell=True)
for i in range(5):
try:
# Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too.
# Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
#
# Also specify max_untracked_memory to allow 1GiB of memory to overcommit.
databases = check_output(make_query_command('SHOW DATABASES'), shell=True, timeout=30).decode('utf-8').strip().split()
for db in databases:
if db == "system":
continue
command = make_query_command(f'DROP DATABASE {db}')
# we don't wait for drop
Popen(command, shell=True)
break
except Exception as ex:
print("Failed to SHOW or DROP databasese, will retry", ex)
time.sleep(i)
else:
raise Exception("Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries")
# Wait for last queries to finish if any, not longer than 300 seconds
call("""clickhouse client -q "select sleepEachRow((
select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300
) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT, timeout=330)
call(make_query_command("""
select sleepEachRow((
select maxOrDefault(300 - elapsed) + 1
from system.processes
where query not like '%from system.processes%' and elapsed < 300
) / 300)
from numbers(300)
format Null
"""), shell=True, stderr=STDOUT, timeout=330)
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
@ -170,7 +198,24 @@ if __name__ == "__main__":
if args.hung_check:
have_long_running_queries = prepare_for_hung_check(args.drop_databases)
logging.info("Checking if some queries hung")
cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1")
cmd = ' '.join([args.test_cmd,
# Do not track memory allocations up to 1Gi,
# this will allow to ignore server memory limit (max_server_memory_usage) for this query.
#
# NOTE: memory_profiler_step should be also adjusted, because:
#
# untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step)
#
# NOTE: that if there will be queries with GROUP BY, this trick
# will not work due to CurrentMemoryTracker::check() from
# Aggregator code.
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option", "max_untracked_memory=1Gi",
"--client-option", "max_memory_usage_for_user=0",
"--client-option", "memory_profiler_step=1Gi",
"--hung-check",
"00001_select_1"
])
res = call(cmd, shell=True, stderr=STDOUT)
hung_check_status = "No queries hung\tOK\n"
if res != 0 and have_long_running_queries:

View File

@ -1,7 +1,9 @@
# docker build -t clickhouse/style-test .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
shellcheck \

View File

@ -1,7 +1,9 @@
# docker build -t clickhouse/testflows-runner .
FROM ubuntu:20.04
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \

View File

@ -3,15 +3,14 @@ toc_priority: 65
toc_title: Build on Mac OS X
---
# You don't have to build ClickHouse
You can install ClickHouse as follows: https://clickhouse.com/#quick-start
Choose Mac x86 or M1.
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
Build should work on x86_64 (Intel) and arm64 (Apple Silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang.
It is always recommended to use `clang` compiler. It is possible to use XCode's `AppleClang` or `gcc` but it's strongly discouraged.
!!! info "You don't have to build ClickHouse yourself!"
You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start).
Follow `macOS (Intel)` or `macOS (Apple silicon)` installation instructions.
Build should work on x86_64 (Intel) and arm64 (Apple silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang.
It is always recommended to use vanilla `clang` compiler. It is possible to use XCode's `apple-clang` or `gcc` but it's strongly discouraged.
## Install Homebrew {#install-homebrew}
@ -33,8 +32,6 @@ sudo rm -rf /Library/Developer/CommandLineTools
sudo xcode-select --install
```
Reboot.
## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
``` bash
@ -51,40 +48,41 @@ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
## Build ClickHouse {#build-clickhouse}
To build using Homebrew's vanilla Clang compiler:
To build using Homebrew's vanilla Clang compiler (the only **recommended** way):
``` bash
cd ClickHouse
rm -rf build
mkdir build
cd build
cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_AR=$(brew --prefix llvm)/bin/llvm-ar -DCMAKE_RANLIB=$(brew --prefix llvm)/bin/llvm-ranlib -DOBJCOPY_PATH=$(brew --prefix llvm)/bin/llvm-objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo
cd ..
# The resulting binary will be created at: ./programs/clickhouse
```
To build using Xcode's native AppleClang compiler (this option is strongly not recommended; use the option above):
To build using Xcode's native AppleClang compiler in Xcode IDE (this option is only for development builds and workflows, and is **not recommended** unless you know what you are doing):
``` bash
cd ClickHouse
rm -rf build
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo
cd ..
XCODE_IDE=1 ALLOW_APPLECLANG=1 cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DENABLE_JEMALLOC=OFF ..
cmake --open .
# ...then, in Xcode IDE select ALL_BUILD scheme and start the building process.
# The resulting binary will be created at: ./programs/Debug/clickhouse
```
To build using Homebrew's vanilla GCC compiler (this option is absolutely not recommended, I'm wondering why do we ever have it):
To build using Homebrew's vanilla GCC compiler (this option is only for development experiments, and is **absolutely not recommended** unless you really know what you are doing):
``` bash
cd ClickHouse
rm -rf build
mkdir build
cd build
cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_AR=$(brew --prefix gcc)/bin/gcc-ar-11 -DCMAKE_RANLIB=$(brew --prefix gcc)/bin/gcc-ranlib-11 -DOBJCOPY_PATH=$(brew --prefix binutils)/bin/objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo
cd ..
# The resulting binary will be created at: ./programs/clickhouse
```
## Caveats {#caveats}
@ -140,9 +138,9 @@ sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
To check if its working, use the `ulimit -n` or `launchctl limit maxfiles` commands.
## Run ClickHouse server:
## Running ClickHouse server
```
``` bash
cd ClickHouse
./build/programs/clickhouse-server --config-file ./programs/server/config.xml
```

View File

@ -131,18 +131,18 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- **Build log**: link to the building and files copying log, useful when build failed.
- **Build time**.
- **Artifacts**: build result files (with `XXX` being the server version e.g. `20.8.1.4344`).
- `clickhouse-client_XXX_all.deb`
- `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
- `clickhouse-common-staticXXX_amd64.deb`
- `clickhouse-server_XXX_all.deb`
- `clickhouse-test_XXX_all.deb`
- `clickhouse_XXX_amd64.buildinfo`
- `clickhouse_XXX_amd64.changes`
- `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: Special package for performance tests.
- `clickhouse-client_XXX_all.deb`
- `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
- `clickhouse-common-staticXXX_amd64.deb`
- `clickhouse-server_XXX_all.deb`
- `clickhouse-test_XXX_all.deb`
- `clickhouse_XXX_amd64.buildinfo`
- `clickhouse_XXX_amd64.changes`
- `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: Special package for performance tests.
## Special Build Check

View File

@ -39,12 +39,16 @@ DETACH TABLE postgres_database.table_to_remove;
## Settings {#settings}
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)
- [materialized_postgresql_schema](../../operations/settings/settings.md#materialized-postgresql-schema)
- [materialized_postgresql_schema_list](../../operations/settings/settings.md#materialized-postgresql-schema-list)
- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
- [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot)
- [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot)
@ -52,8 +56,7 @@ DETACH TABLE postgres_database.table_to_remove;
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536,
materialized_postgresql_tables_list = 'table1,table2,table3';
SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM database1.table1;
```
@ -64,6 +67,55 @@ The settings can be changed, if necessary, using a DDL query. But it is impossib
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
```
## PostgreSQL schema {#schema}
PostgreSQL [schema](https://www.postgresql.org/docs/9.1/ddl-schemas.html) can be configured in 3 ways (starting from version 21.12).
1. One schema for one `MaterializedPostgreSQL` database engine. Requires to use setting `materialized_postgresql_schema`.
Tables are accessed via table name only:
``` sql
CREATE DATABASE postgres_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_schema = 'postgres_schema';
SELECT * FROM postgres_database.table1;
```
2. Any number of schemas with specified set of tables for one `MaterializedPostgreSQL` database engine. Requires to use setting `materialized_postgresql_tables_list`. Each table is written along with its schema.
Tables are accessed via schema name and table name at the same time:
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3';
materialized_postgresql_tables_list_with_schema = 1;
SELECT * FROM database1.`schema1.table1`;
SELECT * FROM database1.`schema2.table2`;
```
But in this case all tables in `materialized_postgresql_tables_list` must be written with its schema name.
Requires `materialized_postgresql_tables_list_with_schema = 1`.
Warning: for this case dots in table name are not allowed.
3. Any number of schemas with full set of tables for one `MaterializedPostgreSQL` database engine. Requires to use setting `materialized_postgresql_schema_list`.
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_schema_list = 'schema1,schema2,schema3';
SELECT * FROM database1.`schema1.table1`;
SELECT * FROM database1.`schema1.table2`;
SELECT * FROM database1.`schema2.table2`;
```
Warning: for this case dots in table name are not allowed.
## Requirements {#requirements}
1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file.

View File

@ -33,7 +33,7 @@ CREATE TABLE test
`key` String,
`v1` UInt32,
`v2` String,
`v3` Float32,
`v3` Float32
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key

View File

@ -133,8 +133,7 @@ Example:
SELECT level, sum(total) FROM daily GROUP BY level;
```
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasnt formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings/#settings-max_insert_block_size). If the block wasnt formed within [stream_flush_interval_ms](../../../operations/settings/settings/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To stop receiving topic data or to change the conversion logic, detach the materialized view:
@ -192,6 +191,6 @@ Example:
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size)
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/) <!--hide-->

View File

@ -108,9 +108,43 @@ SELECT * FROM mysql_table
└────────────────┴────────┘
```
## Settings {#mysql-settings}
Default settings are not very efficient, since they do not even reuse connections. These settings allow you to increase the number of queries run by the server per second.
### connection_auto_close {#connection-auto-close}
Allows to automatically close the connection after query execution, i.e. disable connection reuse.
Possible values:
- 1 — Auto-close connection is allowed, so the connection reuse is disabled
- 0 — Auto-close connection is not allowed, so the connection reuse is enabled
Default value: `1`.
### connection_max_tries {#connection-max-tries}
Sets the number of retries for pool with failover.
Possible values:
- Positive integer.
- 0 — There are no retries for pool with failover.
Default value: `3`.
### connection_pool_size {#connection-pool-size}
Size of connection pool (if all connections are in use, the query will wait until some connection will be freed).
Possible values:
- Positive integer.
Default value: `16`.
## See Also {#see-also}
- [The mysql table function](../../../sql-reference/table-functions/mysql.md)
- [The mysql table function](../../../sql-reference/table-functions/mysql.md)
- [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) <!--hide-->

View File

@ -262,7 +262,7 @@ In the example below, the index cant be used.
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
```
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md).
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md#force-primary-key).
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.

View File

@ -24,6 +24,8 @@ The supported formats are:
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
@ -429,8 +431,17 @@ Also prints two header rows with column names and types, similar to [TabSeparate
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`.
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter) settings, not from format strings.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
## CustomSeparatedWithNames {#customseparatedwithnames}
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
## JSON {#json}
@ -1536,14 +1547,17 @@ Each line of imported data is parsed according to the regular expression.
When working with the `Regexp` format, you can use the following settings:
- `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported:
- CSV (similarly to [CSV](#csv))
- JSON (similarly to [JSONEachRow](#jsoneachrow))
- Escaped (similarly to [TSV](#tabseparated))
- Quoted (similarly to [Values](#data-format-values))
- Raw (extracts subpatterns as a whole, no escaping rules)
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exeption in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
- `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported:
- CSV (similarly to [CSV](#csv))
- JSON (similarly to [JSONEachRow](#jsoneachrow))
- Escaped (similarly to [TSV](#tabseparated))
- Quoted (similarly to [Values](#data-format-values))
- Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exeption in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
**Usage**

View File

@ -105,7 +105,7 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## Four Latter Word Commands
## Four Letter Word Commands
ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively.

View File

@ -69,8 +69,6 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
<!--
## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or set in the configuration file.
@ -150,7 +148,6 @@ Or it can be set in hex:
Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long).
-->
## custom_settings_prefixes {#custom_settings_prefixes}

View File

@ -356,3 +356,24 @@ Possible values:
- 1 — Parts are detached.
Default value: `0`.
## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds}
Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories.
Possible values:
- Any positive integer.
Default value: `60` seconds.
## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds}
Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations.
Possible values:
- Any positive integer.
Default value: `1` second.

View File

@ -885,26 +885,6 @@ Possible values:
Default value: 2013265920.
## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds}
Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories.
Possible values:
- Any positive integer.
Default value: `60` seconds.
## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds}
Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations.
Possible values:
- Any positive integer.
Default value: `1` second.
## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io}
The minimum data volume required for using direct I/O access to the storage disk.
@ -992,9 +972,16 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
Setting up query threads logging.
Queries threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Example:
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `1`.
**Example**
``` text
log_query_threads=1
@ -3691,6 +3678,14 @@ Sets a comma-separated list of PostgreSQL database tables, which will be replica
Default value: empty list — means whole PostgreSQL database will be replicated.
## materialized_postgresql_schema {#materialized-postgresql-schema}
Default value: empty string. (Default schema is used)
## materialized_postgresql_schema_list {#materialized-postgresql-schema-list}
Default value: empty list. (Default schema is used)
## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
@ -4049,6 +4044,41 @@ Possible values:
Default value: `0`.
## alter_partition_verbose_result {#alter-partition-verbose-result}
Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied.
Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
Possible values:
- 0 — disable verbosity.
- 1 — enable verbosity.
Default value: `0`.
**Example**
```sql
CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a;
INSERT INTO test VALUES(1, '2021-01-01', '');
INSERT INTO test VALUES(1, '2021-01-01', '');
ALTER TABLE test DETACH PARTITION ID '202101';
ALTER TABLE test ATTACH PARTITION ID '202101' SETTINGS alter_partition_verbose_result = 1;
┌─command_type─────┬─partition_id─┬─part_name────┬─old_part_name─┐
│ ATTACH PARTITION │ 202101 │ 202101_7_7_0 │ 202101_5_5_0 │
│ ATTACH PARTITION │ 202101 │ 202101_8_8_0 │ 202101_6_6_0 │
└──────────────────┴──────────────┴──────────────┴───────────────┘
ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
┌─command_type─┬─partition_id─┬─part_name────┬─backup_name─┬─backup_path───────────────────┬─part_backup_path────────────────────────────────────────────┐
│ FREEZE ALL │ 202101 │ 202101_7_7_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_7_7_0 │
│ FREEZE ALL │ 202101 │ 202101_8_8_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_8_8_0 │
└──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘
```
## format_capn_proto_enum_comparising_mode {#format-capn-proto-enum-comparising-mode}
Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md#capnproto) `Enum` data type from schema.
@ -4071,3 +4101,54 @@ Possible values:
- 0 — Big files read with only copying data from kernel to userspace.
Default value: `0`.
## format_custom_escaping_rule {#format-custom-escaping-rule}
Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Possible values:
- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md#tabseparated).
- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md#data-format-values).
- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md#csv).
- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
- `'XML'` — Similarly to [XML](../../interfaces/formats.md#xml).
- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
Default value: `'Escaped'`.
## format_custom_field_delimiter {#format-custom-field-delimiter}
Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `'\t'`.
## format_custom_row_before_delimiter {#format-custom-row-before-delimiter}
Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_row_after_delimiter {#format-custom-row-after-delimiter}
Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `'\n'`.
## format_custom_row_between_delimiter {#format-custom-row-between-delimiter}
Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_result_before_delimiter {#format-custom-result-before-delimiter}
Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_result_after_delimiter {#format-custom-result-after-delimiter}
Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.

View File

@ -4,8 +4,8 @@ Contains information about the dependent views executed when running a query, fo
To start logging:
1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section.
2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1.
1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section.
2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.

View File

@ -1,98 +1,114 @@
---
toc_priority: 65
toc_title: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Allows formatting input queries.
Keys:
- `--help` or`-h` — Produce help message.
- `--hilite` — Add syntax highlight with ANSI terminal escape sequences.
- `--oneline` — Format in single line.
- `--quiet` or `-q` — Just check syntax, no output on success.
- `--multiquery` or `-n` — Allow multiple queries in the same file.
- `--obfuscate` — Obfuscate instead of formatting.
- `--seed <string>` — Seed arbitrary string that determines the result of obfuscation.
- `--backslash` — Add a backslash at the end of each line of the formatted query. Can be useful when you copy a query from web or somewhere else with multiple lines, and want to execute it in command line.
## Examples {#examples}
1. Highlighting and single line:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Result:
```sql
SELECT sum(number) FROM numbers(5)
```
2. Multiqueries:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Result:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
3. Obfuscating:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Result:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Same query and another seed string:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Result:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
4. Adding backslash:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Result:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```
---
toc_priority: 65
toc_title: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Allows formatting input queries.
Keys:
- `--help` or`-h` — Produce help message.
- `--query` — Format queries of any length and complexity.
- `--hilite` — Add syntax highlight with ANSI terminal escape sequences.
- `--oneline` — Format in single line.
- `--quiet` or `-q` — Just check syntax, no output on success.
- `--multiquery` or `-n` — Allow multiple queries in the same file.
- `--obfuscate` — Obfuscate instead of formatting.
- `--seed <string>` — Seed arbitrary string that determines the result of obfuscation.
- `--backslash` — Add a backslash at the end of each line of the formatted query. Can be useful when you copy a query from web or somewhere else with multiple lines, and want to execute it in command line.
## Examples {#examples}
1. Formatting a query:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Result:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Highlighting and single line:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Result:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Multiqueries:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Result:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Obfuscating:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Result:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Same query and another seed string:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Result:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Adding backslash:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Result:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```

View File

@ -26,7 +26,7 @@ Query is send in post body. Response is returned in RowBinary format.
```bash
$ clickhouse-odbc-bridge --http-port 9018 --daemon
$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "columns=columns format version: 1
$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "sample_block=columns format version: 1
3 columns:
\`PageID\` String
\`ImpID\` String

View File

@ -0,0 +1,148 @@
---
toc_priority: 108
---
## exponentialMovingAverage {#exponential-moving-average}
Сalculates the exponential moving average of values for the determined time.
**Syntax**
```sql
exponentialMovingAverage(x)(value, timestamp)
```
Each `value` corresponds to the determinate `timestamp`. The half-life `x` is the time lag at which the exponential weights decay by one-half. The function returns a weighted average: the older the time point, the less weight the corresponding value is considered to be.
**Arguments**
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timestamp` — Timestamp. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
**Parameters**
- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
**Returned values**
- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Examples**
Input table:
``` text
┌──temperature─┬─timestamp──┐
│ 95 │ 1 │
│ 95 │ 2 │
│ 95 │ 3 │
│ 96 │ 4 │
│ 96 │ 5 │
│ 96 │ 6 │
│ 96 │ 7 │
│ 97 │ 8 │
│ 97 │ 9 │
│ 97 │ 10 │
│ 97 │ 11 │
│ 98 │ 12 │
│ 98 │ 13 │
│ 98 │ 14 │
│ 98 │ 15 │
│ 99 │ 16 │
│ 99 │ 17 │
│ 99 │ 18 │
│ 100 │ 19 │
│ 100 │ 20 │
└──────────────┴────────────┘
```
Query:
```sql
SELECT exponentialMovingAverage(5)(temperature, timestamp);
```
Result:
``` text
┌──exponentialMovingAverage(5)(temperature, timestamp)──┐
│ 92.25779635374204 │
└───────────────────────────────────────────────────────┘
```
Query:
```sql
SELECT
value,
time,
round(exp_smooth, 3),
bar(exp_smooth, 0, 1, 50) AS bar
FROM
(
SELECT
(number = 0) OR (number >= 25) AS value,
number AS time,
exponentialMovingAverage(10)(value, time) OVER (Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth
FROM numbers(50)
)
```
Result:
``` text
┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────────────────────┐
│ 1 │ 0 │ 0.067 │ ███▎ │
│ 0 │ 1 │ 0.062 │ ███ │
│ 0 │ 2 │ 0.058 │ ██▊ │
│ 0 │ 3 │ 0.054 │ ██▋ │
│ 0 │ 4 │ 0.051 │ ██▌ │
│ 0 │ 5 │ 0.047 │ ██▎ │
│ 0 │ 6 │ 0.044 │ ██▏ │
│ 0 │ 7 │ 0.041 │ ██ │
│ 0 │ 8 │ 0.038 │ █▊ │
│ 0 │ 9 │ 0.036 │ █▋ │
│ 0 │ 10 │ 0.033 │ █▋ │
│ 0 │ 11 │ 0.031 │ █▌ │
│ 0 │ 12 │ 0.029 │ █▍ │
│ 0 │ 13 │ 0.027 │ █▎ │
│ 0 │ 14 │ 0.025 │ █▎ │
│ 0 │ 15 │ 0.024 │ █▏ │
│ 0 │ 16 │ 0.022 │ █ │
│ 0 │ 17 │ 0.021 │ █ │
│ 0 │ 18 │ 0.019 │ ▊ │
│ 0 │ 19 │ 0.018 │ ▊ │
│ 0 │ 20 │ 0.017 │ ▋ │
│ 0 │ 21 │ 0.016 │ ▋ │
│ 0 │ 22 │ 0.015 │ ▋ │
│ 0 │ 23 │ 0.014 │ ▋ │
│ 0 │ 24 │ 0.013 │ ▋ │
│ 1 │ 25 │ 0.079 │ ███▊ │
│ 1 │ 26 │ 0.14 │ ███████ │
│ 1 │ 27 │ 0.198 │ █████████▊ │
│ 1 │ 28 │ 0.252 │ ████████████▌ │
│ 1 │ 29 │ 0.302 │ ███████████████ │
│ 1 │ 30 │ 0.349 │ █████████████████▍ │
│ 1 │ 31 │ 0.392 │ ███████████████████▌ │
│ 1 │ 32 │ 0.433 │ █████████████████████▋ │
│ 1 │ 33 │ 0.471 │ ███████████████████████▌ │
│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │
│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │
│ 1 │ 36 │ 0.57 │ ████████████████████████████▌ │
│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │
│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │
│ 1 │ 39 │ 0.651 │ ████████████████████████████████▌ │
│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │
│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │
│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │
│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │
│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │
│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │
│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │
│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │
│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
```

View File

@ -0,0 +1,64 @@
---
toc_priority: 311
toc_title: sparkbar
---
# sparkbar {#sparkbar}
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
**Syntax**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Parameters**
- `width` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — The interval start. Optional parameter.
- `max_x` — The interval end. Optional parameter.
**Arguments**
- `x` — The field with values.
- `y` — The field with the frequency of values.
**Returned value**
- The frequency histogram.
**Example**
Query:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
```
Result:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -3,13 +3,12 @@ toc_priority: 46
toc_title: Polygon Dictionaries With Grids
---
# Polygon dictionaries {#polygon-dictionaries}
Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
For example: defining a city area by geographical coordinates.
Example configuration:
Example of a polygon dictionary configuration:
``` xml
<dictionary>
@ -30,13 +29,15 @@ Example configuration:
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
<layout>
<polygon />
<polygon>
<store_polygon_key_column>1</store_polygon_key_column>
</polygon>
</layout>
...
</dictionary>
```
@ -48,11 +49,12 @@ CREATE DICTIONARY polygon_dict_name (
value UInt64
)
PRIMARY KEY key
LAYOUT(POLYGON())
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
...
```
When configuring the polygon dictionary, the key must have one of two types:
- A simple polygon. It is an array of points.
- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
@ -60,25 +62,27 @@ Points can be specified as an array or a tuple of their coordinates. In the curr
The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
- POLYGON_INDEX_EACH. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions).
- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions).
Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons.
The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons.
To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately.
- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
- POLYGON. Synonym to POLYGON_INDEX_CELL.
- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
An important difference is that here the keys will be the points for which you want to find the polygon containing them.
**Example**
Example of working with the dictionary defined above:
``` sql
CREATE TABLE points (
x Float64,
@ -89,3 +93,39 @@ SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, '
```
As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output.
**Example**
You can read columns from polygon dictionaries via SELECT query, just turn on the `store_polygon_key_column = 1` in the dictionary configuration or corresponding DDL-query.
Query:
``` sql
CREATE TABLE polygons_test_table
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
) ENGINE = TinyLog;
INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
CREATE DICTIONARY polygons_test_dictionary
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
LIFETIME(0);
SELECT * FROM polygons_test_dictionary;
```
Result:
``` text
┌─key─────────────────────────────┬─name──┐
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```

View File

@ -89,9 +89,39 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
## sipHash128 {#hash_functions-siphash128}
Calculates SipHash from a string.
Accepts a String-type argument. Returns FixedString(16).
Differs from sipHash64 in that the final xor-folding state is only done up to 128 bits.
Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits.
**Syntax**
``` sql
sipHash128(par1,...)
```
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
**Returned value**
A 128-bit `SipHash` hash value.
Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
``` sql
SELECT hex(sipHash128('foo', '\x01', 3));
```
Result:
``` text
┌─hex(sipHash128('foo', '', 3))────┐
│ 9DE516A64A414D4B1B609415E4523F24 │
└──────────────────────────────────┘
```
## cityHash64 {#cityhash64}
@ -459,28 +489,36 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
**Syntax**
``` sql
murmurHash3_128( expr )
murmurHash3_128(expr)
```
**Arguments**
- `expr`[Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value.
- `expr`A list of [expressions](../../sql-reference/syntax.md#syntax-expressions). [String](../../sql-reference/data-types/string.md).
**Returned Value**
**Returned value**
A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) data type hash value.
A 128-bit `MurmurHash3` hash value.
Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
``` sql
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
```
Result:
``` text
┌─MurmurHash3──────────────────────┬─type───┐
368A1A311CB7342253354B548E7E7E71 │ String
└──────────────────────────────────────────┘
┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐
F8F7AD9B6CD4CF117A71E277E2EC2931
└──────────────────────────────────────────┘
```
## xxHash32, xxHash64 {#hash-functions-xxhash32}

View File

@ -175,6 +175,7 @@ in which the `Strings` represents the named fields of the tuple and `T` are the
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
@ -196,7 +197,7 @@ CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64) ENGINE =
INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
```
Result:

View File

@ -0,0 +1,112 @@
---
toc_priority: 68
toc_title: Window View
---
# Window View Functions {#window-view-functions}
Window view functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below:
## tumble {#window-view-functions-tumble}
A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).
``` sql
tumble(time_attr, interval [, timezone])
```
**Arguments**
- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
**Returned values**
- The inclusive lower and exclusive upper bound of the corresponding tumbling window.
Type: `Tuple(DateTime, DateTime)`
**Example**
Query:
``` sql
SELECT tumble(now(), toIntervalDay('1'))
```
Result:
``` text
┌─tumble(now(), toIntervalDay('1'))─────────────┐
│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │
└───────────────────────────────────────────────┘
```
## hop {#window-view-functions-hop}
A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.
``` sql
hop(time_attr, hop_interval, window_interval [, timezone])
```
**Arguments**
- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number.
- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number.
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
**Returned values**
- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
Type: `Tuple(DateTime, DateTime)`
**Example**
Query:
``` sql
SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND)
```
Result:
``` text
┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐
│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │
└───────────────────────────────────────────────────────────┘
```
## tumbleStart {#window-view-functions-tumblestart}
Returns the inclusive lower bound of the corresponding tumbling window.
``` sql
tumbleStart(time_attr, interval [, timezone]);
```
## tumbleEnd {#window-view-functions-tumbleend}
Returns the exclusive upper bound of the corresponding tumbling window.
``` sql
tumbleEnd(time_attr, interval [, timezone]);
```
## hopStart {#window-view-functions-hopstart}
Returns the inclusive lower bound of the corresponding hopping window.
``` sql
hopStart(time_attr, hop_interval, window_interval [, timezone]);
```
## hopEnd {#window-view-functions-hopend}
Returns the exclusive upper bound of the corresponding hopping window.
``` sql
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
```

View File

@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.
Syntax:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|MODIFY|MATERIALIZE COLUMN ...
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
In the query, specify a list of one or more comma-separated actions.
@ -138,6 +138,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```
This query changes the `name` column properties:

View File

@ -5,7 +5,7 @@ toc_title: VIEW
# CREATE VIEW {#create-view}
Creates a new view. Views can be [normal](#normal), [materialized](#materialized) and [live](#live-view) (the latter is an experimental feature).
Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features).
## Normal View {#normal}
@ -243,3 +243,119 @@ Most common uses of live view tables include:
**See Also**
- [ALTER LIVE VIEW](../alter/view.md#alter-live-view)
## Window View [Experimental] {#window-view}
!!! important "Important"
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`.
``` sql
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function
```
Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query.
Creating a window view is similar to creating `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. The inner storage will use `AggregatingMergeTree` as the default engine.
### Window View Functions {#window-view-windowviewfunctions}
[Window view functions](../../functions/window-view-functions.md) are used to get the lower and upper window bound of records. The window view needs to be used with a window view function.
### TIME ATTRIBUTES {#window-view-timeattributes}
Window view supports **processing time** and **event time** process.
**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the window view function to a table column or using the function `now()`. The following query creates a window view with processing time.
``` sql
CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id
```
**Event time** is the time that each individual event occurred on its producing device. This time is typically embedded within the records when it is generated. Event time processing allows for consistent results even in case of out-of-order events or late events. Window view supports event time processing by using `WATERMARK` syntax.
Window view provides three watermark strategies:
* `STRICTLY_ASCENDING`: Emits a watermark of the maximum observed timestamp so far. Rows that have a timestamp smaller to the max timestamp are not late.
* `ASCENDING`: Emits a watermark of the maximum observed timestamp so far minus 1. Rows that have a timestamp equal and smaller to the max timestamp are not late.
* `BOUNDED`: WATERMARK=INTERVAL. Emits watermarks, which are the maximum observed timestamp minus the specified delay.
The following queries are examples of creating a window view with `WATERMARK`:
``` sql
CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
```
By default, the window will be fired when the watermark comes, and elements that arrived behind the watermark will be dropped. Window view supports late event processing by setting `ALLOWED_LATENESS=INTERVAL`. An example of lateness handling is:
``` sql
CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid;
```
Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them.
### Monitoring New Windows {#window-view-monitoring}
Window view supports the `WATCH` query to constantly append the processing results to the console or use `TO` syntax to output the results to a table.
``` sql
WATCH [db.]name [LIMIT n]
```
`WATCH` query acts similar as in `LIVE VIEW`. A `LIMIT` can be specified to set the number of updates to receive before terminating the query.
### Settings {#window-view-settings}
- `window_view_clean_interval`: The clean interval of window view in seconds to free outdated data. The system will retain the windows that have not been fully triggered according to the system time or `WATERMARK` configuration, and the other data will be deleted.
- `window_view_heartbeat_interval`: The heartbeat interval in seconds to indicate the watch query is alive.
### Example {#window-view-example}
Suppose we need to count the number of click logs per 10 seconds in a log table called `data`, and its table structure is:
``` sql
CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory;
```
First, we create a window view with tumble window of 10 seconds interval:
``` sql
CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id
```
Then, we use the `WATCH` query to get the results.
``` sql
WATCH wv
```
When logs are inserted into table `data`,
``` sql
INSERT INTO data VALUES(1,now())
```
The `WATCH` query should print the results as follows:
``` text
┌─count(id)─┬────────window_start─┐
│ 1 │ 2020-01-14 16:56:40 │
└───────────┴─────────────────────┘
```
Alternatively, we can attach the output to another table using `TO` syntax.
``` sql
CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id
```
Additional examples can be found among stateful tests of ClickHouse (they are named `*window_view*` there).
### Window View Usage {#window-view-usage}
The window view is useful in the following scenarios:
* **Monitoring**: Aggregate and calculate the metrics logs by time, and output the results to a target table. The dashboard can use the target table as a source table.
* **Analyzing**: Automatically aggregate and preprocess data in the time window. This can be useful when analyzing a large number of logs. The preprocessing eliminates repeated calculations in multiple queries and reduces query latency.

View File

@ -5,9 +5,9 @@ toc_title: INSERT INTO
## INSERT INTO Statement {#insert}
Adding data.
Inserts data into a table.
Basic query format:
**Syntax**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
@ -92,6 +92,8 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr
### Inserting the Results of `SELECT` {#insert_query_insert-select}
**Syntax**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
```
@ -107,6 +109,68 @@ However, you can delete old data using `ALTER TABLE ... DROP PARTITION`.
To insert a default value instead of `NULL` into a column with not nullable data type, enable [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default) setting.
### Inserting Data from a File {#inserting-data-from-a-file}
**Syntax**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Use the syntax above to insert data from a file stored on a **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Compressed files are supported. Compression type is detected by the extension of the file name. Or it can be explicitly specified in a `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
This functionality is available in the [command-line client](../../interfaces/cli.md) and [clickhouse-local](../../operations/utilities/clickhouse-local.md).
**Example**
Execute the following queries using [command-line client](../../interfaces/cli.md):
```bash
echo 1,A > input.csv ; echo 2,B >> input.csv
clickhouse-client --query="CREATE TABLE table_from_file (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;"
clickhouse-client --query="INSERT INTO table_from_file FROM INFILE 'input.csv' FORMAT CSV;"
clickhouse-client --query="SELECT * FROM table_from_file FORMAT PrettyCompact;"
```
Result:
```text
┌─id─┬─text─┐
│ 1 │ A │
│ 2 │ B │
└────┴──────┘
```
### Inserting into Table Function {#inserting-into-table-function}
Data can be inserted into tables referenced by [table functions](../../sql-reference/table-functions/index.md).
**Syntax**
``` sql
INSERT INTO [TABLE] FUNCTION table_func ...
```
**Example**
[remote](../../sql-reference/table-functions/index.md#remote) table function is used in the following queries:
``` sql
CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
VALUES (100, 'inserted via remote()');
SELECT * FROM simple_table;
```
Result:
``` text
┌──id─┬─text──────────────────┐
│ 100 │ inserted via remote() │
└─────┴───────────────────────┘
```
### Performance Considerations {#performance-considerations}
`INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this:

View File

@ -27,7 +27,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]
[UNION ...]
[INTO OUTFILE filename]
[INTO OUTFILE filename [COMPRESSION type] ]
[FORMAT format]
```

View File

@ -4,10 +4,35 @@ toc_title: INTO OUTFILE
# INTO OUTFILE Clause {#into-outfile-clause}
Add the `INTO OUTFILE filename` clause (where filename is a string literal) to `SELECT query` to redirect its output to the specified file on the client-side.
`INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side.
Compressed files are supported. Compression type is detected by the extension of the file name (mode `'auto'` is used by default). Or it can be explicitly specified in a `COMPRESSION` clause.
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
## Implementation Details {#implementation-details}
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
- The query will fail if a file with the same filename already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode).
- The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
**Example**
Execute the following query using [command-line client](../../../interfaces/cli.md):
```bash
clickhouse-client --query="SELECT 1,'ABC' INTO OUTFILE 'select.gz' FORMAT CSV;"
zcat select.gz
```
Result:
```text
1,"ABC"
```

View File

@ -18,6 +18,8 @@ You can use table functions in:
It's one of the methods of creating a table.
- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query.
!!! warning "Warning"
You cant use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.

View File

@ -15,7 +15,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
SETTINGS
[connection_pool_size=16, ]
[connection_max_tries=3, ]
[connection_wait_timeout=5, ] /* 0 -- не ждать */
[connection_auto_close=true ]
;
```
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
@ -102,8 +108,43 @@ SELECT * FROM mysql_table
└────────────────┴────────┘
```
## Смотрите также {#smotrite-takzhe}
## Настройки {#mysql-settings}
- [Табличная функция mysql](../../../engines/table-engines/integrations/mysql.md)
Настройки по умолчанию не очень эффективны, так как они не используют повторное соединение. Эти настройки позволяют увеличить количество запросов, выполняемых сервером в секунду.
### connection_auto_close {#connection-auto-close}
Позволяет автоматически закрыть соединение после выполнения запроса, то есть отключить повторное использование соединения.
Возможные значения:
- 1 — автоматическое закрытие соединения разрешено (повторное использование отключается).
- 0 — автоматическое закрытие соединения запрещено (повторное использование включается).
Значение по умолчанию: `1`.
### connection_max_tries {#connection-max-tries}
Устанавливает количество повторных попыток для пула со сбоями соединения.
Возможные значения:
- Положительное целое число.
- 0 — отсутствуют повторные попытки для пула со сбоями соединения.
Значение по умолчанию: `3`.
### connection_pool_size {#connection-pool-size}
Задает размер пула соединений (если используются все соединения, запрос будет ждать, пока какое-либо соединение не будет освобождено).
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `16`.
## См. также {#see-also}
- [Табличная функция mysql](../../../engines/table-engines/integrations/mysql.md)
- [Использование MySQL в качестве источника для внешнего словаря](../../../engines/table-engines/integrations/mysql.md#dicts-external_dicts_dict_sources-mysql)

View File

@ -20,6 +20,8 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
@ -368,8 +370,17 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
## CustomSeparated {#format-customseparated}
Аналогичен [Template](#format-template), но выводит (или считывает) все столбцы, используя для них правило экранирования из настройки `format_custom_escaping_rule` и разделители из настроек `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` и `format_custom_result_after_delimiter`, а не из форматных строк.
Также существует формат `CustomSeparatedIgnoreSpaces`, аналогичный `TemplateIgnoreSpaces`.
Аналогичен [Template](#format-template), но выводит (или считывает) все имена и типы столбцов, используя для них правило экранирования из настройки [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) и разделители из настроек [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) и [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter), а не из форматных строк.
Также существует формат `CustomSeparatedIgnoreSpaces`, аналогичный формату [TemplateIgnoreSpaces](#templateignorespaces).
## CustomSeparatedWithNames {#customseparatedwithnames}
Выводит также заголовок с именами столбцов, аналогичен формату [TabSeparatedWithNames](#tabseparatedwithnames).
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Выводит также два заголовка с именами и типами столбцов, аналогичен формату [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
## JSON {#json}
@ -1399,14 +1410,17 @@ SELECT * FROM line_as_string;
При работе с форматом `Regexp` можно использовать следующие параметры:
- `format_regexp` — [String](../sql-reference/data-types/string.md). Строка с регулярным выражением в формате [re2](https://github.com/google/re2/wiki/Syntax).
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). Правило сериализации. Поддерживаются следующие правила:
- CSV (как в [CSV](#csv))
- JSON (как в [JSONEachRow](#jsoneachrow))
- Escaped (как в [TSV](#tabseparated))
- Quoted (как в [Values](#data-format-values))
- Raw (данные импортируются как есть, без сериализации)
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Признак, будет ли генерироваться исключение в случае, если импортируемые данные не соответствуют регулярному выражению `format_regexp`. Может принимать значение `0` или `1`.
- `format_regexp` — [String](../sql-reference/data-types/string.md). Строка с регулярным выражением в формате [re2](https://github.com/google/re2/wiki/Syntax).
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). Правило экранирования. Поддерживаются следующие правила:
- CSV (как в формате [CSV](#csv))
- JSON (как в формате [JSONEachRow](#jsoneachrow))
- Escaped (как в формате [TSV](#tabseparated))
- Quoted (как в формате [Values](#data-format-values))
- Raw (данные импортируются как есть, без экранирования, как в формате [TSVRaw](#tabseparatedraw))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Признак, будет ли генерироваться исключение в случае, если импортируемые данные не соответствуют регулярному выражению `format_regexp`. Может принимать значение `0` или `1`.
**Использование**

View File

@ -131,7 +131,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce>0123456789101</nonce>
<nonce>012345678910</nonce>
</aes_128_gcm_siv>
</encryption_codecs>
```
@ -999,14 +999,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Настройки логирования информации о зависимых представлениях (materialized, live и т.п.) в запросах принятых с настройкой [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views).
Запросы сохраняются в таблицу system.query_views_log. Вы можете изменить название этой таблицы в параметре `table` (см. ниже).
Запросы логируются в таблице [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже).
При настройке логирования используются следующие параметры:
- `database` имя базы данных.
- `table` имя таблицы куда будут записываться использованные представления.
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine`
- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
- `table` имя системной таблицы, где будут логироваться запросы.
- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`.
- `engine` устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`.
- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.

View File

@ -355,3 +355,23 @@ Eсли суммарное число активных кусков во все
- 1 — куски данных открепляются.
Значение по умолчанию: `0`.
## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds}
Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `60` секунд.
## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds}
Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `1` секунда.

View File

@ -807,26 +807,6 @@ ClickHouse может парсить только базовый формат `Y
Значение по умолчанию: 2013265920.
## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds}
Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `60` секунд.
## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds}
Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse .
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `1` секунда.
## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io}
Минимальный объём данных, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск.
@ -912,11 +892,18 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
## log_query_threads {#settings-log-query-threads}
Установка логирования информации о потоках выполнения запроса.
Управляет логированием информации о потоках выполнения запросов.
Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log).
Информация о потоках выполнения запросов сохраняется в системной таблице [system.query_thread_log](../../operations/system-tables/query_thread_log.md). Работает только в том случае, если включена настройка [log_queries](#settings-log-queries). Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log).
Пример:
Возможные значения:
- 0 — отключено.
- 1 — включено.
Значение по умолчанию: `1`.
**Пример**
``` text
log_query_threads=1
@ -3808,6 +3795,40 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
Значение по умолчанию: `0`.
## alter_partition_verbose_result {#alter-partition-verbose-result}
Включает или отключает вывод информации о кусках, к которым были успешно применены операции манипуляции с партициями и кусками. Применимо к [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) и к [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition)
Возможные значения:
- 0 — отображение отключено.
- 1 — отображение включено.
Значение по умолчанию: `0`.
**Пример**
```sql
CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a;
INSERT INTO test VALUES(1, '2021-01-01', '');
INSERT INTO test VALUES(1, '2021-01-01', '');
ALTER TABLE test DETACH PARTITION ID '202101';
ALTER TABLE test ATTACH PARTITION ID '202101' SETTINGS alter_partition_verbose_result = 1;
┌─command_type─────┬─partition_id─┬─part_name────┬─old_part_name─┐
│ ATTACH PARTITION │ 202101 │ 202101_7_7_0 │ 202101_5_5_0 │
│ ATTACH PARTITION │ 202101 │ 202101_8_8_0 │ 202101_6_6_0 │
└──────────────────┴──────────────┴──────────────┴───────────────┘
ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
┌─command_type─┬─partition_id─┬─part_name────┬─backup_name─┬─backup_path───────────────────┬─part_backup_path────────────────────────────────────────────┐
│ FREEZE ALL │ 202101 │ 202101_7_7_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_7_7_0 │
│ FREEZE ALL │ 202101 │ 202101_8_8_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_8_8_0 │
└──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘
```
## format_capn_proto_enum_comparising_mode {#format-capn-proto-enum-comparising-mode}
Определяет, как сопоставить тип данных ClickHouse `Enum` и тип данных `Enum` формата [CapnProto](../../interfaces/formats.md#capnproto) из схемы.
@ -3830,3 +3851,54 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- 0 — большие файлы считываются только с копированием данных из ядра в пространство пользователей.
Значение по умолчанию: `0`.
## format_custom_escaping_rule {#format-custom-escaping-rule}
Устанавливает правило экранирования данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Возможные значения:
- `'Escaped'` — как в формате [TSV](../../interfaces/formats.md#tabseparated).
- `'Quoted'` — как в формате [Values](../../interfaces/formats.md#data-format-values).
- `'CSV'` — как в формате [CSV](../../interfaces/formats.md#csv).
- `'JSON'` — как в формате [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
- `'XML'` — как в формате [XML](../../interfaces/formats.md#xml).
- `'Raw'` — данные импортируются как есть, без экранирования, как в формате [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
Значение по умолчанию: `'Escaped'`.
## format_custom_field_delimiter {#format-custom-field-delimiter}
Задает символ, который интерпретируется как разделитель между полями данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `'\t'`.
## format_custom_row_before_delimiter {#format-custom-row-before-delimiter}
Задает символ, который интерпретируется как разделитель перед полем первого столбца данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_row_after_delimiter {#format-custom-row-after-delimiter}
Задает символ, который интерпретируется как разделитель после поля последнего столбца данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `'\n'`.
## format_custom_row_between_delimiter {#format-custom-row-between-delimiter}
Задает символ, который интерпретируется как разделитель между строками данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_result_before_delimiter {#format-custom-result-before-delimiter}
Задает символ, который интерпретируется как префикс перед результирующим набором данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_result_after_delimiter {#format-custom-result-after-delimiter}
Задает символ, который интерпретируется как суффикс после результирующего набора данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.

View File

@ -55,6 +55,7 @@ ClickHouse не удаляет данные из таблица автомати
- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — тип запроса.
- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена баз данных, присутствующих в запросе.
- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена таблиц, присутствующих в запросе.
- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена представлений (материализованные или live), которые представленны в запросе.
- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена столбцов, присутствующих в запросе.
- `projections` ([String](../../sql-reference/data-types/string.md)) — имена проекций, использованных при выполнении запроса.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения.

View File

@ -112,5 +112,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr
**Смотрите также**
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах.
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах.
- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — описание системной таблицы `query_views_log`, которая содержит информацию о всех представлениях, участвующих в выполненных запросах.

View File

@ -1 +0,0 @@
../../../en/operations/system-tables/query_views_log.md

View File

@ -0,0 +1,84 @@
# system.query_views_log {#system_tables-query_views_log}
Содержит информацию о зависимых представлениях, выполняемых при выполнении запроса, например, тип представления или время выполнения.
Чтобы начать ведение журнала:
1. Настройте параметры в разделе [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log).
2. Включите настройку [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views).
Период сброса данных из буфера в памяти задается в параметре `flush_interval_milliseconds` в разделе настроек сервера [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log ). Для принудительного сброса используйте запрос [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs).
ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction).
Чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`, вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability).
Столбцы:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло последнее событие с представлением.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления с точностью до микросекунд.
- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — продолжительность выполнения представления (сумма его этапов) в миллисекундах.
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор начального запроса (при распределённом выполнении запроса).
- `view_name` ([String](../../sql-reference/data-types/string.md)) — имя представления.
- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID представления.
- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип представления. Возможные значения:
- `'Default' = 1` — [обычные представления](../../sql-reference/statements/create/view.md#normal). Не должно появляться в этом журнале.
- `'Materialized' = 2` — [материализованные представления](../../sql-reference/statements/create/view.md#materialized).
- `'Live' = 3` — [live представления](../../sql-reference/statements/create/view.md#live-view).
- `view_query` ([String](../../sql-reference/data-types/string.md)) — запрос, выполняемый представлением.
- `view_target` ([String](../../sql-reference/data-types/string.md)) — имя целевой таблицы представления.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байт.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления.
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — события профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events).
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения:
- `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться.
- `'QueryFinish' = 2` — успешное завершение выполнения представления.
- `'ExceptionBeforeStart' = 3` — исключение до начала выполнения представления.
- `'ExceptionWhileProcessing' = 4` — исключение во время выполнения представления.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения.
- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [трассировка стека](https://ru.wikipedia.org/wiki/Трассировка_стека). Пустая строка, если запрос был успешно выполнен.
**Пример**
Запрос:
``` sql
SELECT * FROM system.query_views_log LIMIT 1 \G;
```
Результат:
``` text
Row 1:
──────
event_date: 2021-06-22
event_time: 2021-06-22 13:23:07
event_time_microseconds: 2021-06-22 13:23:07.738221
view_duration_ms: 0
initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70
view_name: default.matview_inner
view_uuid: 00000000-0000-0000-0000-000000000000
view_type: Materialized
view_query: SELECT * FROM default.table_b
view_target: default.`.inner.matview_inner`
read_rows: 4
read_bytes: 64
written_rows: 2
written_bytes: 32
peak_memory_usage: 4196188
ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463}
status: QueryFinish
exception_code: 0
exception:
stack_trace:
```
**См. также**
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах.
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — описание системной таблицы `query_thread_log`, которая содержит информацию о каждом потоке выполнения запроса.

View File

@ -1,98 +1,114 @@
---
toc_priority: 65
toc_title: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или`-h` — выводит описание ключей.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
2. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
3. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
4. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```
---
toc_priority: 65
toc_title: clickhouse-format
---
# clickhouse-format {#clickhouse-format}
Позволяет форматировать входящие запросы.
Ключи:
- `--help` или`-h` — выводит описание ключей.
- `--query` — форматирует запрос любой длины и сложности.
- `--hilite` — добавляет подсветку синтаксиса с экранированием символов.
- `--oneline` — форматирование в одну строку.
- `--quiet` или `-q` — проверяет синтаксис без вывода результата.
- `--multiquery` or `-n` — поддерживает несколько запросов в одной строке.
- `--obfuscate` — обфусцирует вместо форматирования.
- `--seed <строка>` — задает строку, которая определяет результат обфускации.
- `--backslash` — добавляет обратный слеш в конце каждой строки отформатированного запроса. Удобно использовать если многострочный запрос скопирован из интернета или другого источника и его нужно выполнить из командной строки.
## Примеры {#examples}
1. Форматирование запроса:
```bash
$ clickhouse-format --query "select number from numbers(10) where number%2 order by number desc;"
```
Результат:
```text
SELECT number
FROM numbers(10)
WHERE number % 2
ORDER BY number DESC
```
2. Подсветка синтаксиса и форматирование в одну строку:
```bash
$ clickhouse-format --oneline --hilite <<< "SELECT sum(number) FROM numbers(5);"
```
Результат:
```sql
SELECT sum(number) FROM numbers(5)
```
3. Несколько запросов в одной строке:
```bash
$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT *
FROM
(
SELECT 1 AS x
UNION ALL
SELECT 1
UNION DISTINCT
SELECT 3
)
;
```
4. Обфускация:
```bash
$ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
Тот же запрос с другой инициализацией обфускатора:
```bash
$ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWEEN a AND b, CASE WHEN x >= 123 THEN y ELSE NULL END;"
```
Результат:
```text
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
5. Добавление обратного слеша:
```bash
$ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
```
Результат:
```text
SELECT * \
FROM \
( \
SELECT 1 AS x \
UNION ALL \
SELECT 1 \
UNION DISTINCT \
SELECT 3 \
)
```

View File

@ -0,0 +1,148 @@
---
toc_priority: 108
---
## exponentialMovingAverage {#exponential-moving-average}
Вычисляет экспоненциальное скользящее среднее за определенный промежуток времени.
**Синтакис:**
```sql
exponentialMovingAverage(x)(value, timestamp)
```
Каждой точке `timestamp` на временном отрезке соответствует определенное значение `value`. Период полураспада — временной интервал `х`, через который вес значений уменьшается в 2 раза. Функция возвращает взвешенное среднее: чем старше временная точка, тем c меньшим весом считается соответствующее ей значение.
**Аргументы**
- `value` — входные значения. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
- `timestamp` — параметр для упорядочивания значений. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
**Параметры**
- `x` — период полураспада. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md).
**Возвращаемые значения**
- Возвращает [экспоненциальное скользящее среднее](https://ru.wikipedia.org/wiki/Скользящая_средняя#Экспоненциально_взвешенное_скользящее_среднее) за прошедшее время `x` в последний момент времени.
Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Пример**
Исходная таблица:
``` text
┌──temperature─┬─timestamp──┐
│ 95 │ 1 │
│ 95 │ 2 │
│ 95 │ 3 │
│ 96 │ 4 │
│ 96 │ 5 │
│ 96 │ 6 │
│ 96 │ 7 │
│ 97 │ 8 │
│ 97 │ 9 │
│ 97 │ 10 │
│ 97 │ 11 │
│ 98 │ 12 │
│ 98 │ 13 │
│ 98 │ 14 │
│ 98 │ 15 │
│ 99 │ 16 │
│ 99 │ 17 │
│ 99 │ 18 │
│ 100 │ 19 │
│ 100 │ 20 │
└──────────────┴────────────┘
```
Запрос:
```sql
SELECT exponentialMovingAverage(5)(temperature, timestamp);
```
Результат:
``` text
┌──exponentialMovingAverage(5)(temperature, timestamp)──┐
│ 92.25779635374204 │
└───────────────────────────────────────────────────────┘
```
Запрос:
```sql
SELECT
value,
time,
round(exp_smooth, 3),
bar(exp_smooth, 0, 1, 50) AS bar
FROM
(
SELECT
(number = 0) OR (number >= 25) AS value,
number AS time,
exponentialMovingAverage(10)(value, time) OVER (Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth
FROM numbers(50)
)
```
Результат:
``` text
┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────────────────────┐
│ 1 │ 0 │ 0.067 │ ███▎ │
│ 0 │ 1 │ 0.062 │ ███ │
│ 0 │ 2 │ 0.058 │ ██▊ │
│ 0 │ 3 │ 0.054 │ ██▋ │
│ 0 │ 4 │ 0.051 │ ██▌ │
│ 0 │ 5 │ 0.047 │ ██▎ │
│ 0 │ 6 │ 0.044 │ ██▏ │
│ 0 │ 7 │ 0.041 │ ██ │
│ 0 │ 8 │ 0.038 │ █▊ │
│ 0 │ 9 │ 0.036 │ █▋ │
│ 0 │ 10 │ 0.033 │ █▋ │
│ 0 │ 11 │ 0.031 │ █▌ │
│ 0 │ 12 │ 0.029 │ █▍ │
│ 0 │ 13 │ 0.027 │ █▎ │
│ 0 │ 14 │ 0.025 │ █▎ │
│ 0 │ 15 │ 0.024 │ █▏ │
│ 0 │ 16 │ 0.022 │ █ │
│ 0 │ 17 │ 0.021 │ █ │
│ 0 │ 18 │ 0.019 │ ▊ │
│ 0 │ 19 │ 0.018 │ ▊ │
│ 0 │ 20 │ 0.017 │ ▋ │
│ 0 │ 21 │ 0.016 │ ▋ │
│ 0 │ 22 │ 0.015 │ ▋ │
│ 0 │ 23 │ 0.014 │ ▋ │
│ 0 │ 24 │ 0.013 │ ▋ │
│ 1 │ 25 │ 0.079 │ ███▊ │
│ 1 │ 26 │ 0.14 │ ███████ │
│ 1 │ 27 │ 0.198 │ █████████▊ │
│ 1 │ 28 │ 0.252 │ ████████████▌ │
│ 1 │ 29 │ 0.302 │ ███████████████ │
│ 1 │ 30 │ 0.349 │ █████████████████▍ │
│ 1 │ 31 │ 0.392 │ ███████████████████▌ │
│ 1 │ 32 │ 0.433 │ █████████████████████▋ │
│ 1 │ 33 │ 0.471 │ ███████████████████████▌ │
│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │
│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │
│ 1 │ 36 │ 0.57 │ ████████████████████████████▌ │
│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │
│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │
│ 1 │ 39 │ 0.651 │ ████████████████████████████████▌ │
│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │
│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │
│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │
│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │
│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │
│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │
│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │
│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │
│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │
│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│
└───────┴──────┴──────────────────────┴────────────────────────────────────────────┘
```

View File

@ -0,0 +1,66 @@
---
toc_priority: 311
toc_title: sparkbar
---
# sparkbar {#sparkbar}
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`.
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
**Синтаксис**
``` sql
sparkbar(width[, min_x, max_x])(x, y)
```
**Параметры**
- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md).
- `min_x` — Начало интервала. Необязательный параметр.
- `max_x` — Конец интервала. Необязательный параметр.
**Аргументы**
- `x` — Поле со значениями.
- `y` — Поле с частотой повторения значений.
**Возвращаемые значения**
- Гистограмма частот.
**Пример**
Запрос:
``` sql
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
```
Результат:
``` text
┌─sparkbar(9)(event_date, cnt)─┐
│ │
│ ▁▅▄▃██▅ ▁ │
│ │
└──────────────────────────────┘
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
│ │
│▁▄▄▂▅▇█▁ │
│ │
└──────────────────────────────────────────────────────────────────────────┘
```

View File

@ -1,9 +1,14 @@
---
toc_priority: 46
toc_title: Cловари полигонов
---
# Cловари полигонов {#polygon-dictionaries}
Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов.
Для примера: определение района города по географическим координатам.
Пример конфигурации:
Пример конфигурации словаря полигонов:
``` xml
<dictionary>
@ -28,9 +33,12 @@
</structure>
<layout>
<polygon />
<polygon>
<store_polygon_key_column>1</store_polygon_key_column>
</polygon>
</layout>
...
</dictionary>
```
@ -42,11 +50,12 @@ CREATE DICTIONARY polygon_dict_name (
value UInt64
)
PRIMARY KEY key
LAYOUT(POLYGON())
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
...
```
При конфигурации словаря полигонов ключ должен иметь один из двух типов:
- Простой полигон. Представляет из себя массив точек.
- Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона,
последующие элементы могут задавать дырки, вырезаемые из него.
@ -55,24 +64,25 @@ LAYOUT(POLYGON())
Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах.
Доступно 3 типа [хранения данных в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md):
- POLYGON_SIMPLE. Это наивная реализация, в которой на каждый запрос делается линейный проход по всем полигонам, и для каждого проверяется принадлежность без использования дополнительных индексов.
- `POLYGON_SIMPLE`. Это наивная реализация, в которой на каждый запрос делается линейный проход по всем полигонам, и для каждого проверяется принадлежность без использования дополнительных индексов.
- POLYGON_INDEX_EACH. Для каждого полигона строится отдельный индекс, который позволяет быстро проверять принадлежность в большинстве случаев (оптимизирован под географические регионы).
- `POLYGON_INDEX_EACH`. Для каждого полигона строится отдельный индекс, который позволяет быстро проверять принадлежность в большинстве случаев (оптимизирован под географические регионы).
Также на рассматриваемую область накладывается сетка, которая значительно сужает количество рассматриваемых полигонов.
Сетка строится рекурсивным делением ячейки на 16 равных частей и конфигурируется двумя параметрами.
Деление прекращается при достижении глубины рекурсии MAX_DEPTH или в тот момент, когда ячейку пересекают не более MIN_INTERSECTIONS полигонов.
Деление прекращается при достижении глубины рекурсии `MAX_DEPTH` или в тот момент, когда ячейку пересекают не более `MIN_INTERSECTIONS` полигонов.
Для ответа на запрос находится соответствующая ячейка, и происходит поочередное обращение к индексу для сохранных в ней полигонов.
- POLYGON_INDEX_CELL. В этом размещении также строится сетка, описанная выше. Доступны такие же параметры. Для каждой ячейки-листа строится индекс на всех попадающих в неё кусках полигонов, который позволяет быстро отвечать на запрос.
- `POLYGON_INDEX_CELL`. В этом размещении также строится сетка, описанная выше. Доступны такие же параметры. Для каждой ячейки-листа строится индекс на всех попадающих в неё кусках полигонов, который позволяет быстро отвечать на запрос.
- POLYGON. Синоним к POLYGON_INDEX_CELL.
- `POLYGON`. Синоним к `POLYGON_INDEX_CELL`.
Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями.
Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон.
**Пример**
Пример работы со словарем, определенным выше:
``` sql
CREATE TABLE points (
@ -84,3 +94,40 @@ SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, '
```
В результате исполнения последней команды для каждой точки в таблице `points` будет найден полигон минимальной площади, содержащий данную точку, и выведены запрошенные аттрибуты.
**Пример**
Вы можете читать столбцы из полигональных словарей с помощью SELECT, для этого включите `store_polygon_key_column = 1` в конфигурации словаря или соответствующего DDL-запроса.
Запрос:
``` sql
CREATE TABLE polygons_test_table
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
) ENGINE = TinyLog;
INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
CREATE DICTIONARY polygons_test_dictionary
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
LIFETIME(0);
SELECT * FROM polygons_test_dictionary;
```
Результат:
``` text
┌─key─────────────────────────────┬─name──┐
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```

View File

@ -748,7 +748,7 @@ SOURCE(REDIS(
!!! info "Примечание"
Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`.
### PosgreSQL {#dicts-external_dicts_dict_sources-postgresql}
### PostgreSQL {#dicts-external_dicts_dict_sources-postgresql}
Пример настройки:

View File

@ -89,9 +89,39 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
## sipHash128 {#hash_functions-siphash128}
Вычисляет SipHash от строки.
Принимает аргумент типа String. Возвращает FixedString(16).
Отличается от sipHash64 тем, что финальный xor-folding состояния делается только до 128 бит.
Генерирует 128-битное хеш-значение [SipHash](https://131002.net/siphash/). Отличается от [sipHash64](#hash_functions-siphash64) тем, что финальный xor-folding состояния делается до 128 бит.
**Синтаксис**
``` sql
sipHash128(par1,...)
```
**Аргументы**
Функция принимает переменное число входных параметров. Аргументы могут быть любого [поддерживаемого типа данных](../../sql-reference/functions/hash-functions.md).
**Возвращаемое значение**
128-битное хеш-значение `SipHash`.
Тип: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Пример**
Запрос:
``` sql
SELECT hex(sipHash128('foo', '\x01', 3));
```
Результат:
``` text
┌─hex(sipHash128('foo', '', 3))────┐
│ 9DE516A64A414D4B1B609415E4523F24 │
└──────────────────────────────────┘
```
## cityHash64 {#cityhash64}
@ -459,30 +489,38 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
## murmurHash3_128 {#murmurhash3-128}
Генерирует значение [MurmurHash3](https://github.com/aappleby/smhasher).
Генерирует 128-битное хеш-значение [MurmurHash3](https://github.com/aappleby/smhasher).
**Синтаксис**
``` sql
murmurHash3_128( expr )
murmurHash3_128(expr)
```
**Аргументы**
- `expr`[выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql-reference/functions/hash-functions.md).
- `expr`список [выражений](../../sql-reference/syntax.md#syntax-expressions). [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Хэш-значение типа [FixedString(16)](../../sql-reference/functions/hash-functions.md).
128-битное значение хеш-значение `MurmurHash3`.
Тип: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Пример**
Запрос:
``` sql
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
```
Результат:
``` text
┌─MurmurHash3──────────────────────┬─type───┐
368A1A311CB7342253354B548E7E7E71 │ String
└──────────────────────────────────────────┘
┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐
F8F7AD9B6CD4CF117A71E277E2EC2931
└──────────────────────────────────────────┘
```
## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64}

View File

@ -357,7 +357,7 @@ Result:
## multiFuzzyMatchAny(haystack, distance, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn}
То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция также находится в экспериментальном режиме и может быть очень медленной. За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching).
То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами.
## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn}

View File

@ -10,7 +10,7 @@ toc_title: "Манипуляции со столбцами"
Синтаксис:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|MODIFY|MATERIALIZE COLUMN ...
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
В запросе можно указать сразу несколько действий над одной таблицей через запятую.
@ -138,6 +138,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'Столбец показывает,
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
```
Запрос изменяет следующие свойства столбца `name`:

View File

@ -3,11 +3,11 @@ toc_priority: 33
toc_title: INSERT INTO
---
## INSERT {#insert}
## INSERT INTO {#insert}
Добавление данных.
Добавляет данные в таблицу.
Базовый формат запроса:
**Синтаксис**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
@ -21,17 +21,15 @@ INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), .
SHOW CREATE insert_select_testtable
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
```text
CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
`c` Int8
)
ENGINE = MergeTree()
ORDER BY a │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
ORDER BY a
```
``` sql
@ -93,6 +91,8 @@ INSERT INTO t FORMAT TabSeparated
### Вставка результатов `SELECT` {#insert_query_insert-select}
**Синтаксис**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
```
@ -109,6 +109,68 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
Чтобы вставить значение по умолчанию вместо `NULL` в столбец, который не позволяет хранить `NULL`, включите настройку [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default).
### Вставка данных из файла {#inserting-data-from-a-file}
**Синтаксис**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`.
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла, либо он может быть задан в секции `COMPRESSION`. Поддерживаются форматы: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
Эта функциональность поддерживается [клиентом командной строки](../../interfaces/cli.md) и [clickhouse-local](../../operations/utilities/clickhouse-local.md).
**Пример**
Выполните следующие запросы, используя [клиент командной строки](../../interfaces/cli.md):
```bash
echo 1,A > input.csv ; echo 2,B >> input.csv
clickhouse-client --query="CREATE TABLE table_from_file (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;"
clickhouse-client --query="INSERT INTO table_from_file FROM INFILE 'input.csv' FORMAT CSV;"
clickhouse-client --query="SELECT * FROM table_from_file FORMAT PrettyCompact;"
```
Результат:
```text
┌─id─┬─text─┐
│ 1 │ A │
│ 2 │ B │
└────┴──────┘
```
### Вставка в табличную функцию {#inserting-into-table-function}
Данные могут быть вставлены в таблицы, заданные с помощью [табличных функций](../../sql-reference/table-functions/index.md).
**Синтаксис**
``` sql
INSERT INTO [TABLE] FUNCTION table_func ...
```
**Пример**
Табличная функция [remote](../../sql-reference/table-functions/index.md#remote) используется в следующих запросах:
``` sql
CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
VALUES (100, 'inserted via remote()');
SELECT * FROM simple_table;
```
Результат:
``` text
┌──id─┬─text──────────────────┐
│ 100 │ inserted via remote() │
└─────┴───────────────────────┘
```
### Замечания о производительности {#zamechaniia-o-proizvoditelnosti}
`INSERT` сортирует входящие данные по первичному ключу и разбивает их на партиции по ключу партиционирования. Если вы вставляете данные в несколько партиций одновременно, то это может значительно снизить производительность запроса `INSERT`. Чтобы избежать этого:

View File

@ -25,7 +25,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]
[UNION ALL ...]
[INTO OUTFILE filename]
[INTO OUTFILE filename [COMPRESSION type] ]
[FORMAT format]
```

View File

@ -4,10 +4,35 @@ toc_title: INTO OUTFILE
# Секция INTO OUTFILE {#into-outfile-clause}
Чтобы перенаправить вывод `SELECT` запроса в указанный файл на стороне клиента, добавьте к нему секцию `INTO OUTFILE filename` (где filename — строковый литерал).
Секция `INTO OUTFILE` перенаправляет результат запроса `SELECT` в файл на стороне **клиента**.
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`.
**Синтаксис**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
```
`file_name` и `type` задаются в виде строковых литералов. Поддерживаются форматы сжатия: `'none`', `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
## Детали реализации {#implementation-details}
- Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md) вернет ошибку.
- Запрос завершится ошибкой, если файл с тем же именем уже существует.
- По умолчанию используется [выходной формат](../../../interfaces/formats.md) `TabSeparated` (как в пакетном режиме клиента командной строки).
- По умолчанию используется [выходной формат](../../../interfaces/formats.md) `TabSeparated` (как в пакетном режиме клиента командной строки). Его можно изменить в секции [FORMAT](format.md).
**Пример**
Выполните следующий запрос, используя [клиент командной строки](../../../interfaces/cli.md):
```bash
clickhouse-client --query="SELECT 1,'ABC' INTO OUTFILE 'select.gz' FORMAT CSV;"
zcat select.gz
```
Результат:
```text
1,"ABC"
```

View File

@ -18,6 +18,8 @@ toc_title: "Введение"
Это один из методов создания таблицы.
- Запросе [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function).
!!! warning "Предупреждение"
Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно.

View File

@ -86,6 +86,7 @@ git push
- **Compiler**: `gcc-9``clang-10` (或其他架构的`clang-10-xx`, 比如`clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` 构建使用来自 `contrib` 库, 而 `unbundled` 构建使用系统库.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build)
- **Status**: `成功``失败`
- **Build log**: 链接到构建和文件复制日志, 当构建失败时很有用.

View File

@ -1 +0,0 @@
../../en/development/tests.md

View File

@ -0,0 +1,335 @@
# ClickHouse 测试 {#clickhouse-testing}
## 功能测试 {#functional-tests}
功能测试使用起来最简单方便. 大多数 ClickHouse 特性都可以通过功能测试进行测试, 并且对于可以通过功能测试进行测试的 ClickHouse 代码的每一个更改, 都必须使用这些特性
每个功能测试都会向正在运行的 ClickHouse 服务器发送一个或多个查询, 并将结果与参考进行比较.
测试位于 `查询` 目录中. 有两个子目录: `无状态``有状态`. 无状态测试在没有任何预加载测试数据的情况下运行查询 - 它们通常在测试本身内即时创建小型合成数据集. 状态测试需要来自 Yandex.Metrica 的预加载测试数据, 它对公众开放.
每个测试可以是两种类型之一: `.sql``.sh`. `.sql` 测试是简单的 SQL 脚本, 它通过管道传输到 `clickhouse-client --multiquery --testmode`. `.sh` 测试是一个自己运行的脚本. SQL 测试通常比 `.sh` 测试更可取. 仅当您必须测试某些无法从纯 SQL 中执行的功能时才应使用 `.sh` 测试, 例如将一些输入数据传送到 `clickhouse-client` 或测试 `clickhouse-local`.
### 在本地运行测试 {#functional-test-locally}
在本地启动ClickHouse服务器, 监听默认端口(9000). 例如, 要运行测试 `01428_hash_set_nan_key`, 请切换到存储库文件夹并运行以下命令:
```
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
```
有关更多选项, 请参阅`tests/clickhouse-test --help`. 您可以简单地运行所有测试或运行由测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`. 还有并行或随机顺序运行测试的选项.
### 添加新测试 {#adding-new-test}
添加新的测试, 在 `queries/0_stateless` 目录下创建 `.sql``.sh` 文件, 手动检查, 然后通过以下方式生成`.reference`文件:`clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`.
测试应仅使用(创建、删除等)`test` 数据库中假定已预先创建的表; 测试也可以使用临时表.
### 选择测试名称 {#choosing-test-name}
测试名称以五位数前缀开头, 后跟描述性名称, 例如 `00422_hash_function_constexpr.sql`. 要选择前缀, 请找到目录中已存在的最大前缀, 并将其加一. 在此期间, 可能会添加一些具有相同数字前缀的其他测试, 但这没关系并且不会导致任何问题, 您以后不必更改它.
一些测试的名称中标有 `zookeeper`、`shard` 或 `long` . `zookeeper` 用于使用 ZooKeeper 的测试. `shard` 用于需要服务器监听 `127.0.0.*` 的测试; `distributed``global` 具有相同的含义. `long` 用于运行时间稍长于一秒的测试. Yo你可以分别使用 `--no-zookeeper`、`--no-shard` 和 `--no-long` 选项禁用这些测试组. 如果需要 ZooKeeper 或分布式查询,请确保为您的测试名称添加适当的前缀.
### 检查必须发生的错误 {#checking-error-must-occur}
有时您想测试是否因不正确的查询而发生服务器错误. 我们支持在 SQL 测试中对此进行特殊注释, 形式如下:
```
select x; -- { serverError 49 }
```
此测试确保服务器返回关于未知列“x”的错误代码为 49. 如果没有错误, 或者错误不同, 则测试失败. 如果您想确保错误发生在客户端, 请改用 `clientError` 注释.
不要检查错误消息的特定措辞, 它将来可能会发生变化, 并且测试将不必要地中断. 只检查错误代码. 如果现有的错误代码不足以满足您的需求, 请考虑添加一个新的.
### 测试分布式查询 {#testing-distributed-query}
如果你想在功能测试中使用分布式查询, 你可以使用 `127.0.0.{1..2}` 的地址, 以便服务器查询自己; 或者您可以在服务器配置文件中使用预定义的测试集群, 例如`test_shard_localhost`. 请记住在测试名称中添加 `shard``distributed` 字样, 以便它以正确的配置在 CI 中运行, 其中服务器配置为支持分布式查询.
## 已知错误 {#known-bugs}
如果我们知道一些可以通过功能测试轻松重现的错误, 我们将准备好的功能测试放在 `tests/queries/bugs` 目录中. 修复错误后, 这些测试将移至 `tests/queries/0_stateless` .
## 集成测试 {#integration-tests}
集成测试允许在集群配置中测试 ClickHouse 以及 ClickHouse 与其他服务器(如 MySQL、Postgres、MongoDB)的交互. 它们可以用来模拟网络分裂、丢包等情况. 这些测试在Docker下运行, 并使用各种软件创建多个容器.
有关如何运行这些测试, 请参阅 `tests/integration/README.md` .
注意, ClickHouse与第三方驱动程序的集成没有经过测试. 另外, 我们目前还没有JDBC和ODBC驱动程序的集成测试.
## 单元测试 {#unit-tests}
当您想测试的不是 ClickHouse 整体, 而是单个独立库或类时,单元测试很有用. 您可以使用 `ENABLE_TESTS` CMake 选项启用或禁用测试构建. 单元测试(和其他测试程序)位于代码中的 `tests` 子目录中. 要运行单元测试, 请键入 `ninja test` 。有些测试使用 `gtest` , 但有些程序在测试失败时会返回非零退出码.
如果代码已经被功能测试覆盖了, 就没有必要进行单元测试(而且功能测试通常更易于使用).
例如, 您可以通过直接调用可执行文件来运行单独的 gtest 检查:
```bash
$ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
```
## 性能测试 {#performance-tests}
性能测试允许测量和比较 ClickHouse 的某些孤立部分在合成查询上的性能. 测试位于 `tests/performance`. 每个测试都由带有测试用例描述的 `.xml` 文件表示. 测试使用 `docker/tests/performance-comparison` 工具运行. 请参阅自述文件以进行调用.
每个测试在循环中运行一个或多个查询(可能带有参数组合). 一些测试可以包含预加载测试数据集的先决条件.
如果您希望在某些场景中提高ClickHouse的性能并且如果可以在简单的查询中观察到改进那么强烈建议编写性能测试。在测试期间使用 `perf top` 或其他perf工具总是有意义的.
## 测试工具和脚本 {#test-tools-and-scripts}
`tests` 目录中的一些程序不是准备好的测试,而是测试工具. 例如, 对于 `Lexer`, 有一个工具 `src/Parsers/tests/lexer` , 它只是对标准输入进行标记化并将着色结果写入标准输出. 您可以将这些类型的工具用作代码示例以及用于探索和手动测试.
## 其他测试 {#miscellaneous-tests}
`tests/external_models` 中有机器学习模型的测试. 这些测试不会更新, 必须转移到集成测试.
仲裁插入有单独的测试. 该测试在不同的服务器上运行 ClickHouse 集群并模拟各种故障情况:网络分裂、丢包(ClickHouse 节点之间、ClickHouse 和 ZooKeeper 之间、ClickHouse 服务器和客户端之间等)、`kill -9`、`kill -STOP` 和 `kill -CONT` , 比如 [Jepsen](https://aphyr.com/tags/Jepsen). 然后测试检查所有已确认的插入是否已写入并且所有被拒绝的插入均未写入.
在 ClickHouse 开源之前, Quorum 测试是由单独的团队编写的. 这个团队不再与ClickHouse合作. 测试碰巧是用Java编写的. 由于这些原因, 必须重写仲裁测试并将其转移到集成测试.
## 手动测试 {#manual-testing}
当您开发一个新特性时, 手动测试它也是合理的. 您可以按照以下步骤进行操作:
构建 ClickHouse. 从终端运行 ClickHouse将目录更改为 `programs/clickhouse-server` 并使用 `./clickhouse-server` 运行它. 默认情况下, 它将使用当前目录中的配置(`config.xml`、`users.xml` 和`config.d` 和`users.d` 目录中的文件). 要连接到 ClickHouse 服务器, 请运行 `programs/clickhouse-client/clickhouse-client` .
请注意, 所有 clickhouse 工具(服务器、客户端等)都只是指向名为 `clickhouse` 的单个二进制文件的符号链接. 你可以在 `programs/clickhouse` 找到这个二进制文件. 所有工具也可以作为 `clickhouse tool` 而不是 `clickhouse-tool` 调用.
或者, 您可以安装 ClickHouse 包: 从 Yandex 存储库稳定发布, 或者您可以在 ClickHouse 源根目录中使用 `./release` 为自己构建包. 然后使用 `sudo service clickhouse-server start` 启动服务器(或停止以停止服务器). 在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志.
当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件:
``` bash
$ sudo service clickhouse-server stop
$ sudo cp ./clickhouse /usr/bin/
$ sudo service clickhouse-server start
```
您也可以停止系统 clickhouse-server 并使用相同的配置运行您自己的服务器, 但登录到终端:
``` bash
$ sudo service clickhouse-server stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
使用 gdb 的示例:
``` bash
$ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
如果系统 clickhouse-server 已经在运行并且你不想停止它, 你可以在你的 `config.xml` 中更改端口号(或在 `config.d` 目录中的文件中覆盖它们), 提供适当的数据路径, 并运行它.
`clickhouse` 二进制文件几乎没有依赖关系, 可以在广泛的 Linux 发行版中使用. 要在服务器上快速而肮脏地测试您的更改, 您可以简单地将新构建的 `clickhouse` 二进制文件 `scp` 到您的服务器, 然后按照上面的示例运行它.
## 测试环境 {#testing-environment}
在发布稳定版之前, 我们将其部署在测试环境中.测试环境是一个集群,处理 [Yandex.Metrica](https://metrica.yandex.com/) 数据的 1/39 部分. 我们与 Yandex.Metrica 团队共享我们的测试环境. ClickHouse无需在现有数据上停机即可升级. 我们首先看到的是, 数据被成功地处理了, 没有滞后于实时, 复制继续工作, Yandex.Metrica 团队没有发现任何问题. 第一次检查可以通过以下方式进行:
``` sql
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
```
在某些情况下, 我们还会部署到 Yandex 中我们朋友团队的测试环境Market、Cloud 等. 此外, 我们还有一些用于开发目的的硬件服务器.
## 负载测试 {#load-testing}
部署到测试环境后, 我们使用来自生产集群的查询运行负载测试. 这是手动完成的.
确保您在生产集群上启用了 `query_log`.
收集一天或更长时间的查询日志:
``` bash
$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
```
这是一个复杂的例子. `type = 2` 将过滤成功执行的查询. `query LIKE '%ym:%'` 是从 Yandex.Metrica 中选择相关查询. `is_initial_query` 是只选择客户端发起的查询, 而不是 ClickHouse 本身(作为分布式查询处理的一部分).
`scp` 将此日志记录到您的测试集群并按如下方式运行它:
``` bash
$ clickhouse benchmark --concurrency 16 < queries.tsv
```
(可能你还想指定一个 `--user`)
然后把它留到晚上或周末, 去休息一下.
您应该检查 `clickhouse-server` 没有崩溃, 内存占用是有限的, 且性能不会随着时间的推移而降低.
由于查询和环境的高度可变性, 没有记录和比较精确的查询执行时间.
## 构建测试 {#build-tests}
构建测试允许检查在各种可选配置和一些外部系统上的构建是否被破坏. 这些测试也是自动化的.
示例:
- Darwin x86_64 (Mac OS X) 交叉编译
- FreeBSD x86_64 交叉编译
- Linux AArch64 交叉编译
- 使用系统包中的库在 Ubuntu 上构建(不鼓励)
- 使用库的共享链接构建(不鼓励)
例如, 使用系统包构建是不好的做法, 因为我们无法保证系统将拥有哪个确切版本的包. 但这确实是 Debian 维护者所需要的. 出于这个原因, 我们至少必须支持这种构建变体. 另一个例子: 共享链接是一个常见的麻烦来源, 但对于一些爱好者来说是需要的.
虽然我们无法对所有构建变体运行所有测试, 但我们希望至少检查各种构建变体没有被破坏. 为此, 我们使用构建测试.
我们还测试了那些太长而无法编译或需要太多RAM的没有翻译单元.
我们还测试没有太大的堆栈帧.
## 协议兼容性测试 {#testing-for-protocol-compatibility}
当我们扩展 ClickHouse 网络协议时, 我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 一起工作, 而新的 clickhouse-client 与旧的 clickhouse-server 一起工作(只需从相应的包中运行二进制文件).
我们还使用集成测试自动测试一些案例:
- 旧版本ClickHouse写入的数据是否可以被新版本成功读取;
- 在具有不同 ClickHouse 版本的集群中执行分布式查询.
## 编译器的帮助 {#help-from-the-compiler}
主要的 ClickHouse 代码(位于 `dbms` 目录中)是用 `-Wall -Wextra -Werror` 和一些额外的启用警告构建的. 虽然没有为第三方库启用这些选项.
Clang 有更多有用的警告 - 你可以用 `-Weverything` 寻找它们并选择一些东西来默认构建.
对于生产构建, 使用 clang, 但我们也测试 make gcc 构建. 对于开发, clang 通常使用起来更方便. 您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电池), 但请注意, 由于更好的控制流和过程间分析, 编译器能够使用 `-O3` 生成更多警告. 在调试模式下使用 clang 构建时, 使用调试版本的 `libc++` 允许在运行时捕获更多错误.
## 地址清理器 {#sanitizers}
### 地址清理器
我们在ASan上运行功能测试、集成测试、压力测试和单元测试.
### 线程清理器
我们在TSan下运行功能测试、集成测试、压力测试和单元测试.
### 内存清理器
我们在MSan上运行功能测试、集成测试、压力测试和单元测试.
### 未定义的行为清理器
我们在UBSan下运行功能测试、集成测试、压力测试和单元测试. 某些第三方库的代码未针对 UB 进行清理.
### Valgrind (Memcheck)
我们曾经在 Valgrind 下通宵运行功能测试, 但不再这样做了. 这需要几个小时. 目前在`re2`库中有一个已知的误报, 见[这篇文章](https://research.swtch.com/sparse).
## 模糊测试 {#fuzzing}
ClickHouse 模糊测试是使用 [libFuzzer](https://llvm.org/docs/LibFuzzer.html) 和随机 SQL 查询实现的. 所有模糊测试都应使用sanitizers(地址和未定义)进行.
LibFuzzer 用于库代码的隔离模糊测试. Fuzzer 作为测试代码的一部分实现, 并具有 `_fuzzer` 名称后缀.
Fuzzer 示例可以在 `src/Parsers/tests/lexer_fuzzer.cpp` 中找到. LibFuzzer 特定的配置、字典和语料库存储在 `tests/fuzz` 中.
我们鼓励您为处理用户输入的每个功能编写模糊测试.
默认情况下不构建模糊器. 要构建模糊器, 应设置` -DENABLE_FUZZING=1` 和 `-DENABLE_TESTS=1` 选项.
我们建议在构建模糊器时禁用 Jemalloc. 用于将 ClickHouse fuzzing 集成到 Google OSS-Fuzz 的配置可以在 `docker/fuzz` 中找到.
我们还使用简单的模糊测试来生成随机SQL查询, 并检查服务器在执行这些查询时是否会死亡.
你可以在 `00746_sql_fuzzy.pl` 中找到它. 这个测试应该连续运行(通宵或更长时间).
我们还使用复杂的基于 AST 的查询模糊器, 它能够找到大量的极端情况. 它在查询 AST 中进行随机排列和替换. 它会记住先前测试中的 AST 节点, 以使用它们对后续测试进行模糊测试, 同时以随机顺序处理它们. 您可以在 [这篇博客文章](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/) 中了解有关此模糊器的更多信息.
## 压力测试 {#stress-test}
压力测试是另一种模糊测试. 它使用单个服务器以随机顺序并行运行所有功能测试. 不检查测试结果.
经检查:
- 服务器不会崩溃,不会触发调试或清理程序陷阱;
- 没有死锁;
- 数据库结构一致;
- 服务器可以在测试后成功停止并重新启动,没有异常;
有五种变体 (Debug, ASan, TSan, MSan, UBSan).
## 线程模糊器 {#thread-fuzzer}
Thread Fuzzer(请不要与 Thread Sanitizer 混淆)是另一种允许随机化线程执行顺序的模糊测试. 它有助于找到更多特殊情况.
## 安全审计 {#security-audit}
Yandex安全团队的人员从安全的角度对ClickHouse的功能做了一些基本的概述.
## 静态分析仪 {#static-analyzers}
我们在每次提交的基础上运行 `clang-tidy``PVS-Studio`. `clang-static-analyzer` 检查也被启用. `clang-tidy` 也用于一些样式检查.
我们已经评估了 `clang-tidy`、`Coverity`、`cppcheck`、`PVS-Studio`、`tscancode`、`CodeQL`. 您将在 `tests/instructions/` 目录中找到使用说明. 你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/).
如果你使用 `CLion` 作为 IDE, 你可以利用一些开箱即用的 `clang-tidy` 检查
我们还使用 `shellcheck` 对shell脚本进行静态分析.
## 硬化 {#hardening}
在调试版本中, 我们使用自定义分配器执行用户级分配的 ASLR.
我们还手动保护在分配后预期为只读的内存区域.
在调试构建中, 我们还需要对libc进行自定义, 以确保不会调用 "有害的" (过时的、不安全的、非线程安全的)函数.
Debug 断言被广泛使用.
在调试版本中,如果抛出带有 "逻辑错误" 代码(暗示错误)的异常, 则程序会过早终止. 它允许在发布版本中使用异常, 但在调试版本中使其成为断言.
jemalloc 的调试版本用于调试版本.
libc++ 的调试版本用于调试版本.
## 运行时完整性检查
对存储在磁盘上的数据是校验和. MergeTree 表中的数据同时以三种方式进行校验和*(压缩数据块、未压缩数据块、跨块的总校验和). 客户端和服务器之间或服务器之间通过网络传输的数据也会进行校验和. 复制确保副本上的数据位相同.
需要防止硬件故障(存储介质上的位腐烂、服务器上 RAM 中的位翻转、网络控制器 RAM 中的位翻转、网络交换机 RAM 中的位翻转、客户端 RAM 中的位翻转、线路上的位翻转). 请注意,比特位操作很常见, 即使对于 ECC RAM 和 TCP 校验和(如果您每天设法运行数千台处理 PB 数据的服务器, 也可能发生比特位操作. [观看视频(俄语)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
ClickHouse 提供诊断功能, 可帮助运维工程师找到故障硬件.
\* 它并不慢.
## 代码风格 {#code-style}
[此处](style.md)描述了代码样式规则.
要检查一些常见的样式违规,您可以使用 `utils/check-style` 脚本.
要强制使用正确的代码样式, 您可以使用 `clang-format`. 文件 `.clang-format` 位于源根目录. 它大多与我们的实际代码风格相对应. 但是不建议将 `clang-format` 应用于现有文件, 因为它会使格式变得更糟. 您可以使用可以在 clang 源代码库中找到的 `clang-format-diff` 工具.
或者, 您可以尝试使用 `uncrustify` 工具来重新格式化您的代码. 配置位于源根目录中的 `uncrustify.cfg` 中. 它比 `clang-format` 测试更少.
`CLion` 有自己的代码格式化程序, 必须根据我们的代码风格进行调整.
我们还使用 `codespell` 来查找代码中的拼写错误.它也是自动化的.
## Metrica B2B 测试 {#metrica-b2b-tests}
每个 ClickHouse 版本都使用 Yandex Metrica 和 AppMetrica 引擎进行测试. ClickHouse 的测试版和稳定版部署在 VM 上, 并使用 Metrica 引擎的小副本运行, 该引擎处理输入数据的固定样本. 然后将两个 Metrica 引擎实例的结果放在一起比较.
这些测试由单独的团队自动化. 由于移动部件数量众多, 测试在大多数情况下都因完全不相关的原因而失败, 这些原因很难弄清楚. 这些测试很可能对我们有负面价值. 尽管如此, 这些测试在数百次中被证明是有用的.
## 测试覆盖率 {#test-coverage}
我们还跟踪测试覆盖率, 但仅针对功能测试和 clickhouse-server. 它每天进行.
## Tests for Tests
有自动检测薄片测试. 它运行所有新测试100次(用于功能测试)或10次(用于集成测试). 如果至少有一次测试失败,它就被认为是脆弱的.
## Testflows
[Testflows](https://testflows.com/) 是一个企业级的测试框架. Altinity 使用它进行一些测试, 我们在 CI 中运行这些测试.
## Yandex 检查 (only for Yandex employees)
这些检查将ClickHouse代码导入到Yandex内部的单一存储库中, 所以ClickHouse代码库可以被Yandex的其他产品(YT和YDB)用作库. 请注意, clickhouse-server本身并不是由内部回购构建的, Yandex应用程序使用的是未经修改的开源构建的.
## 测试自动化 {#test-automation}
我们使用 Yandex 内部 CI 和名为 "Sandbox" 的作业自动化系统运行测试.
在每次提交的基础上, 构建作业和测试都在沙箱中运行. 生成的包和测试结果发布在GitHub上, 可以通过直接链接下载. 产物要保存几个月. 当你在GitHub上发送一个pull请求时, 我们会把它标记为 "可以测试" , 我们的CI系统会为你构建ClickHouse包(发布、调试、使用地址清理器等).
由于时间和计算能力的限制, 我们不使用 Travis CI.
我们不用Jenkins. 以前用过, 现在我们很高兴不用Jenkins了.
[原始文章](https://clickhouse.com/docs/en/development/tests/) <!--hide-->

View File

@ -0,0 +1,112 @@
---
toc_priority: 68
toc_title: Window View
---
# Window View 函数 {#window-view-han-shu}
Window view函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的window view函数如下
## tumble {#window-view-functions-tumble}
tumble窗口是连续的、不重叠的固定大小(`interval`)时间窗口。
``` sql
tumble(time_attr, interval [, timezone])
```
**参数**
- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。
- `interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小。
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数).
**返回值**
- tumble窗口的开始(包含边界)和结束时间(不包含边界)
类型: `Tuple(DateTime, DateTime)`
**示例**
查询:
``` sql
SELECT tumble(now(), toIntervalDay('1'))
```
结果:
``` text
┌─tumble(now(), toIntervalDay('1'))─────────────┐
│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │
└───────────────────────────────────────────────┘
```
## hop {#window-view-functions-hop}
hop窗口是一个固定大小(`window_interval`)的时间窗口,并按照一个固定的滑动间隔(`hop_interval`)滑动。当滑动间隔小于窗口大小时,滑动窗口间存在重叠,此时一个数据可能存在于多个窗口。
``` sql
hop(time_attr, hop_interval, window_interval [, timezone])
```
**参数**
- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。
- `hop_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的滑动间隔需要大于0。
- `window_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小需要大于0。
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数)。
**返回值**
- hop窗口的开始(包含边界)和结束时间(不包含边界)。由于一个数据可能存在于多个窗口脱离window view单独调用该函数时只返回第一个窗口数据。
类型: `Tuple(DateTime, DateTime)`
**示例**
查询:
``` sql
SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND)
```
结果:
``` text
┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐
│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │
└───────────────────────────────────────────────────────────┘
```
## tumbleStart {#window-view-functions-tumblestart}
返回tumble窗口的开始时间(包含边界)。
``` sql
tumbleStart(time_attr, interval [, timezone]);
```
## tumbleEnd {#window-view-functions-tumbleend}
返回tumble窗口的结束时间(不包含边界)。
``` sql
tumbleEnd(time_attr, interval [, timezone]);
```
## hopStart {#window-view-functions-hopstart}
返回hop窗口的开始时间(包含边界)。
``` sql
hopStart(time_attr, hop_interval, window_interval [, timezone]);
```
## hopEnd {#window-view-functions-hopend}
返回hop窗口的结束时间(不包含边界)。
``` sql
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
```

View File

@ -5,7 +5,7 @@ toc_title: VIEW
# CREATE VIEW {#create-view}
创建一个新视图。 有两种类型的视图:普通视图和物化视图。
创建一个新视图。 有两种类型的视图:普通视图物化视图Live视图和Window视图。
## Normal {#normal}
@ -241,3 +241,120 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa
- 使用定期刷新从系统表中查看指标。
[原始文章](https://clickhouse.com/docs/en/sql-reference/statements/create/view/) <!--hide-->
## Window View [Experimental] {#window-view}
!!! important "重要"
这是一项试验性功能,可能会在未来版本中以向后不兼容的方式进行更改。
通过[allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view)启用window view以及`WATCH`语句。输入命令
`set allow_experimental_window_view = 1`
``` sql
CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function
```
Window view可以通过时间窗口聚合数据并在满足窗口触发条件时自动触发对应窗口计算。其通过将计算状态保存降低处理延迟支持将处理结果输出至目标表或通过`WATCH`语句输出至终端。
创建window view的方式和创建物化视图类似。Window view使用默认为`AggregatingMergeTree`的内部存储引擎存储计算中间状态。
### Window View 函数 {#window-view-han-shu}
[Window view函数](../../functions/window-view-functions.md)用于获取窗口的起始和结束时间。Window view需要和window view函数配合使用。
### 时间属性 {#window-view-shi-jian-shu-xing}
Window view 支持**处理时间**和**事件时间**两种时间类型。
**处理时间**为默认时间类型该模式下window view使用本地机器时间计算窗口数据。“处理时间”时间类型计算简单但具有不确定性。该模式下时间可以为window view函数的第一个参数`time_attr`,或通过函数`now()`使用当前机器时间。下面的例子展示了使用“处理时间”创建的window view的例子。
``` sql
CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id
```
**事件时间** 是事件真实发生的时间该时间往往在事件发生时便嵌入数据记录。事件时间处理提供较高的确定性可以处理乱序数据以及迟到数据。Window view 通过水位线(`WATERMARK`)启用事件时间处理。
Window view提供如下三种水位线策略
* `STRICTLY_ASCENDING`: 提交观测到的最大时间作为水位线,小于最大观测时间的数据不算迟到。
* `ASCENDING`: 提交观测到的最大时间减1作为水位线。小于或等于最大观测时间的数据不算迟到。
* `BOUNDED`: WATERMARK=INTERVAL. 提交最大观测时间减去固定间隔(`INTERVAL`)做为水位线。
以下为使用`WATERMARK`创建window view的示例
``` sql
CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND);
```
通常窗口会在水位线到达时触发水位线到达之后的数据会被丢弃。Window view可以通过设置`ALLOWED_LATENESS=INTERVAL`来开启迟到消息处理。示例如下:
``` sql
CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid;
```
需要注意的是迟到消息需要更新之前的处理结果。与在窗口结束时触发不同迟到消息到达时window view会立即触发计算。因此会导致同一个窗口输出多次计算结果。用户需要注意这种情况并消除重复结果。
### 新窗口监控 {#window-view-xin-chuang-kou-jian-kong}
Window view可以通过`WATCH`语句将处理结果推送至终端,或通过`TO`语句将结果推送至数据表。
``` sql
WATCH [db.]name [LIMIT n]
```
`WATCH`语句和`LIVE VIEW`中的类似。支持设置`LIMIT`参数,输出消息数目达到`LIMIT`限制时结束查询。
### 设置 {#window-view-she-zhi}
- `window_view_clean_interval`: window view清除过期数据间隔(单位为秒)。系统会定期清除过期数据,尚未触发的窗口数据不会被清除。
- `window_view_heartbeat_interval`: 用于判断watch查询活跃的心跳时间间隔。
### 示例 {#window-view-shi-li}
假设我们需要每10秒统计一次`data`表中的点击日志,且`data`表的结构如下:
``` sql
CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory;
```
首先使用10秒大小的tumble函数创建window view。
``` sql
CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id
```
随后,我们使用`WATCH`语句获取计算结果。
``` sql
WATCH wv
```
当日志插入表`data`时,
``` sql
INSERT INTO data VALUES(1,now())
```
`WATCH`语句会输出如下结果:
``` text
┌─count(id)─┬────────window_start─┐
│ 1 │ 2020-01-14 16:56:40 │
└───────────┴─────────────────────┘
```
或者,我们可以通过`TO`关键字将处理结果输出至另一张表。
``` sql
CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id
```
ClickHouse测试中提供了更多的示例(以`*window_view*`命名)。
### Window View 使用场景 {#window-view-shi-yong-chang-jing}
Window view 在以下场景有用:
* **监控**: 以时间维度聚合及处理数据,并将处理结果输出至目标表。用户可通过目标表获取并操作计算结果。
* **分析**: 以时间维度进行数据分析. 当数据源非常庞大时window view可以减少重复全表查询的计算量。

Some files were not shown because too many files have changed in this diff Show More