Merge branch 'master' into fix-lz4

Commit a1b31018d9 by mergify[bot], 2021-12-02 12:00:43 +00:00, committed by GitHub
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
401 changed files with 17898 additions and 1960 deletions

.github/workflows/backport_branches.yml (new file)

@@ -0,0 +1,379 @@
name: BackportPR
on: # yamllint disable-line rule:truthy
push:
branches:
- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: CompatibilityCheck
env:
TEMP_PATH: ${{runner.temp}}/compatibility_check
REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH: ${{runner.temp}}/reports_dir
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 compatibility_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_release'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_asan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_tsan'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'package_debug'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
needs:
- BuilderDebRelease
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
FunctionalStatelessTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (address, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
############################ FUNCTIONAl STATEFUL TESTS #######################################
##############################################################################################
FunctionalStatefulTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (debug, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_thread
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (thread, actions)'
REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsRelease:
needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (release, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs:
- DockerHubPush
- BuilderReport
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
- StressTestTsan
- IntegrationTestsRelease
- CompatibilityCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 finish_check.py
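
Every job in the BackportPR workflow above follows the same handoff pattern: a job lists its prerequisites in needs, the upstream job uploads a small JSON report with actions/upload-artifact, and the downstream job pulls reports back down with actions/download-artifact before running a checker script from tests/ci. A minimal, self-contained sketch of that pattern follows; the workflow, job, artifact, and file names are hypothetical, not the ones used by the ClickHouse CI scripts.

name: ArtifactHandoffSketch
on: push
jobs:
  Producer:
    runs-on: ubuntu-latest
    steps:
      - name: Produce a small JSON report
        run: echo '{"status": "ok"}' > report.json
      - name: Upload report to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: example_report
          path: report.json
  Consumer:
    needs: [Producer]  # runs only after Producer finished successfully
    runs-on: ubuntu-latest
    steps:
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ runner.temp }}/reports_dir
      - name: Use the report
        run: cat "${{ runner.temp }}/reports_dir/example_report/report.json"

When download-artifact@v2 is given only a path (as in the jobs above), it fetches every artifact of the run into a subdirectory named after that artifact, which is why the downstream checks can all point at a single reports_dir.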

.github/workflows/cancel.yml

@@ -1,7 +1,7 @@
name: Cancel
on: # yamllint disable-line rule:truthy
workflow_run:
-workflows: ["CIGithubActions", "ReleaseCI"]
+workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
types:
- requested
jobs:
@@ -10,4 +10,5 @@ jobs:
steps:
- uses: styfle/cancel-workflow-action@0.9.1
with:
+all_but_latest: true
workflow_id: ${{ github.event.workflow.id }}
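
The hunk above registers the two new workflows (DocsCheck and BackportPR) with the shared cancellation workflow and turns on all_but_latest, so that whenever a new run of one of those workflows is requested, older queued or in-progress runs of the same workflow are cancelled. Assembled, the trigger-and-cancel shape looks roughly like the sketch below; the job name and runner label are assumptions, while the trigger list, action version, and with: inputs are taken from the diff.

name: Cancel
on: # yamllint disable-line rule:truthy
  workflow_run:
    workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"]
    types:
      - requested
jobs:
  cancel:  # hypothetical job name
    runs-on: ubuntu-latest  # assumption; the real job runs on a self-hosted runner
    steps:
      - uses: styfle/cancel-workflow-action@0.9.1
        with:
          all_but_latest: true
          workflow_id: ${{ github.event.workflow.id }}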

.github/workflows/docs_check.yml (new file)

@@ -0,0 +1,62 @@
name: DocsCheck
on: # yamllint disable-line rule:truthy
pull_request:
types:
- synchronize
- reopened
- opened
branches:
- master
paths:
- 'docs/**'
- 'website/**'
jobs:
CheckLabels:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Labels check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 run_check.py
DockerHubPush:
needs: CheckLabels
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
DocsCheck:
needs: DockerHubPush
runs-on: [self-hosted, func-tester]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docs_check
- name: Check out repository code
uses: actions/checkout@v2
- name: Docs Check
env:
TEMP_PATH: ${{runner.temp}}/docs_check
REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
run: |
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 docs_check.py
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
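
The new DocsCheck workflow is gated on documentation paths, and the next diff adds the mirror-image paths-ignore filter to the main CI workflow, so a documentation-only pull request runs the lightweight docs pipeline instead of the full build-and-test matrix. A minimal sketch of the two complementary trigger filters, as two separate workflow files (names are illustrative only):

# Sketch 1: runs only when docs or website files change (docs_check.yml style)
name: DocsOnlySketch
on:
  pull_request:
    paths:
      - 'docs/**'
      - 'website/**'
jobs:
  DocsJob:
    runs-on: ubuntu-latest
    steps:
      - run: echo "documentation changed"

# Sketch 2: skipped when only docs or website files change (main CI style)
name: SkipDocsSketch
on:
  pull_request:
    paths-ignore:
      - 'docs/**'
      - 'website/**'
jobs:
  CodeJob:
    runs-on: ubuntu-latest
    steps:
      - run: echo "non-documentation change"

Note that a pull request touching both code and docs still matches both filters and runs both workflows; the filters only separate documentation-only changes from the rest.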

.github/workflows/main.yml

@@ -2,13 +2,14 @@ name: CIGithubActions
on: # yamllint disable-line rule:truthy
pull_request:
types:
-- labeled
-- unlabeled
- synchronize
- reopened
- opened
branches:
- master
+paths-ignore:
+- 'docs/**'
+- 'website/**'
##########################################################################################
##################################### SMALL CHECKS #######################################
##########################################################################################
@@ -60,34 +61,8 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
-DocsCheck:
-needs: DockerHubPush
-runs-on: [self-hosted, func-tester]
-steps:
-- name: Download changed images
-uses: actions/download-artifact@v2
-with:
-name: changed_images
-path: ${{ runner.temp }}/docs_check
-- name: Check out repository code
-uses: actions/checkout@v2
-- name: Docs Check
-env:
-TEMP_PATH: ${{runner.temp}}/docs_check
-REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
-run: |
-cp -r $GITHUB_WORKSPACE $TEMP_PATH
-cd $REPO_COPY/tests/ci
-python3 docs_check.py
-- name: Cleanup
-if: always()
-run: |
-docker kill $(docker ps -q) ||:
-docker rm -f $(docker ps -a -q) ||:
-sudo rm -fr $TEMP_PATH
FastTest:
needs: DockerHubPush
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Check out repository code
@@ -109,8 +84,7 @@ jobs:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
PVSCheck:
-needs: DockerHubPush
+needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, func-tester]
steps:
- name: Check out repository code
@@ -134,7 +108,6 @@ jobs:
sudo rm -fr $TEMP_PATH
CompatibilityCheck:
needs: [BuilderDebRelease]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@@ -161,7 +134,6 @@ jobs:
sudo rm -fr $TEMP_PATH
SplitBuildSmokeTest:
needs: [BuilderDebSplitted]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
@@ -191,7 +163,6 @@ jobs:
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -230,7 +201,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderBinRelease:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -269,7 +239,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -308,7 +277,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebUBsan:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -347,7 +315,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebTsan:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -386,7 +353,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebMsan:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -425,7 +391,6 @@ jobs:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -467,7 +432,6 @@ jobs:
##########################################################################################
BuilderDebSplitted:
needs: [DockerHubPush, FastTest]
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
@@ -504,6 +468,234 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinTidy:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_tidy'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwin:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinFreeBSD:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_freebsd'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwinAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinPPC64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_ppc64le'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@@ -516,7 +708,6 @@ jobs:
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
-- BuilderDebSplitted
runs-on: [self-hosted, style-checker]
if: always()
steps:
@@ -542,6 +733,40 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderSpecialReport:
needs:
- BuilderDebSplitted
- BuilderBinTidy
- BuilderBinDarwin
- BuilderBinAarch64
- BuilderBinFreeBSD
- BuilderBinDarwinAarch64
- BuilderBinPPC64
runs-on: [self-hosted, style-checker]
if: always()
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse special build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@@ -557,10 +782,68 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
-TEMP_PATH: ${{runner.temp}}/stateless_debug
+TEMP_PATH: ${{runner.temp}}/stateless_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, actions)'
-REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
+REPO_COPY: ${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseDatabaseReplicated:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_database_replicated
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, DatabaseReplicated, actions)'
REPO_COPY: ${{runner.temp}}/stateless_database_replicated/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseWideParts:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_wide_parts
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, wide parts enabled, actions)'
REPO_COPY: ${{runner.temp}}/stateless_wide_parts/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
@@ -605,9 +888,7 @@ jobs:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan:
needs: [BuilderDebTsan]
-# tests can consume more than 60GB of memory,
-# so use bigger server
-runs-on: [self-hosted, stress-tester]
+runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
@@ -765,10 +1046,10 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
-TEMP_PATH: ${{runner.temp}}/stateful_debug
+TEMP_PATH: ${{runner.temp}}/stateful_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, actions)'
-REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
+REPO_COPY: ${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
@@ -1480,6 +1761,8 @@ jobs:
- FastTest
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
+- FunctionalStatelessTestReleaseDatabaseReplicated
+- FunctionalStatelessTestReleaseWideParts
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
@@ -1490,7 +1773,6 @@ jobs:
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
-- DocsCheck
- StressTestDebug
- StressTestAsan
- StressTestTsan
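
With documentation changes now filtered out at the trigger level, the per-job guard if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }} is removed throughout the hunks above. For reference, a minimal sketch of that label-gating idiom in isolation (workflow and job names are illustrative only, not part of the diff):

name: LabelGateSketch
on:
  pull_request:
    types: [opened, synchronize, reopened, labeled, unlabeled]
jobs:
  GatedJob:
    # Skipped when the PR carries one of the documentation-only labels
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
    runs-on: ubuntu-latest
    steps:
      - run: echo "this pull request is not documentation-only"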


@@ -41,30 +41,6 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
-PVSCheck:
-needs: DockerHubPush
-if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
-runs-on: [self-hosted, func-tester]
-steps:
-- name: Check out repository code
-uses: actions/checkout@v2
-with:
-submodules: 'recursive'
-- name: PVS Check
-env:
-TEMP_PATH: ${{runner.temp}}/pvs_check
-REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse
-run: |
-sudo rm -fr $TEMP_PATH
-mkdir -p $TEMP_PATH
-cp -r $GITHUB_WORKSPACE $TEMP_PATH
-cd $REPO_COPY/tests/ci && python3 pvs_check.py
-- name: Cleanup
-if: always()
-run: |
-docker kill $(docker ps -q) ||:
-docker rm -f $(docker ps -a -q) ||:
-sudo rm -fr $TEMP_PATH
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
@@ -430,6 +406,240 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinTidy:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_tidy'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwin:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinFreeBSD:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_freebsd'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_darwin_aarch64'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderBinPPC64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NAME: 'binary_ppc64le'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@@ -442,7 +652,6 @@ jobs:
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
-- BuilderDebSplitted
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
@@ -467,6 +676,39 @@ jobs:
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderSpecialReport:
needs:
- BuilderBinTidy
- BuilderDebSplitted
- BuilderBinDarwin
- BuilderBinAarch64
- BuilderBinFreeBSD
- BuilderBinDarwinAarch64
- BuilderBinPPC64
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse special build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@@ -482,10 +724,39 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
-TEMP_PATH: ${{runner.temp}}/stateless_debug
+TEMP_PATH: ${{runner.temp}}/stateless_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, actions)'
-REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
+REPO_COPY: ${{runner.temp}}/stateless_release/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestReleaseDatabaseOrdinary:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_release_database_ordinary
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, DatabaseOrdinary, actions)'
REPO_COPY: ${{runner.temp}}/stateless_release_database_ordinary/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
@@ -659,10 +930,39 @@ jobs:
uses: actions/checkout@v2
- name: Functional test
env:
-TEMP_PATH: ${{runner.temp}}/stateful_debug
+TEMP_PATH: ${{runner.temp}}/stateful_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, actions)'
-REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
+REPO_COPY: ${{runner.temp}}/stateful_release/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestReleaseDatabaseOrdinary:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_release_database_ordinary
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, DatabaseOrdinary, actions)'
REPO_COPY: ${{runner.temp}}/stateful_release_database_ordinary/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
@@ -1343,12 +1643,14 @@ jobs:
- BuilderReport
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
+- FunctionalStatelessTestReleaseDatabaseOrdinary
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
+- FunctionalStatefulTestReleaseDatabaseOrdinary
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
@@ -1373,7 +1675,6 @@ jobs:
- UnitTestsUBsan
- UnitTestsReleaseClang
- SplitBuildSmokeTest
-- PVSCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code


@@ -6,7 +6,6 @@ on: # yamllint disable-line rule:truthy
- '22.**'
- '23.**'
- '24.**'
-- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]

.gitmodules

@@ -17,7 +17,7 @@
[submodule "contrib/zlib-ng"]
path = contrib/zlib-ng
url = https://github.com/ClickHouse-Extras/zlib-ng.git
-branch = clickhouse-new
+branch = clickhouse-2.0.x
[submodule "contrib/googletest"]
path = contrib/googletest
url = https://github.com/google/googletest.git
@@ -135,9 +135,6 @@
[submodule "contrib/flatbuffers"]
path = contrib/flatbuffers
url = https://github.com/ClickHouse-Extras/flatbuffers.git
-[submodule "contrib/libc-headers"]
-path = contrib/libc-headers
-url = https://github.com/ClickHouse-Extras/libc-headers.git
[submodule "contrib/replxx"]
path = contrib/replxx
url = https://github.com/ClickHouse-Extras/replxx.git


@@ -223,7 +223,7 @@ if (OS_DARWIN)
# from a _specific_ library, which is what we need.
set(WHOLE_ARCHIVE -force_load)
# The `-noall_load` flag is the default and now obsolete.
-set(NO_WHOLE_ARCHIVE "")
+set(NO_WHOLE_ARCHIVE "-undefined,error") # Effectively, a no-op. Here to avoid empty "-Wl, " sequence to be generated in the command line.
else ()
set(WHOLE_ARCHIVE --whole-archive)
set(NO_WHOLE_ARCHIVE --no-whole-archive)


@@ -29,14 +29,6 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
-# glibc-compatibility library relies to constant version of libc headers
-# (because minor changes in function attributes between different glibc versions will introduce incompatibilities)
-# This is for x86_64. For other architectures we have separate toolchains.
-if (ARCH_AMD64 AND NOT CMAKE_CROSSCOMPILING)
-set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
-set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers)
-endif ()
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)


@@ -22,9 +22,10 @@ if (COMPILER_GCC)
elseif (COMPILER_CLANG)
# Require minimum version of clang/apple-clang
if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
-# If you are developer you can figure out what exact versions of AppleClang are Ok,
-# simply remove the following line.
+# (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it.
+if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG})
message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew. See the instruction: https://clickhouse.com/docs/en/development/build-osx/")
+endif ()
# AppleClang 10.0.1 (Xcode 10.2) corresponds to LLVM/Clang upstream version 7.0.0
# AppleClang 11.0.0 (Xcode 11.0) corresponds to LLVM/Clang upstream version 8.0.0


@@ -301,9 +301,10 @@ endif()
# instead of controlling it via CMAKE_FOLDER.
function (ensure_target_rooted_in _target _folder)
-# Skip INTERFACE library targets, since FOLDER property is not available for them.
+# Skip aliases and INTERFACE library targets, since FOLDER property is not available/writable for them.
+get_target_property (_target_aliased "${_target}" ALIASED_TARGET)
get_target_property (_target_type "${_target}" TYPE)
-if (_target_type STREQUAL "INTERFACE_LIBRARY")
+if (_target_aliased OR _target_type STREQUAL "INTERFACE_LIBRARY")
return ()
endif ()

contrib/libc-headers (submodule removed)

@@ -1 +0,0 @@
-Subproject commit aa5429bf67a346e48ad60efd88bcefc286644bf3


@@ -73,6 +73,11 @@ target_compile_options(cxx PRIVATE -w)
target_link_libraries(cxx PUBLIC cxxabi)
+# For __udivmodti4, __divmodti4.
+if (OS_DARWIN AND COMPILER_GCC)
+target_link_libraries(cxx PRIVATE gcc)
+endif ()
install(
TARGETS cxx
EXPORT global


@@ -28,11 +28,16 @@ set (SRCS
${SRC_DIR}/src/sentry_unix_pageallocator.c
${SRC_DIR}/src/path/sentry_path_unix.c
${SRC_DIR}/src/symbolizer/sentry_symbolizer_unix.c
-${SRC_DIR}/src/modulefinder/sentry_modulefinder_linux.c
${SRC_DIR}/src/transports/sentry_transport_curl.c
${SRC_DIR}/src/backends/sentry_backend_none.c
)
+if(APPLE)
+list(APPEND SRCS ${SRC_DIR}/src/modulefinder/sentry_modulefinder_apple.c)
+else()
+list(APPEND SRCS ${SRC_DIR}/src/modulefinder/sentry_modulefinder_linux.c)
+endif()
add_library(sentry ${SRCS})
add_library(sentry::sentry ALIAS sentry)


@ -6,4 +6,6 @@ add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}") target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
# simdjson is using its own CPU dispatching and gets confused if we enable AVX/AVX2 flags. # simdjson is using its own CPU dispatching and gets confused if we enable AVX/AVX2 flags.
target_compile_options(simdjson PRIVATE -mno-avx -mno-avx2) if(ARCH_AMD64)
target_compile_options(simdjson PRIVATE -mno-avx -mno-avx2)
endif()

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit 4ef348b7f30f2ad5b02b266268b3c948e51ad457 Subproject commit 410845187f582c5e6692b53dddbe43efbb728734

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb Subproject commit bffad6f6fe74d6a2f92e2668390664a926c68733

View File

@ -174,7 +174,6 @@ function clone_submodules
contrib/double-conversion contrib/double-conversion
contrib/libcxx contrib/libcxx
contrib/libcxxabi contrib/libcxxabi
contrib/libc-headers
contrib/lz4 contrib/lz4
contrib/zstd contrib/zstd
contrib/fastops contrib/fastops
@ -235,6 +234,9 @@ function build
time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt"
if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then
cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse"
strip programs/clickhouse -o "$FASTTEST_OUTPUT/clickhouse-stripped"
gzip "$FASTTEST_OUTPUT/clickhouse-stripped"
fi fi
ccache --show-stats ||: ccache --show-stats ||:
) )

View File

@ -151,8 +151,8 @@ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors # Grep logs for sanitizer asserts, crashes and other critical errors
# Sanitizer asserts # Sanitizer asserts
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp grep -Fa "==================" /var/log/clickhouse-server/stderr.log | grep -v "in query:" >> /test_output/tmp
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp grep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv || echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv

View File

@ -67,52 +67,70 @@ def compress_stress_logs(output_path, files_prefix):
cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*" cmd = f"cd {output_path} && tar -zcf stress_run_logs.tar.gz {files_prefix}* && rm {files_prefix}*"
check_output(cmd, shell=True) check_output(cmd, shell=True)
def call_with_retry(query, timeout=30, retry_count=5):
for i in range(retry_count):
code = call(query, shell=True, stderr=STDOUT, timeout=timeout)
if code != 0:
time.sleep(i)
else:
break
def prepare_for_hung_check(drop_databases): def prepare_for_hung_check(drop_databases):
# FIXME this function should not exist, but... # FIXME this function should not exist, but...
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures # ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
# We attach gdb to clickhouse-server before running tests # We attach gdb to clickhouse-server before running tests
# to print stacktraces of all crashes even if clickhouse cannot print it for some reason. # to print stacktraces of all crashes even if clickhouse cannot print it for some reason.
# However, it obstructs checking for hung queries. # However, it obstructs checking for hung queries.
logging.info("Will terminate gdb (if any)") logging.info("Will terminate gdb (if any)")
call("kill -TERM $(pidof gdb)", shell=True, stderr=STDOUT, timeout=30) call_with_retry("kill -TERM $(pidof gdb)")
# Some tests set too low a memory limit for the default user and forget to reset it back. # Some tests set too low a memory limit for the default user and forget to reset it back.
# It may cause SYSTEM queries to fail, let's disable memory limit. # It may cause SYSTEM queries to fail, let's disable memory limit.
call("clickhouse client --max_memory_usage_for_user=0 -q 'SELECT 1 FORMAT Null'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client --max_memory_usage_for_user=0 -q 'SELECT 1 FORMAT Null'")
# Some tests execute SYSTEM STOP MERGES or similar queries. # Some tests execute SYSTEM STOP MERGES or similar queries.
# It may cause some ALTERs to hang. # It may cause some ALTERs to hang.
# Possibly we should fix tests and forbid to use such queries without specifying table. # Possibly we should fix tests and forbid to use such queries without specifying table.
call("clickhouse client -q 'SYSTEM START MERGES'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START MERGES'")
call("clickhouse client -q 'SYSTEM START DISTRIBUTED SENDS'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START DISTRIBUTED SENDS'")
call("clickhouse client -q 'SYSTEM START TTL MERGES'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START TTL MERGES'")
call("clickhouse client -q 'SYSTEM START MOVES'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START MOVES'")
call("clickhouse client -q 'SYSTEM START FETCHES'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START FETCHES'")
call("clickhouse client -q 'SYSTEM START REPLICATED SENDS'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START REPLICATED SENDS'")
call("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'", shell=True, stderr=STDOUT, timeout=30) call_with_retry("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'")
# Issue #21004, live views are experimental, so let's just suppress it # Issue #21004, live views are experimental, so let's just suppress it
call("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """, shell=True, stderr=STDOUT, timeout=30) call_with_retry("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """)
# Kill other queries which are known to be slow # Kill other queries which are known to be slow
# It's a query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds # It's a query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """, shell=True, stderr=STDOUT, timeout=30) call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """)
# Long query from 00084_external_agregation # Long query from 00084_external_agregation
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """, shell=True, stderr=STDOUT, timeout=30) call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """)
if drop_databases: if drop_databases:
# Here we try to drop all databases in async mode. If some queries are really hung, then the drop will hang too. for i in range(5):
# Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). try:
databases = check_output('clickhouse client -q "SHOW DATABASES"', shell=True, timeout=30).decode('utf-8').strip().split() # Here we try to drop all databases in async mode. If some queries are really hung, then the drop will hang too.
for db in databases: # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds).
if db == "system": databases = check_output('clickhouse client -q "SHOW DATABASES"', shell=True, timeout=30).decode('utf-8').strip().split()
continue for db in databases:
command = f'clickhouse client -q "DROP DATABASE {db}"' if db == "system":
# we don't wait for drop continue
Popen(command, shell=True) command = f'clickhouse client -q "DROP DATABASE {db}"'
# we don't wait for drop
Popen(command, shell=True)
break
except Exception as ex:
print("Failed to SHOW or DROP databasese, will retry", ex)
time.sleep(i)
else:
raise Exception("Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries")
# Wait for last queries to finish if any, not longer than 300 seconds # Wait for last queries to finish if any, not longer than 300 seconds
call("""clickhouse client -q "select sleepEachRow(( call("""clickhouse client -q "select sleepEachRow((

View File

@ -3,15 +3,14 @@ toc_priority: 65
toc_title: Build on Mac OS X toc_title: Build on Mac OS X
--- ---
# You don't have to build ClickHouse
You can install ClickHouse as follows: https://clickhouse.com/#quick-start
Choose Mac x86 or M1.
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x} # How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
Build should work on x86_64 (Intel) and arm64 (Apple Silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang. !!! info "You don't have to build ClickHouse yourself!"
It is always recommended to use `clang` compiler. It is possible to use XCode's `AppleClang` or `gcc` but it's strongly discouraged. You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start).
Follow `macOS (Intel)` or `macOS (Apple silicon)` installation instructions.
Build should work on x86_64 (Intel) and arm64 (Apple silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang.
It is always recommended to use the vanilla `clang` compiler. It is possible to use Xcode's `apple-clang` or `gcc`, but this is strongly discouraged.
## Install Homebrew {#install-homebrew} ## Install Homebrew {#install-homebrew}
@ -33,8 +32,6 @@ sudo rm -rf /Library/Developer/CommandLineTools
sudo xcode-select --install sudo xcode-select --install
``` ```
Reboot.
## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries} ## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
``` bash ``` bash
@ -51,40 +48,41 @@ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
## Build ClickHouse {#build-clickhouse} ## Build ClickHouse {#build-clickhouse}
To build using Homebrew's vanilla Clang compiler: To build using Homebrew's vanilla Clang compiler (the only **recommended** way):
``` bash ``` bash
cd ClickHouse cd ClickHouse
rm -rf build rm -rf build
mkdir build mkdir build
cd build cd build
cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo .. cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_AR=$(brew --prefix llvm)/bin/llvm-ar -DCMAKE_RANLIB=$(brew --prefix llvm)/bin/llvm-ranlib -DOBJCOPY_PATH=$(brew --prefix llvm)/bin/llvm-objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo cmake --build . --config RelWithDebInfo
cd .. # The resulting binary will be created at: ./programs/clickhouse
``` ```
To build using Xcode's native AppleClang compiler (this option is strongly not recommended; use the option above): To build using Xcode's native AppleClang compiler in Xcode IDE (this option is only for development builds and workflows, and is **not recommended** unless you know what you are doing):
``` bash ``` bash
cd ClickHouse cd ClickHouse
rm -rf build rm -rf build
mkdir build mkdir build
cd build cd build
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. XCODE_IDE=1 ALLOW_APPLECLANG=1 cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DENABLE_JEMALLOC=OFF ..
cmake --build . --config RelWithDebInfo cmake --open .
cd .. # ...then, in Xcode IDE select ALL_BUILD scheme and start the building process.
# The resulting binary will be created at: ./programs/Debug/clickhouse
``` ```
To build using Homebrew's vanilla GCC compiler (this option is absolutely not recommended, I'm wondering why do we ever have it): To build using Homebrew's vanilla GCC compiler (this option is only for development experiments, and is **absolutely not recommended** unless you really know what you are doing):
``` bash ``` bash
cd ClickHouse cd ClickHouse
rm -rf build rm -rf build
mkdir build mkdir build
cd build cd build
cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_BUILD_TYPE=RelWithDebInfo .. cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_AR=$(brew --prefix gcc)/bin/gcc-ar-11 -DCMAKE_RANLIB=$(brew --prefix gcc)/bin/gcc-ranlib-11 -DOBJCOPY_PATH=$(brew --prefix binutils)/bin/objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo cmake --build . --config RelWithDebInfo
cd .. # The resulting binary will be created at: ./programs/clickhouse
``` ```
## Caveats {#caveats} ## Caveats {#caveats}
@ -140,9 +138,9 @@ sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
To check if it's working, use the `ulimit -n` or `launchctl limit maxfiles` commands. To check if it's working, use the `ulimit -n` or `launchctl limit maxfiles` commands.
## Run ClickHouse server: ## Running ClickHouse server
``` ``` bash
cd ClickHouse cd ClickHouse
./build/programs/clickhouse-server --config-file ./programs/server/config.xml ./build/programs/clickhouse-server --config-file ./programs/server/config.xml
``` ```

View File

@ -131,18 +131,18 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- **Build log**: link to the building and files copying log, useful when build failed. - **Build log**: link to the building and files copying log, useful when build failed.
- **Build time**. - **Build time**.
- **Artifacts**: build result files (with `XXX` being the server version e.g. `20.8.1.4344`). - **Artifacts**: build result files (with `XXX` being the server version e.g. `20.8.1.4344`).
- `clickhouse-client_XXX_all.deb` - `clickhouse-client_XXX_all.deb`
- `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb` - `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
- `clickhouse-common-staticXXX_amd64.deb` - `clickhouse-common-staticXXX_amd64.deb`
- `clickhouse-server_XXX_all.deb` - `clickhouse-server_XXX_all.deb`
- `clickhouse-test_XXX_all.deb` - `clickhouse-test_XXX_all.deb`
- `clickhouse_XXX_amd64.buildinfo` - `clickhouse_XXX_amd64.buildinfo`
- `clickhouse_XXX_amd64.changes` - `clickhouse_XXX_amd64.changes`
- `clickhouse`: Main built binary. - `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge` - `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests. - `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries. - `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: Special package for performance tests. - `performance.tgz`: Special package for performance tests.
## Special Build Check ## Special Build Check

View File

@ -39,12 +39,16 @@ DETACH TABLE postgres_database.table_to_remove;
## Settings {#settings} ## Settings {#settings}
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list) - [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list)
- [materialized_postgresql_schema](../../operations/settings/settings.md#materialized-postgresql-schema)
- [materialized_postgresql_schema_list](../../operations/settings/settings.md#materialized-postgresql-schema-list)
- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update) - [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)
- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size)
- [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) - [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot)
- [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) - [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot)
@ -52,8 +56,7 @@ DETACH TABLE postgres_database.table_to_remove;
``` sql ``` sql
CREATE DATABASE database1 CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536, SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3';
materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM database1.table1; SELECT * FROM database1.table1;
``` ```
@ -64,6 +67,55 @@ The settings can be changed, if necessary, using a DDL query. But it is impossib
ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>; ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = <new_size>;
``` ```
## PostgreSQL schema {#schema}
A PostgreSQL [schema](https://www.postgresql.org/docs/9.1/ddl-schemas.html) can be used in the following ways.
1. One schema for one `MaterializedPostgreSQL` database engine. Requires the `materialized_postgresql_schema` setting.
Tables are accessed via table name only:
``` sql
CREATE DATABASE postgres_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_schema = 'postgres_schema';
SELECT * FROM postgres_database.table1;
```
2. Any number of schemas with a specified set of tables for one `MaterializedPostgreSQL` database engine. Requires the `materialized_postgresql_tables_list` setting. Each table is written along with its schema.
Tables are accessed via schema name and table name at the same time:
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3',
         materialized_postgresql_tables_list_with_schema = 1;
SELECT * FROM database1.`schema1.table1`;
SELECT * FROM database1.`schema2.table2`;
```
But in this case all tables in `materialized_postgresql_tables_list` must be written with their schema names.
Requires `materialized_postgresql_tables_list_with_schema = 1`.
Warning: in this case, dots in table names are not allowed.
3. Any number of schemas with a full set of tables for one `MaterializedPostgreSQL` database engine. Requires the `materialized_postgresql_schema_list` setting.
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_schema_list = 'schema1,schema2,schema3';
SELECT * FROM database1.`schema1.table1`;
SELECT * FROM database1.`schema1.table2`;
SELECT * FROM database1.`schema2.table2`;
```
Warning: in this case, dots in table names are not allowed.
## Requirements {#requirements} ## Requirements {#requirements}
1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file. 1. The [wal_level](https://www.postgresql.org/docs/current/runtime-config-wal.html) setting must have a value `logical` and `max_replication_slots` parameter must have a value at least `2` in the PostgreSQL config file.

View File

@ -33,7 +33,7 @@ CREATE TABLE test
`key` String, `key` String,
`v1` UInt32, `v1` UInt32,
`v2` String, `v2` String,
`v3` Float32, `v3` Float32
) )
ENGINE = EmbeddedRocksDB ENGINE = EmbeddedRocksDB
PRIMARY KEY key PRIMARY KEY key

View File

@ -133,8 +133,7 @@ Example:
SELECT level, sum(total) FROM daily GROUP BY level; SELECT level, sum(total) FROM daily GROUP BY level;
``` ```
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/settings/settings/#settings-max_insert_block_size). If the block wasn't formed within [stream_flush_interval_ms](../../../operations/settings/settings/#stream-flush-interval-ms) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn't formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To stop receiving topic data or to change the conversion logic, detach the materialized view: To stop receiving topic data or to change the conversion logic, detach the materialized view:
@ -192,6 +191,6 @@ Example:
**See Also** **See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) - [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size)
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/) <!--hide--> [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/) <!--hide-->

View File

@ -108,9 +108,43 @@ SELECT * FROM mysql_table
└────────────────┴────────┘ └────────────────┴────────┘
``` ```
## Settings {#mysql-settings}
Default settings are not very efficient, since they do not even reuse connections. These settings allow you to increase the number of queries run by the server per second.
### connection_auto_close {#connection-auto-close}
Allows the connection to be closed automatically after query execution, i.e. disables connection reuse.
Possible values:
- 1 — Auto-close is allowed, so connection reuse is disabled.
- 0 — Auto-close is not allowed, so connection reuse is enabled.
Default value: `1`.
### connection_max_tries {#connection-max-tries}
Sets the number of retries for pool with failover.
Possible values:
- Positive integer.
- 0 — There are no retries for pool with failover.
Default value: `3`.
### connection_pool_size {#connection-pool-size}
Size of the connection pool (if all connections are in use, the query will wait until a connection is freed).
Possible values:
- Positive integer.
Default value: `16`.
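Below is a minimal sketch combining the three settings above in a table definition; it assumes the table-level `SETTINGS` clause shown for this engine in the Russian version of this page (also updated in this commit), and the host, database, credentials, and table names are placeholders.

``` sql
CREATE TABLE mysql_table_with_pool
(
    `id` UInt32,
    `name` String
)
ENGINE = MySQL('mysql-host:3306', 'db', 'source_table', 'user', 'password')
SETTINGS connection_pool_size = 16,   -- keep up to 16 pooled connections
         connection_max_tries = 3,    -- retry a failed connection up to 3 times
         connection_auto_close = 0;   -- keep connections open for reuse
```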
## See Also {#see-also} ## See Also {#see-also}
- [The mysql table function](../../../sql-reference/table-functions/mysql.md) - [The mysql table function](../../../sql-reference/table-functions/mysql.md)
- [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) - [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mysql/) <!--hide-->

View File

@ -24,6 +24,8 @@ The supported formats are:
| [CSVWithNames](#csvwithnames) | ✔ | ✔ | | [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | | [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ | | [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ | | [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ | | [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ | | [JSON](#json) | ✗ | ✔ |
@ -429,8 +431,17 @@ Also prints two header rows with column names and types, similar to [TabSeparate
## CustomSeparated {#format-customseparated} ## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings. Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) setting and delimiters from [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) and [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter) settings, not from format strings.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
## CustomSeparatedWithNames {#customseparatedwithnames}
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
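As a small, hedged illustration of the new format, the query below uses the built-in `numbers` table function and the default `format_custom_*` delimiters; it only demonstrates where the extra header rows appear.

``` sql
-- Prints two header rows (column names, then types) before the data rows,
-- using the default CustomSeparated delimiters and escaping rule.
SELECT number AS id, toString(number) AS value
FROM numbers(3)
FORMAT CustomSeparatedWithNamesAndTypes;
```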
## JSON {#json} ## JSON {#json}
@ -1536,14 +1547,17 @@ Each line of imported data is parsed according to the regular expression.
When working with the `Regexp` format, you can use the following settings: When working with the `Regexp` format, you can use the following settings:
- `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format. - `format_regexp` — [String](../sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format.
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported:
- CSV (similarly to [CSV](#csv)) - `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). The following escaping rules are supported:
- JSON (similarly to [JSONEachRow](#jsoneachrow))
- Escaped (similarly to [TSV](#tabseparated)) - CSV (similarly to [CSV](#csv))
- Quoted (similarly to [Values](#data-format-values)) - JSON (similarly to [JSONEachRow](#jsoneachrow))
- Raw (extracts subpatterns as a whole, no escaping rules) - Escaped (similarly to [TSV](#tabseparated))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. - Quoted (similarly to [Values](#data-format-values))
- Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
**Usage** **Usage**

View File

@ -69,8 +69,6 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression> </compression>
``` ```
<!--
## encryption {#server-settings-encryption} ## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or set in the configuration file. Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or set in the configuration file.
@ -150,7 +148,6 @@ Or it can be set in hex:
Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long). Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long).
-->
## custom_settings_prefixes {#custom_settings_prefixes} ## custom_settings_prefixes {#custom_settings_prefixes}

View File

@ -3691,6 +3691,14 @@ Sets a comma-separated list of PostgreSQL database tables, which will be replica
Default value: empty list — means whole PostgreSQL database will be replicated. Default value: empty list — means whole PostgreSQL database will be replicated.
## materialized_postgresql_schema {#materialized-postgresql-schema}
Default value: empty string. (Default schema is used)
## materialized_postgresql_schema_list {#materialized-postgresql-schema-list}
Default value: empty list. (Default schema is used)
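These two settings are listed here only with their defaults; for context, the MaterializedPostgreSQL engine page updated in this same commit applies `materialized_postgresql_schema` as follows (the connection parameters are the placeholders used there).

``` sql
CREATE DATABASE postgres_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_schema = 'postgres_schema';
```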
## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update} ## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them.
@ -4071,3 +4079,54 @@ Possible values:
- 0 — Big files read with only copying data from kernel to userspace. - 0 — Big files read with only copying data from kernel to userspace.
Default value: `0`. Default value: `0`.
## format_custom_escaping_rule {#format-custom-escaping-rule}
Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Possible values:
- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md#tabseparated).
- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md#data-format-values).
- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md#csv).
- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
- `'XML'` — Similarly to [XML](../../interfaces/formats.md#xml).
- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
Default value: `'Escaped'`.
## format_custom_field_delimiter {#format-custom-field-delimiter}
Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `'\t'`.
## format_custom_row_before_delimiter {#format-custom-row-before-delimiter}
Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_row_after_delimiter {#format-custom-row-after-delimiter}
Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `'\n'`.
## format_custom_row_between_delimiter {#format-custom-row-between-delimiter}
Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_result_before_delimiter {#format-custom-result-before-delimiter}
Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## format_custom_result_after_delimiter {#format-custom-result-after-delimiter}
Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
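A short sketch tying the settings above together; the delimiter values are arbitrary examples, not defaults.

``` sql
-- Emit CustomSeparated output with CSV-style escaping and ';' between fields.
SET format_custom_escaping_rule = 'CSV',
    format_custom_field_delimiter = ';';

SELECT number AS id, concat('name_', toString(number)) AS name
FROM numbers(2)
FORMAT CustomSeparated;
```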

View File

@ -3,13 +3,12 @@ toc_priority: 46
toc_title: Polygon Dictionaries With Grids toc_title: Polygon Dictionaries With Grids
--- ---
# Polygon dictionaries {#polygon-dictionaries} # Polygon dictionaries {#polygon-dictionaries}
Polygon dictionaries allow you to efficiently search for the polygon containing specified points. Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
For example: defining a city area by geographical coordinates. For example: defining a city area by geographical coordinates.
Example configuration: Example of a polygon dictionary configuration:
``` xml ``` xml
<dictionary> <dictionary>
@ -30,13 +29,15 @@ Example configuration:
<type>UInt64</type> <type>UInt64</type>
<null_value>0</null_value> <null_value>0</null_value>
</attribute> </attribute>
</structure> </structure>
<layout> <layout>
<polygon /> <polygon>
<store_polygon_key_column>1</store_polygon_key_column>
</polygon>
</layout> </layout>
...
</dictionary> </dictionary>
``` ```
@ -48,11 +49,12 @@ CREATE DICTIONARY polygon_dict_name (
value UInt64 value UInt64
) )
PRIMARY KEY key PRIMARY KEY key
LAYOUT(POLYGON()) LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
... ...
``` ```
When configuring the polygon dictionary, the key must have one of two types: When configuring the polygon dictionary, the key must have one of two types:
- A simple polygon. It is an array of points. - A simple polygon. It is an array of points.
- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it. - MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
@ -60,25 +62,27 @@ Points can be specified as an array or a tuple of their coordinates. In the curr
The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse. The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available: There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. - `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
- POLYGON_INDEX_EACH. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions). - `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether a point belongs to it in most cases (optimized for geographical regions).
Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration. Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters. The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons. The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons.
To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately. To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately.
- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request. - `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each grid cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
- POLYGON. Synonym to POLYGON_INDEX_CELL. - `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries. Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
An important difference is that here the keys will be the points for which you want to find the polygon containing them. An important difference is that here the keys will be the points for which you want to find the polygon containing them.
**Example**
Example of working with the dictionary defined above: Example of working with the dictionary defined above:
``` sql ``` sql
CREATE TABLE points ( CREATE TABLE points (
x Float64, x Float64,
@ -89,3 +93,39 @@ SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, '
``` ```
As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output. As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output.
**Example**
You can read columns from polygon dictionaries via a SELECT query; just turn on `store_polygon_key_column = 1` in the dictionary configuration or the corresponding DDL query.
Query:
``` sql
CREATE TABLE polygons_test_table
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
) ENGINE = TinyLog;
INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
CREATE DICTIONARY polygons_test_dictionary
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
LIFETIME(0);
SELECT * FROM polygons_test_dictionary;
```
Result:
``` text
┌─key─────────────────────────────┬─name──┐
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```

View File

@ -89,9 +89,39 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
## sipHash128 {#hash_functions-siphash128} ## sipHash128 {#hash_functions-siphash128}
Calculates SipHash from a string. Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits.
Accepts a String-type argument. Returns FixedString(16).
Differs from sipHash64 in that the final xor-folding state is only done up to 128 bits. **Syntax**
``` sql
sipHash128(par1,...)
```
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
**Returned value**
A 128-bit `SipHash` hash value.
Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
``` sql
SELECT hex(sipHash128('foo', '\x01', 3));
```
Result:
``` text
┌─hex(sipHash128('foo', '', 3))────┐
│ 9DE516A64A414D4B1B609415E4523F24 │
└──────────────────────────────────┘
```
## cityHash64 {#cityhash64} ## cityHash64 {#cityhash64}
@ -459,28 +489,36 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash value. Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
**Syntax**
``` sql ``` sql
murmurHash3_128( expr ) murmurHash3_128(expr)
``` ```
**Arguments** **Arguments**
- `expr`[Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. - `expr`A list of [expressions](../../sql-reference/syntax.md#syntax-expressions). [String](../../sql-reference/data-types/string.md).
**Returned Value** **Returned value**
A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) data type hash value. A 128-bit `MurmurHash3` hash value.
Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Example** **Example**
Query:
``` sql ``` sql
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
``` ```
Result:
``` text ``` text
┌─MurmurHash3──────────────────────┬─type───┐ ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐
368A1A311CB7342253354B548E7E7E71 │ String F8F7AD9B6CD4CF117A71E277E2EC2931
└──────────────────────────────────────────┘ └──────────────────────────────────────────┘
``` ```
## xxHash32, xxHash64 {#hash-functions-xxhash32} ## xxHash32, xxHash64 {#hash-functions-xxhash32}

View File

@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.
Syntax: Syntax:
``` sql ``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|MODIFY|MATERIALIZE COLUMN ... ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
``` ```
In the query, specify a list of one or more comma-separated actions. In the query, specify a list of one or more comma-separated actions.
@ -138,6 +138,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for
``` sql ``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST] MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
``` ```
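As a small sketch of the two equivalent spellings above, reusing the hypothetical `visits` table and `browser` column from the earlier examples on this page:

``` sql
-- Both statements change the column type; ALTER COLUMN ... TYPE is the alternative spelling.
ALTER TABLE visits MODIFY COLUMN browser String;
ALTER TABLE visits ALTER COLUMN browser TYPE String;
```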
This query changes the `name` column properties: This query changes the `name` column properties:

View File

@ -5,9 +5,9 @@ toc_title: INSERT INTO
## INSERT INTO Statement {#insert} ## INSERT INTO Statement {#insert}
Adding data. Inserts data into a table.
Basic query format: **Syntax**
``` sql ``` sql
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
@ -92,6 +92,8 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr
### Inserting the Results of `SELECT` {#insert_query_insert-select} ### Inserting the Results of `SELECT` {#insert_query_insert-select}
**Syntax**
``` sql ``` sql
INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
``` ```
@ -107,6 +109,68 @@ However, you can delete old data using `ALTER TABLE ... DROP PARTITION`.
To insert a default value instead of `NULL` into a column with not nullable data type, enable [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default) setting. To insert a default value instead of `NULL` into a column with not nullable data type, enable [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default) setting.
### Inserting Data from a File {#inserting-data-from-a-file}
**Syntax**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Use the syntax above to insert data from a file stored on the **client** side. `file_name` and `type` are string literals. The input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Compressed files are supported. The compression type is detected from the file name extension, or it can be specified explicitly in the `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
This functionality is available in the [command-line client](../../interfaces/cli.md) and [clickhouse-local](../../operations/utilities/clickhouse-local.md).
**Example**
Execute the following queries using [command-line client](../../interfaces/cli.md):
```bash
echo 1,A > input.csv ; echo 2,B >> input.csv
clickhouse-client --query="CREATE TABLE table_from_file (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;"
clickhouse-client --query="INSERT INTO table_from_file FROM INFILE 'input.csv' FORMAT CSV;"
clickhouse-client --query="SELECT * FROM table_from_file FORMAT PrettyCompact;"
```
Result:
```text
┌─id─┬─text─┐
│ 1 │ A │
│ 2 │ B │
└────┴──────┘
```
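A hedged variant of the example above, with the compression type stated explicitly rather than detected from the extension (the file name is hypothetical):

``` sql
INSERT INTO table_from_file FROM INFILE 'input.csv.gz' COMPRESSION 'gzip' FORMAT CSV;
```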
### Inserting into Table Function {#inserting-into-table-function}
Data can be inserted into tables referenced by [table functions](../../sql-reference/table-functions/index.md).
**Syntax**
``` sql
INSERT INTO [TABLE] FUNCTION table_func ...
```
**Example**
[remote](../../sql-reference/table-functions/index.md#remote) table function is used in the following queries:
``` sql
CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
VALUES (100, 'inserted via remote()');
SELECT * FROM simple_table;
```
Result:
``` text
┌──id─┬─text──────────────────┐
│ 100 │ inserted via remote() │
└─────┴───────────────────────┘
```
### Performance Considerations {#performance-considerations} ### Performance Considerations {#performance-considerations}
`INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this: `INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this:

View File

@ -27,7 +27,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[LIMIT [n, ]m] [WITH TIES] [LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...] [SETTINGS ...]
[UNION ...] [UNION ...]
[INTO OUTFILE filename] [INTO OUTFILE filename [COMPRESSION type] ]
[FORMAT format] [FORMAT format]
``` ```

View File

@ -4,10 +4,35 @@ toc_title: INTO OUTFILE
# INTO OUTFILE Clause {#into-outfile-clause} # INTO OUTFILE Clause {#into-outfile-clause}
Add the `INTO OUTFILE filename` clause (where filename is a string literal) to `SELECT query` to redirect its output to the specified file on the client-side. The `INTO OUTFILE` clause redirects the result of a `SELECT` query to a file on the **client** side.
Compressed files are supported. The compression type is detected from the file name extension (mode `'auto'` is used by default), or it can be specified explicitly in the `COMPRESSION` clause.
**Syntax**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
```
`file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
## Implementation Details {#implementation-details} ## Implementation Details {#implementation-details}
- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail. - This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
- The query will fail if a file with the same filename already exists. - The query will fail if a file with the same file name already exists.
- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). - The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
**Example**
Execute the following query using [command-line client](../../../interfaces/cli.md):
```bash
clickhouse-client --query="SELECT 1,'ABC' INTO OUTFILE 'select.gz' FORMAT CSV;"
zcat select.gz
```
Result:
```text
1,"ABC"
```
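A hedged variant of the same query with the compression type given explicitly in the `COMPRESSION` clause instead of being inferred from the file extension (the output file name is hypothetical):

``` sql
SELECT 1, 'ABC' INTO OUTFILE 'select.csv.zst' COMPRESSION 'zstd' FORMAT CSV;
```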

View File

@ -18,6 +18,8 @@ You can use table functions in:
It's one of the methods of creating a table. It's one of the methods of creating a table.
- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query.
!!! warning "Warning" !!! warning "Warning"
You cant use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. You cant use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled.

View File

@ -15,7 +15,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
... ...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); ) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
SETTINGS
[connection_pool_size=16, ]
[connection_max_tries=3, ]
[connection_wait_timeout=5, ] /* 0 -- не ждать */
[connection_auto_close=true ]
;
``` ```
Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query). Смотрите подробное описание запроса [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query).
@ -102,8 +108,43 @@ SELECT * FROM mysql_table
└────────────────┴────────┘ └────────────────┴────────┘
``` ```
## Смотрите также {#smotrite-takzhe} ## Настройки {#mysql-settings}
- [Табличная функция mysql](../../../engines/table-engines/integrations/mysql.md) Настройки по умолчанию не очень эффективны, так как они не используют повторное соединение. Эти настройки позволяют увеличить количество запросов, выполняемых сервером в секунду.
### connection_auto_close {#connection-auto-close}
Позволяет автоматически закрыть соединение после выполнения запроса, то есть отключить повторное использование соединения.
Возможные значения:
- 1 — автоматическое закрытие соединения разрешено (повторное использование отключается).
- 0 — автоматическое закрытие соединения запрещено (повторное использование включается).
Значение по умолчанию: `1`.
### connection_max_tries {#connection-max-tries}
Устанавливает количество повторных попыток для пула со сбоями соединения.
Возможные значения:
- Положительное целое число.
- 0 — отсутствуют повторные попытки для пула со сбоями соединения.
Значение по умолчанию: `3`.
### connection_pool_size {#connection-pool-size}
Задает размер пула соединений (если используются все соединения, запрос будет ждать, пока какое-либо соединение не будет освобождено).
Возможные значения:
- Положительное целое число.
Значение по умолчанию: `16`.
## См. также {#see-also}
- [Табличная функция mysql](../../../engines/table-engines/integrations/mysql.md)
- [Использование MySQL в качестве источника для внешнего словаря](../../../engines/table-engines/integrations/mysql.md#dicts-external_dicts_dict_sources-mysql) - [Использование MySQL в качестве источника для внешнего словаря](../../../engines/table-engines/integrations/mysql.md#dicts-external_dicts_dict_sources-mysql)

View File

@ -20,6 +20,8 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [CSV](#csv) | ✔ | ✔ | | [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ | | [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ | | [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ |
| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ | | [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ | | [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ | | [JSON](#json) | ✗ | ✔ |
@ -368,8 +370,17 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
## CustomSeparated {#format-customseparated} ## CustomSeparated {#format-customseparated}
Аналогичен [Template](#format-template), но выводит (или считывает) все столбцы, используя для них правило экранирования из настройки `format_custom_escaping_rule` и разделители из настроек `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` и `format_custom_result_after_delimiter`, а не из форматных строк. Аналогичен [Template](#format-template), но выводит (или считывает) все имена и типы столбцов, используя для них правило экранирования из настройки [format_custom_escaping_rule](../operations/settings/settings.md#format-custom-escaping-rule) и разделители из настроек [format_custom_field_delimiter](../operations/settings/settings.md#format-custom-field-delimiter), [format_custom_row_before_delimiter](../operations/settings/settings.md#format-custom-row-before-delimiter), [format_custom_row_after_delimiter](../operations/settings/settings.md#format-custom-row-after-delimiter), [format_custom_row_between_delimiter](../operations/settings/settings.md#format-custom-row-between-delimiter), [format_custom_result_before_delimiter](../operations/settings/settings.md#format-custom-result-before-delimiter) и [format_custom_result_after_delimiter](../operations/settings/settings.md#format-custom-result-after-delimiter), а не из форматных строк.
Также существует формат `CustomSeparatedIgnoreSpaces`, аналогичный `TemplateIgnoreSpaces`.
Также существует формат `CustomSeparatedIgnoreSpaces`, аналогичный формату [TemplateIgnoreSpaces](#templateignorespaces).
## CustomSeparatedWithNames {#customseparatedwithnames}
Выводит также заголовок с именами столбцов, аналогичен формату [TabSeparatedWithNames](#tabseparatedwithnames).
## CustomSeparatedWithNamesAndTypes {#customseparatedwithnamesandtypes}
Выводит также два заголовка с именами и типами столбцов, аналогичен формату [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
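
A small sketch of how the `WithNames` variant behaves together with the custom-delimiter settings described above; the column names and the `;` delimiter are chosen purely for illustration.

``` sql
SET format_custom_escaping_rule = 'CSV', format_custom_field_delimiter = ';';

SELECT number AS id, concat('user_', toString(number)) AS name
FROM numbers(2)
FORMAT CustomSeparatedWithNames;
-- Expected shape of the output: a header row "id;name" followed by "0;user_0" and "1;user_1".
```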
## JSON {#json} ## JSON {#json}
@ -1399,14 +1410,17 @@ SELECT * FROM line_as_string;
При работе с форматом `Regexp` можно использовать следующие параметры: При работе с форматом `Regexp` можно использовать следующие параметры:
- `format_regexp` — [String](../sql-reference/data-types/string.md). Строка с регулярным выражением в формате [re2](https://github.com/google/re2/wiki/Syntax). - `format_regexp` — [String](../sql-reference/data-types/string.md). Строка с регулярным выражением в формате [re2](https://github.com/google/re2/wiki/Syntax).
- `format_regexp_escaping_rule` — [String](../sql-reference/data-types/string.md). Правило экранирования. Поддерживаются следующие правила:
    - CSV (как в формате [CSV](#csv))
    - JSON (как в формате [JSONEachRow](#jsoneachrow))
    - Escaped (как в формате [TSV](#tabseparated))
    - Quoted (как в формате [Values](#data-format-values))
    - Raw (данные импортируются как есть, без экранирования, как в формате [TSVRaw](#tabseparatedraw))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Признак, будет ли генерироваться исключение в случае, если импортируемые данные не соответствуют регулярному выражению `format_regexp`. Может принимать значение `0` или `1`.
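
A hedged sketch of how these settings can be combined; the table `logins` and the regular expression are hypothetical and only illustrate the mechanics.

``` sql
CREATE TABLE logins (id UInt32, user String) ENGINE = Memory;

SET format_regexp = '(\\d+);"(.+)"',
    format_regexp_escaping_rule = 'Escaped',
    format_regexp_skip_unmatched = 0;

-- Each input line must match the regular expression; the capture groups
-- are mapped to the table columns in order.
INSERT INTO logins FORMAT Regexp
1;"alice"
2;"bob"
```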
**Использование**

View File

@ -131,7 +131,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
```xml ```xml
<encryption_codecs> <encryption_codecs>
<aes_128_gcm_siv> <aes_128_gcm_siv>
<nonce>0123456789101</nonce> <nonce>012345678910</nonce>
</aes_128_gcm_siv> </aes_128_gcm_siv>
</encryption_codecs> </encryption_codecs>
``` ```

View File

@ -3830,3 +3830,54 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- 0 — большие файлы считываются только с копированием данных из ядра в пространство пользователей. - 0 — большие файлы считываются только с копированием данных из ядра в пространство пользователей.
Значение по умолчанию: `0`. Значение по умолчанию: `0`.
## format_custom_escaping_rule {#format-custom-escaping-rule}
Устанавливает правило экранирования данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Возможные значения:
- `'Escaped'` — как в формате [TSV](../../interfaces/formats.md#tabseparated).
- `'Quoted'` — как в формате [Values](../../interfaces/formats.md#data-format-values).
- `'CSV'` — как в формате [CSV](../../interfaces/formats.md#csv).
- `'JSON'` — как в формате [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
- `'XML'` — как в формате [XML](../../interfaces/formats.md#xml).
- `'Raw'` — данные импортируются как есть, без экранирования, как в формате [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
Значение по умолчанию: `'Escaped'`.
## format_custom_field_delimiter {#format-custom-field-delimiter}
Задает символ, который интерпретируется как разделитель между полями данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `'\t'`.
## format_custom_row_before_delimiter {#format-custom-row-before-delimiter}
Задает символ, который интерпретируется как разделитель перед полем первого столбца данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_row_after_delimiter {#format-custom-row-after-delimiter}
Задает символ, который интерпретируется как разделитель после поля последнего столбца данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `'\n'`.
## format_custom_row_between_delimiter {#format-custom-row-between-delimiter}
Задает символ, который интерпретируется как разделитель между строками данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_result_before_delimiter {#format-custom-result-before-delimiter}
Задает символ, который интерпретируется как префикс перед результирующим набором данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
## format_custom_result_after_delimiter {#format-custom-result-after-delimiter}
Задает символ, который интерпретируется как суффикс после результирующего набора данных формата [CustomSeparated](../../interfaces/formats.md#format-customseparated).
Значение по умолчанию: `''`.
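
To make the interplay of these delimiters concrete, here is a small sketch; the concrete delimiter strings are arbitrary and serve only as an example.

``` sql
SET format_custom_escaping_rule = 'Quoted',
    format_custom_field_delimiter = ', ',
    format_custom_row_before_delimiter = '[',
    format_custom_row_after_delimiter = ']\n',
    format_custom_result_before_delimiter = '=== begin ===\n',
    format_custom_result_after_delimiter = '=== end ===\n';

SELECT number, toString(number) FROM numbers(2) FORMAT CustomSeparated;
-- Roughly expected output:
-- === begin ===
-- [0, '0']
-- [1, '1']
-- === end ===
```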

View File

@ -1,9 +1,14 @@
---
toc_priority: 46
toc_title: Cловари полигонов
---
# Cловари полигонов {#polygon-dictionaries} # Cловари полигонов {#polygon-dictionaries}
Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов. Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов.
Для примера: определение района города по географическим координатам. Для примера: определение района города по географическим координатам.
Пример конфигурации: Пример конфигурации словаря полигонов:
``` xml ``` xml
<dictionary> <dictionary>
@ -28,9 +33,12 @@
</structure> </structure>
<layout> <layout>
<polygon /> <polygon>
<store_polygon_key_column>1</store_polygon_key_column>
</polygon>
</layout> </layout>
...
</dictionary> </dictionary>
``` ```
@ -42,11 +50,12 @@ CREATE DICTIONARY polygon_dict_name (
value UInt64 value UInt64
) )
PRIMARY KEY key PRIMARY KEY key
LAYOUT(POLYGON()) LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
... ...
``` ```
При конфигурации словаря полигонов ключ должен иметь один из двух типов: При конфигурации словаря полигонов ключ должен иметь один из двух типов:
- Простой полигон. Представляет из себя массив точек. - Простой полигон. Представляет из себя массив точек.
- Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона, - Мультиполигон. Представляет из себя массив полигонов. Каждый полигон задается двумерным массивом точек — первый элемент этого массива задает внешнюю границу полигона,
последующие элементы могут задавать дырки, вырезаемые из него. последующие элементы могут задавать дырки, вырезаемые из него.
@ -55,24 +64,25 @@ LAYOUT(POLYGON())
Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах. Пользователь может [загружать свои собственные данные](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) во всех поддерживаемых ClickHouse форматах.
Доступно 3 типа [хранения данных в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md): Доступно 3 типа [хранения данных в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md):
- POLYGON_SIMPLE. Это наивная реализация, в которой на каждый запрос делается линейный проход по всем полигонам, и для каждого проверяется принадлежность без использования дополнительных индексов. - `POLYGON_SIMPLE`. Это наивная реализация, в которой на каждый запрос делается линейный проход по всем полигонам, и для каждого проверяется принадлежность без использования дополнительных индексов.
- POLYGON_INDEX_EACH. Для каждого полигона строится отдельный индекс, который позволяет быстро проверять принадлежность в большинстве случаев (оптимизирован под географические регионы). - `POLYGON_INDEX_EACH`. Для каждого полигона строится отдельный индекс, который позволяет быстро проверять принадлежность в большинстве случаев (оптимизирован под географические регионы).
Также на рассматриваемую область накладывается сетка, которая значительно сужает количество рассматриваемых полигонов. Также на рассматриваемую область накладывается сетка, которая значительно сужает количество рассматриваемых полигонов.
Сетка строится рекурсивным делением ячейки на 16 равных частей и конфигурируется двумя параметрами. Сетка строится рекурсивным делением ячейки на 16 равных частей и конфигурируется двумя параметрами.
Деление прекращается при достижении глубины рекурсии MAX_DEPTH или в тот момент, когда ячейку пересекают не более MIN_INTERSECTIONS полигонов. Деление прекращается при достижении глубины рекурсии `MAX_DEPTH` или в тот момент, когда ячейку пересекают не более `MIN_INTERSECTIONS` полигонов.
Для ответа на запрос находится соответствующая ячейка, и происходит поочередное обращение к индексу для сохранных в ней полигонов. Для ответа на запрос находится соответствующая ячейка, и происходит поочередное обращение к индексу для сохранных в ней полигонов.
- POLYGON_INDEX_CELL. В этом размещении также строится сетка, описанная выше. Доступны такие же параметры. Для каждой ячейки-листа строится индекс на всех попадающих в неё кусках полигонов, который позволяет быстро отвечать на запрос. - `POLYGON_INDEX_CELL`. В этом размещении также строится сетка, описанная выше. Доступны такие же параметры. Для каждой ячейки-листа строится индекс на всех попадающих в неё кусках полигонов, который позволяет быстро отвечать на запрос.
- POLYGON. Синоним к POLYGON_INDEX_CELL. - `POLYGON`. Синоним к `POLYGON_INDEX_CELL`.
Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями. Запросы к словарю осуществляются с помощью стандартных [функций](../../../sql-reference/functions/ext-dict-functions.md) для работы со внешними словарями.
Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон. Важным отличием является то, что здесь ключами будут являются точки, для которых хочется найти содержащий их полигон.
**Пример**
Пример работы со словарем, определенным выше: Пример работы со словарем, определенным выше:
``` sql ``` sql
CREATE TABLE points ( CREATE TABLE points (
@ -84,3 +94,40 @@ SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, '
``` ```
В результате исполнения последней команды для каждой точки в таблице `points` будет найден полигон минимальной площади, содержащий данную точку, и выведены запрошенные атрибуты.
**Пример**
Вы можете читать столбцы из полигональных словарей с помощью SELECT. Для этого включите `store_polygon_key_column = 1` в конфигурации словаря или в соответствующем DDL-запросе.
Запрос:
``` sql
CREATE TABLE polygons_test_table
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
) ENGINE = TinyLog;
INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
CREATE DICTIONARY polygons_test_dictionary
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
LIFETIME(0);
SELECT * FROM polygons_test_dictionary;
```
Результат:
``` text
┌─key─────────────────────────────┬─name──┐
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```

View File

@ -748,7 +748,7 @@ SOURCE(REDIS(
!!! info "Примечание" !!! info "Примечание"
Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`. Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`.
### PosgreSQL {#dicts-external_dicts_dict_sources-postgresql} ### PostgreSQL {#dicts-external_dicts_dict_sources-postgresql}
Пример настройки: Пример настройки:

View File

@ -89,9 +89,39 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
## sipHash128 {#hash_functions-siphash128} ## sipHash128 {#hash_functions-siphash128}
Вычисляет SipHash от строки. Генерирует 128-битное хеш-значение [SipHash](https://131002.net/siphash/). Отличается от [sipHash64](#hash_functions-siphash64) тем, что финальный xor-folding состояния делается до 128 бит.
Принимает аргумент типа String. Возвращает FixedString(16).
Отличается от sipHash64 тем, что финальный xor-folding состояния делается только до 128 бит. **Синтаксис**
``` sql
sipHash128(par1,...)
```
**Аргументы**
Функция принимает переменное число входных параметров. Аргументы могут быть любого [поддерживаемого типа данных](../../sql-reference/functions/hash-functions.md).
**Возвращаемое значение**
128-битное хеш-значение `SipHash`.
Тип: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Пример**
Запрос:
``` sql
SELECT hex(sipHash128('foo', '\x01', 3));
```
Результат:
``` text
┌─hex(sipHash128('foo', '', 3))────┐
│ 9DE516A64A414D4B1B609415E4523F24 │
└──────────────────────────────────┘
```
## cityHash64 {#cityhash64} ## cityHash64 {#cityhash64}
@ -459,30 +489,38 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
## murmurHash3_128 {#murmurhash3-128} ## murmurHash3_128 {#murmurhash3-128}
Генерирует значение [MurmurHash3](https://github.com/aappleby/smhasher). Генерирует 128-битное хеш-значение [MurmurHash3](https://github.com/aappleby/smhasher).
**Синтаксис**
``` sql ``` sql
murmurHash3_128( expr ) murmurHash3_128(expr)
``` ```
**Аргументы** **Аргументы**
- `expr`[выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql-reference/functions/hash-functions.md). - `expr`список [выражений](../../sql-reference/syntax.md#syntax-expressions). [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение** **Возвращаемое значение**
Хэш-значение типа [FixedString(16)](../../sql-reference/functions/hash-functions.md). 128-битное хеш-значение `MurmurHash3`.
Тип: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
**Пример** **Пример**
Запрос:
``` sql ``` sql
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
``` ```
Результат:
``` text ``` text
┌─MurmurHash3──────────────────────┬─type───┐ ┌─hex(murmurHash3_128('foo', 'foo', 'foo'))─┐
368A1A311CB7342253354B548E7E7E71 │ String F8F7AD9B6CD4CF117A71E277E2EC2931
└──────────────────────────────────────────┘ └──────────────────────────────────────────┘
``` ```
## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64} ## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64}

View File

@ -10,7 +10,7 @@ toc_title: "Манипуляции со столбцами"
Синтаксис: Синтаксис:
``` sql ``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|MODIFY|MATERIALIZE COLUMN ... ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
``` ```
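
A brief illustration of the syntax above, assuming the `visits` table used elsewhere on this page; `MODIFY COLUMN` and the `ALTER COLUMN ... TYPE` alias described further down are interchangeable here.

``` sql
-- Both statements change the type of the column `browser` to String.
ALTER TABLE visits MODIFY COLUMN browser String;
ALTER TABLE visits ALTER COLUMN browser TYPE String;
```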
В запросе можно указать сразу несколько действий над одной таблицей через запятую. В запросе можно указать сразу несколько действий над одной таблицей через запятую.
@ -138,6 +138,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'Столбец показывает,
``` sql ``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST] MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER name_after | FIRST]
``` ```
Запрос изменяет следующие свойства столбца `name`: Запрос изменяет следующие свойства столбца `name`:

View File

@ -3,11 +3,11 @@ toc_priority: 33
toc_title: INSERT INTO toc_title: INSERT INTO
--- ---
## INSERT {#insert} ## INSERT INTO {#insert}
Добавление данных. Добавляет данные в таблицу.
Базовый формат запроса: **Синтаксис**
``` sql ``` sql
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
@ -21,17 +21,15 @@ INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), .
SHOW CREATE insert_select_testtable SHOW CREATE insert_select_testtable
``` ```
``` ```text
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ CREATE TABLE insert_select_testtable
│ CREATE TABLE insert_select_testtable
( (
`a` Int8, `a` Int8,
`b` String, `b` String,
`c` Int8 `c` Int8
) )
ENGINE = MergeTree() ENGINE = MergeTree()
ORDER BY a │ ORDER BY a
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
``` ```
``` sql ``` sql
@ -93,6 +91,8 @@ INSERT INTO t FORMAT TabSeparated
### Вставка результатов `SELECT` {#insert_query_insert-select} ### Вставка результатов `SELECT` {#insert_query_insert-select}
**Синтаксис**
``` sql ``` sql
INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
``` ```
@ -109,6 +109,68 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
Чтобы вставить значение по умолчанию вместо `NULL` в столбец, который не позволяет хранить `NULL`, включите настройку [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default). Чтобы вставить значение по умолчанию вместо `NULL` в столбец, который не позволяет хранить `NULL`, включите настройку [insert_null_as_default](../../operations/settings/settings.md#insert_null_as_default).
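
A small sketch of the effect; the table `dst` and the expression are hypothetical.

``` sql
CREATE TABLE dst (k UInt32, v String) ENGINE = MergeTree ORDER BY k;

SET insert_null_as_default = 1;

-- if(...) yields Nullable(String); with the setting enabled, NULL rows are
-- stored as the default value for String (an empty string) instead of failing.
INSERT INTO dst SELECT number, if(number % 2 = 1, 'odd', NULL) FROM numbers(4);
```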
### Вставка данных из файла {#inserting-data-from-a-file}
**Синтаксис**
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`.
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла, либо он может быть задан в секции `COMPRESSION`. Поддерживаются форматы: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
Эта функциональность поддерживается [клиентом командной строки](../../interfaces/cli.md) и [clickhouse-local](../../operations/utilities/clickhouse-local.md).
**Пример**
Выполните следующие запросы, используя [клиент командной строки](../../interfaces/cli.md):
```bash
echo 1,A > input.csv ; echo 2,B >> input.csv
clickhouse-client --query="CREATE TABLE table_from_file (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;"
clickhouse-client --query="INSERT INTO table_from_file FROM INFILE 'input.csv' FORMAT CSV;"
clickhouse-client --query="SELECT * FROM table_from_file FORMAT PrettyCompact;"
```
Результат:
```text
┌─id─┬─text─┐
│ 1 │ A │
│ 2 │ B │
└────┴──────┘
```
### Вставка в табличную функцию {#inserting-into-table-function}
Данные могут быть вставлены в таблицы, заданные с помощью [табличных функций](../../sql-reference/table-functions/index.md).
**Синтаксис**
``` sql
INSERT INTO [TABLE] FUNCTION table_func ...
```
**Пример**
Табличная функция [remote](../../sql-reference/table-functions/index.md#remote) используется в следующих запросах:
``` sql
CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
VALUES (100, 'inserted via remote()');
SELECT * FROM simple_table;
```
Результат:
``` text
┌──id─┬─text──────────────────┐
│ 100 │ inserted via remote() │
└─────┴───────────────────────┘
```
### Замечания о производительности {#zamechaniia-o-proizvoditelnosti} ### Замечания о производительности {#zamechaniia-o-proizvoditelnosti}
`INSERT` сортирует входящие данные по первичному ключу и разбивает их на партиции по ключу партиционирования. Если вы вставляете данные в несколько партиций одновременно, то это может значительно снизить производительность запроса `INSERT`. Чтобы избежать этого: `INSERT` сортирует входящие данные по первичному ключу и разбивает их на партиции по ключу партиционирования. Если вы вставляете данные в несколько партиций одновременно, то это может значительно снизить производительность запроса `INSERT`. Чтобы избежать этого:

View File

@ -25,7 +25,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[LIMIT [n, ]m] [WITH TIES] [LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...] [SETTINGS ...]
[UNION ALL ...] [UNION ALL ...]
[INTO OUTFILE filename] [INTO OUTFILE filename [COMPRESSION type] ]
[FORMAT format] [FORMAT format]
``` ```

View File

@ -4,10 +4,35 @@ toc_title: INTO OUTFILE
# Секция INTO OUTFILE {#into-outfile-clause} # Секция INTO OUTFILE {#into-outfile-clause}
Чтобы перенаправить вывод `SELECT` запроса в указанный файл на стороне клиента, добавьте к нему секцию `INTO OUTFILE filename` (где filename — строковый литерал). Секция `INTO OUTFILE` перенаправляет результат запроса `SELECT` в файл на стороне **клиента**.
Поддерживаются сжатые файлы. Формат сжатия определяется по расширению файла (по умолчанию используется режим `'auto'`), либо он может быть задан явно в секции `COMPRESSION`.
**Синтаксис**
```sql
SELECT <expr_list> INTO OUTFILE file_name [COMPRESSION type]
```
`file_name` и `type` задаются в виде строковых литералов. Поддерживаются форматы сжатия: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
## Детали реализации {#implementation-details} ## Детали реализации {#implementation-details}
- Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md) вернет ошибку. - Эта функция доступна только в следующих интерфейсах: [клиент командной строки](../../../interfaces/cli.md) и [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Таким образом, запрос, отправленный через [HTTP интерфейс](../../../interfaces/http.md) вернет ошибку.
- Запрос завершится ошибкой, если файл с тем же именем уже существует. - Запрос завершится ошибкой, если файл с тем же именем уже существует.
- По умолчанию используется [выходной формат](../../../interfaces/formats.md) `TabSeparated` (как в пакетном режиме клиента командной строки). - По умолчанию используется [выходной формат](../../../interfaces/formats.md) `TabSeparated` (как в пакетном режиме клиента командной строки). Его можно изменить в секции [FORMAT](format.md).
**Пример**
Выполните следующий запрос, используя [клиент командной строки](../../../interfaces/cli.md):
```bash
clickhouse-client --query="SELECT 1,'ABC' INTO OUTFILE 'select.gz' FORMAT CSV;"
zcat select.gz
```
Результат:
```text
1,"ABC"
```

View File

@ -18,6 +18,8 @@ toc_title: "Введение"
Это один из методов создания таблицы. Это один из методов создания таблицы.
- Запросе [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function).
!!! warning "Предупреждение" !!! warning "Предупреждение"
Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно. Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно.

View File

@ -86,6 +86,7 @@ git push
- **Compiler**: `gcc-9``clang-10` (或其他架构的`clang-10-xx`, 比如`clang-10-freebsd`). - **Compiler**: `gcc-9``clang-10` (或其他架构的`clang-10-xx`, 比如`clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake). - **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan). - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` 构建使用来自 `contrib` 库, 而 `unbundled` 构建使用系统库.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build) - **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build)
- **Status**: `成功``失败` - **Status**: `成功``失败`
- **Build log**: 链接到构建和文件复制日志, 当构建失败时很有用. - **Build log**: 链接到构建和文件复制日志, 当构建失败时很有用.

View File

@ -1 +0,0 @@
../../en/development/tests.md

View File

@ -0,0 +1,335 @@
# ClickHouse 测试 {#clickhouse-testing}
## 功能测试 {#functional-tests}
功能测试使用起来最简单方便. 大多数 ClickHouse 特性都可以通过功能测试进行测试, 并且对于可以通过功能测试进行测试的 ClickHouse 代码的每一个更改, 都必须使用这些特性
每个功能测试都会向正在运行的 ClickHouse 服务器发送一个或多个查询, 并将结果与参考进行比较.
测试位于 `queries` 目录中. 有两个子目录: `stateless` 和 `stateful`. 无状态测试在没有任何预加载测试数据的情况下运行查询 - 它们通常在测试本身内即时创建小型合成数据集. 有状态测试需要来自 Yandex.Metrica 的预加载测试数据, 它对公众开放.
每个测试可以是两种类型之一: `.sql``.sh`. `.sql` 测试是简单的 SQL 脚本, 它通过管道传输到 `clickhouse-client --multiquery --testmode`. `.sh` 测试是一个自己运行的脚本. SQL 测试通常比 `.sh` 测试更可取. 仅当您必须测试某些无法从纯 SQL 中执行的功能时才应使用 `.sh` 测试, 例如将一些输入数据传送到 `clickhouse-client` 或测试 `clickhouse-local`.
### 在本地运行测试 {#functional-test-locally}
在本地启动ClickHouse服务器, 监听默认端口(9000). 例如, 要运行测试 `01428_hash_set_nan_key`, 请切换到存储库文件夹并运行以下命令:
```
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
```
有关更多选项, 请参阅`tests/clickhouse-test --help`. 您可以简单地运行所有测试或运行由测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`. 还有并行或随机顺序运行测试的选项.
### 添加新测试 {#adding-new-test}
添加新的测试, 在 `queries/0_stateless` 目录下创建 `.sql``.sh` 文件, 手动检查, 然后通过以下方式生成`.reference`文件:`clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`.
测试应仅使用(创建、删除等)`test` 数据库中假定已预先创建的表; 测试也可以使用临时表.
### 选择测试名称 {#choosing-test-name}
测试名称以五位数前缀开头, 后跟描述性名称, 例如 `00422_hash_function_constexpr.sql`. 要选择前缀, 请找到目录中已存在的最大前缀, 并将其加一. 在此期间, 可能会添加一些具有相同数字前缀的其他测试, 但这没关系并且不会导致任何问题, 您以后不必更改它.
一些测试的名称中标有 `zookeeper`、`shard` 或 `long` . `zookeeper` 用于使用 ZooKeeper 的测试. `shard` 用于需要服务器监听 `127.0.0.*` 的测试; `distributed` 和 `global` 具有相同的含义. `long` 用于运行时间稍长于一秒的测试. 你可以分别使用 `--no-zookeeper`、`--no-shard` 和 `--no-long` 选项禁用这些测试组. 如果需要 ZooKeeper 或分布式查询, 请确保为您的测试名称添加适当的前缀.
### 检查必须发生的错误 {#checking-error-must-occur}
有时您想测试是否因不正确的查询而发生服务器错误. 我们支持在 SQL 测试中对此进行特殊注释, 形式如下:
```
select x; -- { serverError 49 }
```
此测试确保服务器返回关于未知列“x”的错误代码为 49. 如果没有错误, 或者错误不同, 则测试失败. 如果您想确保错误发生在客户端, 请改用 `clientError` 注释.
不要检查错误消息的特定措辞, 它将来可能会发生变化, 并且测试将不必要地中断. 只检查错误代码. 如果现有的错误代码不足以满足您的需求, 请考虑添加一个新的.
### 测试分布式查询 {#testing-distributed-query}
如果你想在功能测试中使用分布式查询, 你可以使用 `127.0.0.{1..2}` 的地址, 以便服务器查询自己; 或者您可以在服务器配置文件中使用预定义的测试集群, 例如`test_shard_localhost`. 请记住在测试名称中添加 `shard``distributed` 字样, 以便它以正确的配置在 CI 中运行, 其中服务器配置为支持分布式查询.
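
For example, a quick self-distributed query over the loopback addresses mentioned above might look like the sketch below; it is an illustration, not part of the original test suite.

``` sql
-- The server queries itself twice via 127.0.0.1 and 127.0.0.2,
-- so the count is 2 while both "shards" resolve to the same host.
SELECT hostName() AS h, count() FROM remote('127.0.0.{1,2}', system.one) GROUP BY h;
```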
## 已知错误 {#known-bugs}
如果我们知道一些可以通过功能测试轻松重现的错误, 我们将准备好的功能测试放在 `tests/queries/bugs` 目录中. 修复错误后, 这些测试将移至 `tests/queries/0_stateless` .
## 集成测试 {#integration-tests}
集成测试允许在集群配置中测试 ClickHouse 以及 ClickHouse 与其他服务器(如 MySQL、Postgres、MongoDB)的交互. 它们可以用来模拟网络分裂、丢包等情况. 这些测试在Docker下运行, 并使用各种软件创建多个容器.
有关如何运行这些测试, 请参阅 `tests/integration/README.md` .
注意, ClickHouse与第三方驱动程序的集成没有经过测试. 另外, 我们目前还没有JDBC和ODBC驱动程序的集成测试.
## 单元测试 {#unit-tests}
当您想测试的不是 ClickHouse 整体, 而是单个独立库或类时,单元测试很有用. 您可以使用 `ENABLE_TESTS` CMake 选项启用或禁用测试构建. 单元测试(和其他测试程序)位于代码中的 `tests` 子目录中. 要运行单元测试, 请键入 `ninja test` 。有些测试使用 `gtest` , 但有些程序在测试失败时会返回非零退出码.
如果代码已经被功能测试覆盖了, 就没有必要进行单元测试(而且功能测试通常更易于使用).
例如, 您可以通过直接调用可执行文件来运行单独的 gtest 检查:
```bash
$ ./src/unit_tests_dbms --gtest_filter=LocalAddress*
```
## 性能测试 {#performance-tests}
性能测试允许测量和比较 ClickHouse 的某些孤立部分在合成查询上的性能. 测试位于 `tests/performance`. 每个测试都由带有测试用例描述的 `.xml` 文件表示. 测试使用 `docker/tests/performance-comparison` 工具运行. 请参阅自述文件以进行调用.
每个测试在循环中运行一个或多个查询(可能带有参数组合). 一些测试可以包含预加载测试数据集的先决条件.
如果您希望在某些场景中提高ClickHouse的性能并且如果可以在简单的查询中观察到改进那么强烈建议编写性能测试。在测试期间使用 `perf top` 或其他perf工具总是有意义的.
## 测试工具和脚本 {#test-tools-and-scripts}
`tests` 目录中的一些程序不是准备好的测试,而是测试工具. 例如, 对于 `Lexer`, 有一个工具 `src/Parsers/tests/lexer` , 它只是对标准输入进行标记化并将着色结果写入标准输出. 您可以将这些类型的工具用作代码示例以及用于探索和手动测试.
## 其他测试 {#miscellaneous-tests}
`tests/external_models` 中有机器学习模型的测试. 这些测试不会更新, 必须转移到集成测试.
仲裁插入有单独的测试. 该测试在不同的服务器上运行 ClickHouse 集群并模拟各种故障情况:网络分裂、丢包(ClickHouse 节点之间、ClickHouse 和 ZooKeeper 之间、ClickHouse 服务器和客户端之间等)、`kill -9`、`kill -STOP` 和 `kill -CONT` , 比如 [Jepsen](https://aphyr.com/tags/Jepsen). 然后测试检查所有已确认的插入是否已写入并且所有被拒绝的插入均未写入.
在 ClickHouse 开源之前, Quorum 测试是由单独的团队编写的. 这个团队不再与ClickHouse合作. 测试碰巧是用Java编写的. 由于这些原因, 必须重写仲裁测试并将其转移到集成测试.
## 手动测试 {#manual-testing}
当您开发一个新特性时, 手动测试它也是合理的. 您可以按照以下步骤进行操作:
构建 ClickHouse. 从终端运行 ClickHouse将目录更改为 `programs/clickhouse-server` 并使用 `./clickhouse-server` 运行它. 默认情况下, 它将使用当前目录中的配置(`config.xml`、`users.xml` 和`config.d` 和`users.d` 目录中的文件). 要连接到 ClickHouse 服务器, 请运行 `programs/clickhouse-client/clickhouse-client` .
请注意, 所有 clickhouse 工具(服务器、客户端等)都只是指向名为 `clickhouse` 的单个二进制文件的符号链接. 你可以在 `programs/clickhouse` 找到这个二进制文件. 所有工具也可以作为 `clickhouse tool` 而不是 `clickhouse-tool` 调用.
或者, 您可以安装 ClickHouse 包: 从 Yandex 存储库稳定发布, 或者您可以在 ClickHouse 源根目录中使用 `./release` 为自己构建包. 然后使用 `sudo service clickhouse-server start` 启动服务器(或停止以停止服务器). 在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志.
当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件:
``` bash
$ sudo service clickhouse-server stop
$ sudo cp ./clickhouse /usr/bin/
$ sudo service clickhouse-server start
```
您也可以停止系统 clickhouse-server 并使用相同的配置运行您自己的服务器, 但登录到终端:
``` bash
$ sudo service clickhouse-server stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
使用 gdb 的示例:
``` bash
$ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
如果系统 clickhouse-server 已经在运行并且你不想停止它, 你可以在你的 `config.xml` 中更改端口号(或在 `config.d` 目录中的文件中覆盖它们), 提供适当的数据路径, 并运行它.
`clickhouse` 二进制文件几乎没有依赖关系, 可以在广泛的 Linux 发行版中使用. 要在服务器上快速而肮脏地测试您的更改, 您可以简单地将新构建的 `clickhouse` 二进制文件 `scp` 到您的服务器, 然后按照上面的示例运行它.
## 测试环境 {#testing-environment}
在发布稳定版之前, 我们将其部署在测试环境中.测试环境是一个集群,处理 [Yandex.Metrica](https://metrica.yandex.com/) 数据的 1/39 部分. 我们与 Yandex.Metrica 团队共享我们的测试环境. ClickHouse无需在现有数据上停机即可升级. 我们首先看到的是, 数据被成功地处理了, 没有滞后于实时, 复制继续工作, Yandex.Metrica 团队没有发现任何问题. 第一次检查可以通过以下方式进行:
``` sql
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
```
在某些情况下, 我们还会部署到 Yandex 中我们朋友团队的测试环境Market、Cloud 等. 此外, 我们还有一些用于开发目的的硬件服务器.
## 负载测试 {#load-testing}
部署到测试环境后, 我们使用来自生产集群的查询运行负载测试. 这是手动完成的.
确保您在生产集群上启用了 `query_log`.
收集一天或更长时间的查询日志:
``` bash
$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
```
这是一个复杂的例子. `type = 2` 将过滤成功执行的查询. `query LIKE '%ym:%'` 是从 Yandex.Metrica 中选择相关查询. `is_initial_query` 是只选择客户端发起的查询, 而不是 ClickHouse 本身(作为分布式查询处理的一部分).
`scp` 将此日志记录到您的测试集群并按如下方式运行它:
``` bash
$ clickhouse benchmark --concurrency 16 < queries.tsv
```
(可能你还想指定一个 `--user`)
然后把它留到晚上或周末, 去休息一下.
您应该检查 `clickhouse-server` 没有崩溃, 内存占用是有限的, 且性能不会随着时间的推移而降低.
由于查询和环境的高度可变性, 没有记录和比较精确的查询执行时间.
## 构建测试 {#build-tests}
构建测试允许检查在各种可选配置和一些外部系统上的构建是否被破坏. 这些测试也是自动化的.
示例:
- Darwin x86_64 (Mac OS X) 交叉编译
- FreeBSD x86_64 交叉编译
- Linux AArch64 交叉编译
- 使用系统包中的库在 Ubuntu 上构建(不鼓励)
- 使用库的共享链接构建(不鼓励)
例如, 使用系统包构建是不好的做法, 因为我们无法保证系统将拥有哪个确切版本的包. 但这确实是 Debian 维护者所需要的. 出于这个原因, 我们至少必须支持这种构建变体. 另一个例子: 共享链接是一个常见的麻烦来源, 但对于一些爱好者来说是需要的.
虽然我们无法对所有构建变体运行所有测试, 但我们希望至少检查各种构建变体没有被破坏. 为此, 我们使用构建测试.
我们还测试了那些太长而无法编译或需要太多RAM的没有翻译单元.
我们还测试没有太大的堆栈帧.
## 协议兼容性测试 {#testing-for-protocol-compatibility}
当我们扩展 ClickHouse 网络协议时, 我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 一起工作, 而新的 clickhouse-client 与旧的 clickhouse-server 一起工作(只需从相应的包中运行二进制文件).
我们还使用集成测试自动测试一些案例:
- 旧版本ClickHouse写入的数据是否可以被新版本成功读取;
- 在具有不同 ClickHouse 版本的集群中执行分布式查询.
## 编译器的帮助 {#help-from-the-compiler}
主要的 ClickHouse 代码(位于 `dbms` 目录中)是用 `-Wall -Wextra -Werror` 和一些额外的启用警告构建的. 虽然没有为第三方库启用这些选项.
Clang 有更多有用的警告 - 你可以用 `-Weverything` 寻找它们并选择一些东西来默认构建.
对于生产构建, 使用 clang, 但我们也测试 make gcc 构建. 对于开发, clang 通常使用起来更方便. 您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电池), 但请注意, 由于更好的控制流和过程间分析, 编译器能够使用 `-O3` 生成更多警告. 在调试模式下使用 clang 构建时, 使用调试版本的 `libc++` 允许在运行时捕获更多错误.
## 地址清理器 {#sanitizers}
### 地址清理器
我们在ASan上运行功能测试、集成测试、压力测试和单元测试.
### 线程清理器
我们在TSan下运行功能测试、集成测试、压力测试和单元测试.
### 内存清理器
我们在MSan上运行功能测试、集成测试、压力测试和单元测试.
### 未定义的行为清理器
我们在UBSan下运行功能测试、集成测试、压力测试和单元测试. 某些第三方库的代码未针对 UB 进行清理.
### Valgrind (Memcheck)
我们曾经在 Valgrind 下通宵运行功能测试, 但不再这样做了. 这需要几个小时. 目前在`re2`库中有一个已知的误报, 见[这篇文章](https://research.swtch.com/sparse).
## 模糊测试 {#fuzzing}
ClickHouse 模糊测试是使用 [libFuzzer](https://llvm.org/docs/LibFuzzer.html) 和随机 SQL 查询实现的. 所有模糊测试都应使用sanitizers(地址和未定义)进行.
LibFuzzer 用于库代码的隔离模糊测试. Fuzzer 作为测试代码的一部分实现, 并具有 `_fuzzer` 名称后缀.
Fuzzer 示例可以在 `src/Parsers/tests/lexer_fuzzer.cpp` 中找到. LibFuzzer 特定的配置、字典和语料库存储在 `tests/fuzz` 中.
我们鼓励您为处理用户输入的每个功能编写模糊测试.
默认情况下不构建模糊器. 要构建模糊器, 应设置` -DENABLE_FUZZING=1` 和 `-DENABLE_TESTS=1` 选项.
我们建议在构建模糊器时禁用 Jemalloc. 用于将 ClickHouse fuzzing 集成到 Google OSS-Fuzz 的配置可以在 `docker/fuzz` 中找到.
我们还使用简单的模糊测试来生成随机SQL查询, 并检查服务器在执行这些查询时是否会死亡.
你可以在 `00746_sql_fuzzy.pl` 中找到它. 这个测试应该连续运行(通宵或更长时间).
我们还使用复杂的基于 AST 的查询模糊器, 它能够找到大量的极端情况. 它在查询 AST 中进行随机排列和替换. 它会记住先前测试中的 AST 节点, 以使用它们对后续测试进行模糊测试, 同时以随机顺序处理它们. 您可以在 [这篇博客文章](https://clickhouse.com/blog/en/2021/fuzzing-clickhouse/) 中了解有关此模糊器的更多信息.
## 压力测试 {#stress-test}
压力测试是另一种模糊测试. 它使用单个服务器以随机顺序并行运行所有功能测试. 不检查测试结果.
经检查:
- 服务器不会崩溃,不会触发调试或清理程序陷阱;
- 没有死锁;
- 数据库结构一致;
- 服务器可以在测试后成功停止并重新启动,没有异常;
有五种变体 (Debug, ASan, TSan, MSan, UBSan).
## 线程模糊器 {#thread-fuzzer}
Thread Fuzzer(请不要与 Thread Sanitizer 混淆)是另一种允许随机化线程执行顺序的模糊测试. 它有助于找到更多特殊情况.
## 安全审计 {#security-audit}
Yandex安全团队的人员从安全的角度对ClickHouse的功能做了一些基本的概述.
## 静态分析仪 {#static-analyzers}
我们在每次提交的基础上运行 `clang-tidy``PVS-Studio`. `clang-static-analyzer` 检查也被启用. `clang-tidy` 也用于一些样式检查.
我们已经评估了 `clang-tidy`、`Coverity`、`cppcheck`、`PVS-Studio`、`tscancode`、`CodeQL`. 您将在 `tests/instructions/` 目录中找到使用说明. 你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/).
如果你使用 `CLion` 作为 IDE, 你可以利用一些开箱即用的 `clang-tidy` 检查
我们还使用 `shellcheck` 对shell脚本进行静态分析.
## 硬化 {#hardening}
在调试版本中, 我们使用自定义分配器执行用户级分配的 ASLR.
我们还手动保护在分配后预期为只读的内存区域.
在调试构建中, 我们还需要对libc进行自定义, 以确保不会调用 "有害的" (过时的、不安全的、非线程安全的)函数.
Debug 断言被广泛使用.
在调试版本中,如果抛出带有 "逻辑错误" 代码(暗示错误)的异常, 则程序会过早终止. 它允许在发布版本中使用异常, 但在调试版本中使其成为断言.
jemalloc 的调试版本用于调试版本.
libc++ 的调试版本用于调试版本.
## 运行时完整性检查
存储在磁盘上的数据带有校验和. MergeTree 表中的数据同时以三种方式进行校验和*(压缩数据块、未压缩数据块、跨块的总校验和). 客户端和服务器之间或服务器之间通过网络传输的数据也会进行校验和. 复制确保副本上的数据位相同.
需要防止硬件故障(存储介质上的位腐烂、服务器上 RAM 中的位翻转、网络控制器 RAM 中的位翻转、网络交换机 RAM 中的位翻转、客户端 RAM 中的位翻转、线路上的位翻转). 请注意,比特位操作很常见, 即使对于 ECC RAM 和 TCP 校验和(如果您每天设法运行数千台处理 PB 数据的服务器, 也可能发生比特位操作. [观看视频(俄语)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
ClickHouse 提供诊断功能, 可帮助运维工程师找到故障硬件.
\* 它并不慢.
## 代码风格 {#code-style}
[此处](style.md)描述了代码样式规则.
要检查一些常见的样式违规,您可以使用 `utils/check-style` 脚本.
要强制使用正确的代码样式, 您可以使用 `clang-format`. 文件 `.clang-format` 位于源根目录. 它大多与我们的实际代码风格相对应. 但是不建议将 `clang-format` 应用于现有文件, 因为它会使格式变得更糟. 您可以使用可以在 clang 源代码库中找到的 `clang-format-diff` 工具.
或者, 您可以尝试使用 `uncrustify` 工具来重新格式化您的代码. 配置位于源根目录中的 `uncrustify.cfg` 中. 它比 `clang-format` 测试更少.
`CLion` 有自己的代码格式化程序, 必须根据我们的代码风格进行调整.
我们还使用 `codespell` 来查找代码中的拼写错误.它也是自动化的.
## Metrica B2B 测试 {#metrica-b2b-tests}
每个 ClickHouse 版本都使用 Yandex Metrica 和 AppMetrica 引擎进行测试. ClickHouse 的测试版和稳定版部署在 VM 上, 并使用 Metrica 引擎的小副本运行, 该引擎处理输入数据的固定样本. 然后将两个 Metrica 引擎实例的结果放在一起比较.
这些测试由单独的团队自动化. 由于移动部件数量众多, 测试在大多数情况下都因完全不相关的原因而失败, 这些原因很难弄清楚. 这些测试很可能对我们有负面价值. 尽管如此, 这些测试在数百次中被证明是有用的.
## 测试覆盖率 {#test-coverage}
我们还跟踪测试覆盖率, 但仅针对功能测试和 clickhouse-server. 它每天进行.
## Tests for Tests
有自动检测薄片测试. 它运行所有新测试100次(用于功能测试)或10次(用于集成测试). 如果至少有一次测试失败,它就被认为是脆弱的.
## Testflows
[Testflows](https://testflows.com/) 是一个企业级的测试框架. Altinity 使用它进行一些测试, 我们在 CI 中运行这些测试.
## Yandex 检查 (only for Yandex employees)
这些检查将ClickHouse代码导入到Yandex内部的单一存储库中, 所以ClickHouse代码库可以被Yandex的其他产品(YT和YDB)用作库. 请注意, clickhouse-server本身并不是由内部回购构建的, Yandex应用程序使用的是未经修改的开源构建的.
## 测试自动化 {#test-automation}
我们使用 Yandex 内部 CI 和名为 "Sandbox" 的作业自动化系统运行测试.
在每次提交的基础上, 构建作业和测试都在沙箱中运行. 生成的包和测试结果发布在GitHub上, 可以通过直接链接下载. 产物要保存几个月. 当你在GitHub上发送一个pull请求时, 我们会把它标记为 "可以测试" , 我们的CI系统会为你构建ClickHouse包(发布、调试、使用地址清理器等).
由于时间和计算能力的限制, 我们不使用 Travis CI.
我们不用Jenkins. 以前用过, 现在我们很高兴不用Jenkins了.
[原始文章](https://clickhouse.com/docs/en/development/tests/) <!--hide-->

View File

@ -4,11 +4,53 @@ toc_title: WHERE
# WHERE {#select-where} # WHERE {#select-where}
`WHERE` 子句允许过滤从 [FROM](../../../sql-reference/statements/select/from.md) 子句 `SELECT`. `WHERE` 子句允许过滤来自`SELECT`的子句 [FROM](../../../sql-reference/statements/select/from.md) 的数据.
如果有一个 `WHERE` 子句,它必须包含一个表达式与 `UInt8` 类型。 这通常是一个带有比较和逻辑运算符的表达式。 此表达式计算结果为0的行将从进一步的转换或结果中解释出来 如果有一个 `WHERE` 子句,它必须包含一个表达式与 `UInt8` 类型。 这通常是一个带有比较和逻辑运算符的表达式。 表达式计算结果为0的行将被排除在在进一步的转换或结果之外
`WHERE` 如果基础表引擎支持,则根据使用索引和分区修剪的能力评估表达式 如果基础表引擎支持,`WHERE`表达式会使用索引和分区进行剪枝
!!! note "注" !!! note "注"
有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西. 有一个叫做过滤优化 [prewhere](../../../sql-reference/statements/select/prewhere.md) 的东西.
如果需要测试一个 [NULL](../../../sql-reference/syntax.md#null-literal) 值,请使用 [IS NULL](../../operators/index.md#operator-is-null) 和 [IS NOT NULL](../../operators/index.md#is-not-null) 运算符或 [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) 和 [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) 函数。否则带有 NULL 的表达式永远不会通过。
**示例**
在 [numbers table](../../../sql-reference/table-functions/numbers.md) 表上执行下述查询, 以找到是 3 的倍数且大于 10 的数字:
``` sql
SELECT number FROM numbers(20) WHERE (number > 10) AND (number % 3 == 0);
```
结果:
``` text
┌─number─┐
│ 12 │
│ 15 │
│ 18 │
└────────┘
```
带有 `NULL` 值的查询:
``` sql
CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE=MergeTree() ORDER BY x;
INSERT INTO t_null VALUES (1, NULL), (2, 3);
SELECT * FROM t_null WHERE y IS NULL;
SELECT * FROM t_null WHERE y != 0;
```
结果:
``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
└───┴──────┘
┌─x─┬─y─┐
│ 2 │ 3 │
└───┴───┘
```

View File

@ -1 +0,0 @@
../../../en/whats-new/changelog/2017.md

View File

@ -0,0 +1,261 @@
### ClickHouse 版本 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21}
此版本包含先前版本 1.1.54318 的错误修复:
- 修复了可能导致数据丢失的复制中可能出现的竞争条件的错误. 此问题影响1.1.54310和1.1.54318版本. 如果将这些版本的任意一个与 Replicated 表一起使用,则强烈建议进行更新. 此问题显示在日志中的警告消息中,例如 `Part ... from own log does not exist.` 即使您没有在日志中看到这些消息,该问题也是相关的.
### ClickHouse 版本 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30}
此版本包含先前版本 1.1.54310 的错误修复:
- 修复了在 SummingMergeTree 引擎中合并期间不正确的行删除.
- 修复了未复制的 MergeTree 引擎中的内存泄漏.
- 修复了在 MergeTree 引擎中频繁插入导致性能下降的问题.
- 修复了导致复制队列停止运行的问题.
- 修复了服务器日志的轮换和归档.
### ClickHouse 版本 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01}
#### 新特征: {#new-features}
- MergeTree 系列表引擎的自定义分区键.
- [Kafka](https://clickhouse.com/docs/en/operations/table_engines/kafka/) 表引擎.
- 增加了对加载[CatBoost](https://catboost.yandex/)模型的支持, 并将它们应用到存储在ClickHouse中的数据.
- 添加了对 UTC 非整数偏移时区的支持
- 添加了对具有时间间隔的算术运算的支持.
- Date 和 DateTime 类型的值范围扩展到 2105 年.
- 添加了 `CREATE MATERIALIZED VIEW x TO y` 查询(指定用于存储物化视图数据的现有表).
- 添加了不带参数的`ATTACH TABLE` 查询.
- SummingMergeTree 表中名称以 -Map 结尾的嵌套列的处理逻辑被提取到 sumMap 聚合函数中. 您现在可以明确指定此类列.
- IP 树字典的最大大小增加到 128M 条目.
- 添加了 getSizeOfEnumType 函数.
- 添加了 sumWithOverflow 聚合函数.
- 添加了对 Cap'n Proto 输入格式的支持.
- 您现在可以在使用 zstd 算法时自定义压缩级别.
#### 向后不兼容的变化: {#backward-incompatible-changes}
- 不允许使用内存以外的引擎创建临时表.
- 不允许使用 View 或 MaterializedView 引擎显式创建表.
- 在表创建期间,新的检查验证采样键表达式是否包含在主键中.
#### Bug 修复: {#bug-fixes}
- 修复了同步插入分布式表时的挂断问题.
- 修复了复制表中部件的非原子添加和删除.
- 插入物化视图的数据不会进行不必要的重复数据删除.
- 对本地副本滞后且远程副本不可用的分布式表执行查询不再导致错误.
- 用户不再需要访问 `default` 数据库的权限来创建临时表.
- 修复了指定不带参数的 Array 类型时崩溃的问题.
- 修复了包含服务器日志的磁盘卷已满时的挂断问题.
- 修复了 Unix 纪元第一周 toRelativeWeekNum 函数中的溢出问题.
#### Build 改进: {#build-improvements}
- 更新了多个第三方库(尤其是 Poco)并转换为 git 子模块.
### ClickHouse 版本 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19}
#### 新特征: {#new-features-1}
- 本机协议中的 TLS 支持(要启用,请在 `config.xml` 中设置 `tcp_ssl_port`).
#### Bug 修复: {#bug-fixes-1}
- 复制表的`ALTER` 现在尝试尽快开始运行.
- 修复了使用设置 `preferred_block_size_bytes=0.` 读取数据时崩溃的问题.
- 修复了按下 `Page Down``clickhouse-client` 崩溃的问题.
- 使用 `GLOBAL IN``UNION ALL` 正确解释某些复杂的查询.
- `FREEZE PARTITION` 现在总是以原子方式工作.
- 空 POST 请求现在返回代码为 411 的响应.
- 修正了像 `CAST(1 AS Nullable(UInt8)).` 这样的表达式的解释错误.
- 修正了从 `MergeTree` 表中读取 `Array(Nullable(String))` 列时的错误.
- 修复了在解析诸如 `SELECT dummy AS dummy, dummy AS b` 之类的查询时崩溃的问题.
- 用户使用无效的 `users.xml` 正确更新.
- 可执行字典返回非零响应代码时的正确处理.
### ClickHouse 版本 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20}
#### 新特征: {#new-features-2}
- 添加了用于处理坐标平面上的坐标的 `pointInPolygon` 函数.
- 添加了用于计算数组总和的 `sumMap` 聚合函数, 类似于 `SummingMergeTree` .
- 添加了 `trunc` 功能. 改进了舍入函数(`round`、`floor`、`ceil`、`roundToExp2`)的性能并更正了它们工作方式的逻辑. 更改了分数和负数的 `roundToExp2` 函数的逻辑.
- ClickHouse 可执行文件现在较少依赖于 libc 版本. 同一个 ClickHouse 可执行文件可以在各种 Linux 系统上运行. 使用编译查询时仍然存在依赖性(使用设置 `compile = 1` , 默认情况下不使用).
- 减少动态编译查询所需的时间.
#### Bug 修复: {#bug-fixes-2}
- 修复了有时会产生 `part ... intersects previous part` 消息和削弱副本一致性的错误.
- 修复了关闭期间 ZooKeeper 不可用导致服务器锁定的错误.
- 恢复副本时删除了过多的日志记录.
- 修复了 UNION ALL 实现中的错误.
- 修复了如果块中的第一列具有 Array 类型时在 concat 函数中发生的错误.
- 进度现在在 system.merges 表中可以正确显示.
### ClickHouse 版本 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13}
#### 新特征: {#new-features-3}
- 用于服务器管理的 `SYSTEM` 查询: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
- 添加了用于处理数组的函数: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`.
- 为 ZooKeeper 配置添加了 `root``identity` 参数. 这将允许您隔离同一 ZooKeeper 集群上的各个用户.
- 添加了聚合函数 `groupBitAnd``groupBitOr``groupBitXor` (为了兼容性,它们也可以在名称 `BIT_AND` 、`BIT_OR`和`BIT_XOR` 下使用).
- 可以通过在文件系统中指定套接字来从 MySQL 加载外部字典.
- 可以通过 SSL 从 MySQL 加载外部字典 (`ssl_cert`, `ssl_key`, `ssl_ca` 参数).
- 添加了 `max_network_bandwidth_for_user` 设置以限制每个用户查询的总体带宽使用.
- 支持临时表的 `DROP TABLE`.
- 支持从 `CSV``JSONEachRow` 格式读取 Unix 时间戳格式的 `DateTime` 值.
- 现在默认排除分布式查询中的滞后副本(默认阈值为 5 分钟).
- 在 ALTER 期间使用 FIFO 锁定: 对于连续运行的查询, ALTER 查询不会被无限期阻塞.
- 在配置文件中设置 `umask` 的选项.
- 使用 `DISTINCT` 提高查询的性能.
#### Bug 修复: {#bug-fixes-3}
- 改进了在 ZooKeeper 中删除旧节点的过程. 以前, 如果插入非常频繁, 旧节点有时不会被删除, 从而导致服务器关闭缓慢等.
- 修复了为 ZooKeeper 连接选择主机时的随机化问题.
- 如果副本是本地主机, 则修复了在分布式查询中排除滞后副本的问题.
- 修复了在 `嵌套` 结构中的元素上运行 `ALTER MODIFY` 后, `ReplicatedMergeTree` 表中的数据部分可能被破坏的错误.
- 修复了可能导致 SELECT 查询 `hang` 的错误.
- 分布式 DDL 查询的改进.
- 修复了查询 `CREATE TABLE ... AS <materialized view>`.
- 解决了对 Buffer 表的 `ALTER ... CLEAR COLUMN IN PARTITION` 查询中的死锁.
- 修复了使用 `JSONEachRow``TSKV` 格式时 `Enum` 的无效默认值 (0 而不是最小值).
- 解决了使用带有 `可执行` 源的字典时出现僵尸进程的问题.
- 修复了 HEAD 查询的段错误.
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse}
- 您可以使用 `pbuilder` 来构建 ClickHouse.
- 你可以使用 `libc++` 代替 `libstdc++` 在 Linux 上构建.
- 添加了使用静态代码分析工具的说明: `Coverage`, `clang-tidy`, `cppcheck`.
#### 升级时请注意: {#please-note-when-upgrading}
- 现在有更高的 MergeTree 设置默认值 `max_bytes_to_merge_at_max_space_in_pool` (要合并的数据部分的最大总大小, 以字节为单位): 它已从 100 GiB 增加到 150 GiB. 这可能会导致在服务器升级后运行大型合并, 从而导致磁盘子系统负载增加. 如果服务器上的可用空间小于正在运行的合并总量的两倍, 这将导致所有其他合并停止运行, 包括小数据部分的合并. 因此, INSERT 查询将失败并显示消息"合并的处理速度明显慢于插入." , 使用 `SELECT * FROM system.merges` 查询来监控情况. 您还可以在 `system.metrics` 表或 Graphite 中检查 `DiskSpaceReservedForMerge` 指标. 您不需要做任何事情来解决这个问题, 因为一旦大型合并完成, 问题就会自行解决. 如果您发现这不可接受, 您可以恢复 `max_bytes_to_merge_at_max_space_in_pool` 设置的先前值. 为此, 请转到 config.xml 中的 `<merge_tree>` 部分, 设置 ``` <merge_tree>``<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool> ``` 并重新启动服务器.
### ClickHouse 版本 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
- 这是先前 1.1.54282 版本的错误修复版本. 它修复了 ZooKeeper 中部分目录中的泄漏.
### ClickHouse 版本 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
此版本包含先前版本 1.1.54276 的错误修复:
- 修复了插入分布式表时的 `DB::Exception: Assertion violation: !_path.empty()`.
- 如果输入数据以 ';' 开头, 则在以 RowBinary 格式插入时固定解析.
- 某些聚合函数 (例如 `groupArray()` ) 的运行时编译期间的错误.
### ClickHouse 版本 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
#### 新特征: {#new-features-4}
- SELECT 查询添加了一个可选的 WITH 部分. 示例查询:`WITH 1+1 AS a SELECT a, a*a` .
- INSERT 可以在分布式表中同步执行:只有在所有数据都保存在所有分片上后才返回 OK. 这是通过设置 `insert_distributed_sync=1` 激活的.
- 添加了用于处理 16 字节标识符的 UUID 数据类型.
- 添加了 CHAR、FLOAT 和其他类型的别名以与 Tableau 兼容.
- 新增 toYYYYMM, toYYYYMMDD, toYYYYMMDDhhmmss 时间转数字功能.
- 您可以使用 IP 地址 (与主机名一起) 来识别集群 DDL 查询的服务器.
- 在函数 `substring(str, pos, len)` 中添加了对非常量参数和负偏移量的支持.
- 为 `groupArray(max_size)(column)` 聚合函数增加了max_size参数, 并优化了其性能.
#### Main Changes: {#main-changes}
- 安全改进:所有服务器文件都使用 0640 权限创建(可以通过 `<umask>` 配置参数更改).
- 改进了语法无效查询的错误消息.
- 合并大段 MergeTree 数据时显着减少内存消耗并提高性能.
- 显着提高了 ReplacingMergeTree 引擎的数据合并性能.
- 通过组合多个源插入提高了从分布式表进行异步插入的性能. 要启用此功能, 请使用设置 `distributed_directory_monitor_batch_inserts=1` .
#### Backward Incompatible Changes: {#backward-incompatible-changes-1}
- 更改了数组 `groupArray(array_column)` 函数聚合状态的二进制格式.
#### Complete List of Changes: {#complete-list-of-changes}
- 添加了 `output_format_json_quote_denormals` 设置, 可以以 JSON 格式输出 nan 和 inf 值.
- 从分布式表读取时优化流分配.
- 如果值不变, 可以在只读模式下配置设置.
- 添加了检索 MergeTree 引擎的非整数粒度的功能, 以满足对 `preferred_block_size_bytes` 设置中指定的块大小的限制. 目的是在处理来自大列的表的查询时减少 RAM 的消耗并增加缓存局部性.
- 有效地使用包含像 `toStartOfHour(x)` 这样的表达式的索引来处理像 `toStartOfHour(x) op сonstexpr` 这样的条件.
- 添加了 MergeTree 引擎的新设置(config.xml 中的 merge_tree 部分):
- `replicated_deduplication_window_seconds` 设置允许在复制表中删除重复插入的秒数.
- `cleanup_delay_period` 设置启动清理以删除过时数据的频率.
- `replicated_can_become_leader` 可以防止副本成为领导者(并分配合并).
- 加速清理以从 ZooKeeper 中删除过时的数据.
- 集群 DDL 查询的多项改进和修复. 特别有趣的是新设置 `distributed_ddl_task_timeout`, 它限制了等待集群中服务器响应的时间. 如果 ddl 请求没有在所有主机上执行,响应将包含超时错误并且请求将以异步模式执行.
- 改进了服务器日志中堆栈跟踪的显示.
- 为压缩方法添加了 "none" 值.
- 您可以在 config.xml 中使用多个dictionaries_config 部分.
- 可以通过文件系统中的套接字连接到 MySQL.
- `system.parts` 表有一个新列, 其中包含有关标记大小的信息(以字节为单位).
#### Bug 修复: {#bug-fixes-4}
- 使用 Merge 表的分布式表现在可以正确用于带有 `_table` 字段条件的 SELECT 查询.
- 修复了检查数据部分时 ReplicatedMergeTree 中罕见的竞争条件.
- 修复了启动服务器时 `leader election` 可能会冻结的问题.
- 使用数据源的本地副本时,将忽略 `max_replica_delay_for_distributed_queries` 设置. 这已被修复.
- 修复了尝试清理不存在的列时 `ALTER TABLE CLEAR COLUMN IN PARTITION` 的错误行为.
- 修复了 multiIf 函数中使用空数组或字符串时的异常.
- 修复了反序列化本机格式时过多的内存分配.
- 修复了 Trie 词典的错误自动更新.
- 修复了在使用 SAMPLE 时从合并表中使用 GROUP BY 子句运行查询时的异常.
- 修复了 `distributed_aggregation_memory_efficient=1` 时 GROUP BY 的崩溃.
- 现在可以在 IN 和 JOIN 右侧指定 `database.table`.
- 太多线程用于并行聚合. 这已被修复.
- 修复了 `if` 函数如何与 FixedString 参数一起工作.
- 对于权重为 0 的分片, SELECT 在分布式表中工作不正确. 这已得到修复.
- 运行 `CREATE VIEW IF EXISTS` 不再导致崩溃.
- 修复了设置 `input_format_skip_unknown_fields=1` 且存在负数时的错误行为.
- 修复了如果字典中有一些无效数据, `dictGetHierarchy()` 函数中的无限循环.
- 修复了使用 IN 或 JOIN 子句和合并表中的子查询运行分布式查询时的 `Syntax error: unexpected (...)` 错误.
- 修复了对字典表中 SELECT 查询的错误解释.
- 修复了在超过 20 亿元素的 IN 和 JOIN 子句中使用数组时的 "Cannot mremap" 错误.
- 修复了以 MySQL 为源的字典的故障转移.
#### 改进了开发和组装ClickHouse的工作流: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
- 可以在 Arcadia 中组装 Builds.
- 可以使用 gcc 7 编译 ClickHouse.
- 使用 ccache+distcc 的并行构建现在更快了.
### ClickHouse 版本 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
#### 新特征: {#new-features-5}
- 分布式 DDL(例如, `CREATE TABLE ON CLUSTER`).
- 复制查询 `ALTER TABLE CLEAR COLUMN IN PARTITION`.
- 字典表引擎 (以表的形式访问字典数据).
- 字典数据库引擎 (这种类型的数据库自动为所有连接的外部字典提供字典表).
- 您可以通过向源发送请求来检查字典的更新.
- 合格的列名.
- 使用双引号引用标识符.
- HTTP 接口中的会话.
- 复制表的 OPTIMIZE 查询不仅可以在领导者上运行.
#### 向后不兼容的变化: {#backward-incompatible-changes-2}
- 删除了 SET GLOBAL.
#### 次要更改: {#minor-changes}
- 目前在触发警报后,日志会打印完整的堆栈跟踪.
- 放宽了启动时损坏/额外数据部分数量的验证(误报太多).
#### Bug 修复: {#bug-fixes-5}
- 修复了插入分布式表时的错误连接"卡住"问题.
- GLOBAL IN 现在适用于来自查看分布式表的合并表的查询.
- 在 Google Compute Engine 虚拟机上检测到的内核数不正确. 这已被修复.
- 缓存外部字典的可执行源的工作方式发生了变化.
- 修正了包含空字符的字符串的比较.
- 修复了 Float32 主键字段与常量的比较.
- 以前,对字段大小的错误估计可能会导致分配过大.
- 修复了使用 ALTER 查询添加到表中的 Nullable 列时发生的崩溃.
- 修复了当行数小于 LIMIT 时按 Nullable 列排序时崩溃的问题.
- 修复了仅由常量值组成的 ORDER BY 子查询.
- 以前,复制表在 DROP TABLE 失败后可能保持无效状态.
- 结果为空的标量子查询的别名不再丢失.
- 现在,如果 .so 文件损坏,使用编译的查询不会因错误而失败.

View File

@ -1 +0,0 @@
../../../en/whats-new/changelog/2018.md

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
../../../en/whats-new/changelog/2019.md

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
../../../en/whats-new/changelog/2020.md

File diff suppressed because it is too large Load Diff

View File

@ -492,8 +492,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Override the default paths. /// Override the default paths.
/// Data paths. /// Data paths.
const std::string data_file = config_d / "data-paths.xml";
if (!fs::exists(data_file))
{ {
std::string data_file = config_d / "data-paths.xml";
WriteBufferFromFile out(data_file); WriteBufferFromFile out(data_file);
out << "<clickhouse>\n" out << "<clickhouse>\n"
" <path>" << data_path.string() << "</path>\n" " <path>" << data_path.string() << "</path>\n"
@ -503,12 +504,14 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</clickhouse>\n"; "</clickhouse>\n";
out.sync(); out.sync();
out.finalize(); out.finalize();
fs::permissions(data_file, fs::perms::owner_read, fs::perm_options::replace);
fmt::print("Data path configuration override is saved to file {}.\n", data_file); fmt::print("Data path configuration override is saved to file {}.\n", data_file);
} }
/// Logger. /// Logger.
const std::string logger_file = config_d / "logger.xml";
if (!fs::exists(logger_file))
{ {
std::string logger_file = config_d / "logger.xml";
WriteBufferFromFile out(logger_file); WriteBufferFromFile out(logger_file);
out << "<clickhouse>\n" out << "<clickhouse>\n"
" <logger>\n" " <logger>\n"
@ -518,12 +521,14 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</clickhouse>\n"; "</clickhouse>\n";
out.sync(); out.sync();
out.finalize(); out.finalize();
fs::permissions(logger_file, fs::perms::owner_read, fs::perm_options::replace);
fmt::print("Log path configuration override is saved to file {}.\n", logger_file); fmt::print("Log path configuration override is saved to file {}.\n", logger_file);
} }
/// User directories. /// User directories.
const std::string user_directories_file = config_d / "user-directories.xml";
if (!fs::exists(user_directories_file))
{ {
std::string user_directories_file = config_d / "user-directories.xml";
WriteBufferFromFile out(user_directories_file); WriteBufferFromFile out(user_directories_file);
out << "<clickhouse>\n" out << "<clickhouse>\n"
" <user_directories>\n" " <user_directories>\n"
@ -534,12 +539,14 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</clickhouse>\n"; "</clickhouse>\n";
out.sync(); out.sync();
out.finalize(); out.finalize();
fs::permissions(user_directories_file, fs::perms::owner_read, fs::perm_options::replace);
fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file); fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file);
} }
/// OpenSSL. /// OpenSSL.
const std::string openssl_file = config_d / "openssl.xml";
if (!fs::exists(openssl_file))
{ {
std::string openssl_file = config_d / "openssl.xml";
WriteBufferFromFile out(openssl_file); WriteBufferFromFile out(openssl_file);
out << "<clickhouse>\n" out << "<clickhouse>\n"
" <openSSL>\n" " <openSSL>\n"
@ -552,6 +559,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</clickhouse>\n"; "</clickhouse>\n";
out.sync(); out.sync();
out.finalize(); out.finalize();
fs::permissions(openssl_file, fs::perms::owner_read, fs::perm_options::replace);
fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file); fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file);
} }
} }
@ -761,12 +769,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
#if defined(__linux__) #if defined(__linux__)
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n"); fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
std::string command = fmt::format("command -v setcap >/dev/null" std::string command = fmt::format("command -v setcap >/dev/null"
" && echo > {0} && chmod a+x {0} && {0} && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0} && {0} && rm {0}" " && command -v capsh >/dev/null"
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {1}" " && capsh --has-p=cap_net_admin,cap_ipc_lock,cap_sys_nice+ep >/dev/null 2>&1"
" && setcap 'cap_net_admin,cap_ipc_lock,cap_sys_nice+ep' {0}"
" || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary." " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary."
" This is optional. Taskstats accounting will be disabled." " This is optional. Taskstats accounting will be disabled."
" To enable taskstats accounting you may add the required capability later manually.\"", " To enable taskstats accounting you may add the required capability later manually.\"",
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string()); fs::canonical(main_bin_path).string());
executeScript(command); executeScript(command);
#endif #endif

View File

@ -49,7 +49,7 @@
<!-- Internal port and hostname --> <!-- Internal port and hostname -->
<hostname>localhost</hostname> <hostname>localhost</hostname>
<port>44444</port> <port>9234</port>
</server> </server>
<!-- Add more servers here --> <!-- Add more servers here -->

View File

@ -14,7 +14,7 @@
<server> <server>
<id>1</id> <id>1</id>
<hostname>localhost</hostname> <hostname>localhost</hostname>
<port>44444</port> <port>9234</port>
</server> </server>
</raft_configuration> </raft_configuration>
</keeper_server> </keeper_server>

View File

@ -328,7 +328,11 @@ struct Checker
{ {
checkRequiredInstructions(); checkRequiredInstructions();
} }
} checker __attribute__((init_priority(101))); /// Run before other static initializers. } checker
#ifndef __APPLE__
__attribute__((init_priority(101))) /// Run before other static initializers.
#endif
;
} }

View File

@ -4,9 +4,11 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypesDecimal.h> #include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionSum.h>
#include <Core/DecimalFunctions.h> #include <Core/DecimalFunctions.h>
#include <Common/config.h> #include <Common/config.h>
@ -223,7 +225,7 @@ using AvgFieldType = std::conditional_t<is_decimal<T>,
NearestFieldType<T>>; NearestFieldType<T>>;
template <typename T> template <typename T>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>> class AggregateFunctionAvg : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
{ {
public: public:
using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>; using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>;
@ -232,14 +234,65 @@ public:
using Numerator = typename Base::Numerator; using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator; using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction; using Fraction = typename Base::Fraction;
using ColVecType = ColumnVectorOrDecimal<T>;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{ {
this->data(place).numerator += static_cast<const ColumnVectorOrDecimal<T> &>(*columns[0]).getData()[row_num]; this->data(place).numerator += static_cast<const ColVecType &>(*columns[0]).getData()[row_num];
++this->data(place).denominator; ++this->data(place).denominator;
} }
String getName() const final { return "avg"; } void
addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const final
{
AggregateFunctionSumData<Numerator> sum_data;
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
sum_data.addManyConditional(column.getData().data(), flags.data(), batch_size);
this->data(place).denominator += countBytesInFilter(flags.data(), batch_size);
}
else
{
sum_data.addMany(column.getData().data(), batch_size);
this->data(place).denominator += batch_size;
}
this->data(place).numerator += sum_data.sum;
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *, ssize_t if_argument_pos)
const final
{
AggregateFunctionSumData<Numerator> sum_data;
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
if (if_argument_pos >= 0)
{
/// Merge the 2 sets of flags (null and if) into a single one. This allows us to use parallelizable sums when available
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(batch_size);
size_t used_value = 0;
for (size_t i = 0; i < batch_size; ++i)
{
UInt8 kept = (!null_map[i]) & !!if_flags[i];
final_flags[i] = kept;
used_value += kept;
}
sum_data.addManyConditional(column.getData().data(), final_flags.get(), batch_size);
this->data(place).denominator += used_value;
}
else
{
sum_data.addManyNotNull(column.getData().data(), null_map, batch_size);
this->data(place).denominator += batch_size - countBytesInFilter(null_map, batch_size);
}
this->data(place).numerator += sum_data.sum;
}
String getName() const override { return "avg"; }
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER
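As a rough illustration of the flag-merging trick in addBatchSinglePlaceNotNull above, here is a self-contained sketch (all names hypothetical, not the ClickHouse API): the null map and the IF column are collapsed into a single filter so that only one conditional summation pass is needed.
#include <cstdint>
#include <cstddef>
#include <memory>
template <typename T>
T filteredSum(const T * values, const uint8_t * null_map, const uint8_t * if_flags,
              size_t n, size_t & kept_rows)
{
    /// Merge "is not null" and "if" flags into one filter.
    auto merged = std::make_unique<uint8_t[]>(n);
    kept_rows = 0;
    for (size_t i = 0; i < n; ++i)
    {
        uint8_t keep = static_cast<uint8_t>(!null_map[i]) & static_cast<uint8_t>(!!if_flags[i]);
        merged[i] = keep;
        kept_rows += keep;
    }
    /// Single conditional pass; a vectorized sum could consume `merged` directly.
    T sum{};
    for (size_t i = 0; i < n; ++i)
        if (merged[i])
            sum += values[i];
    return sum;
}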

View File

@ -8,12 +8,12 @@
namespace DB namespace DB
{ {
template <typename T> template <typename T>
class AggregateFunctionSumCount final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>> class AggregateFunctionSumCount final : public AggregateFunctionAvg<T>
{ {
public: public:
using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>; using Base = AggregateFunctionAvg<T>;
AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0) explicit AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0)
: Base(argument_types_, num_scale_), scale(num_scale_) {} : Base(argument_types_, num_scale_), scale(num_scale_) {}
DataTypePtr getReturnType() const override DataTypePtr getReturnType() const override
@ -31,12 +31,6 @@ public:
this->data(place).denominator); this->data(place).denominator);
} }
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
this->data(place).numerator += static_cast<const ColumnVectorOrDecimal<T> &>(*columns[0]).getData()[row_num];
++this->data(place).denominator;
}
String getName() const final { return "sumCount"; } String getName() const final { return "sumCount"; }
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER

View File

@ -209,6 +209,7 @@ add_object_library(clickhouse_storages_mysql Storages/MySQL)
add_object_library(clickhouse_storages_distributed Storages/Distributed) add_object_library(clickhouse_storages_distributed Storages/Distributed)
add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_client Client) add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge Bridge) add_object_library(clickhouse_bridge Bridge)
add_object_library(clickhouse_server Server) add_object_library(clickhouse_server Server)
@ -549,10 +550,5 @@ if (ENABLE_TESTS AND USE_GTEST)
clickhouse_common_zookeeper clickhouse_common_zookeeper
string_utils) string_utils)
# For __udivmodti4 referenced in Core/tests/gtest_DecimalFunctions.cpp
if (OS_DARWIN AND COMPILER_GCC)
target_link_libraries(unit_tests_dbms PRIVATE gcc)
endif ()
add_check(unit_tests_dbms) add_check(unit_tests_dbms)
endif () endif ()

View File

@ -709,7 +709,7 @@ void ClientBase::onProfileEvents(Block & block)
const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData(); const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column); const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column); const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
const auto & array_values = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData(); const auto & array_values = typeid_cast<const ColumnInt64 &>(*block.getByName("value").column).getData();
const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds);
const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds);
@ -736,7 +736,8 @@ void ClientBase::onProfileEvents(Block & block)
thread_times[host_name][thread_id].memory_usage = value; thread_times[host_name][thread_id].memory_usage = value;
} }
} }
progress_indication.updateThreadEventData(thread_times); auto elapsed_time = profile_events.watch.elapsedMicroseconds();
progress_indication.updateThreadEventData(thread_times, elapsed_time);
} }
if (profile_events.print) if (profile_events.print)
@ -748,7 +749,6 @@ void ClientBase::onProfileEvents(Block & block)
logs_out_stream->writeProfileEvents(block); logs_out_stream->writeProfileEvents(block);
logs_out_stream->flush(); logs_out_stream->flush();
profile_events.watch.restart();
profile_events.last_block = {}; profile_events.last_block = {};
} }
else else
@ -756,6 +756,7 @@ void ClientBase::onProfileEvents(Block & block)
profile_events.last_block = block; profile_events.last_block = block;
} }
} }
profile_events.watch.restart();
} }

View File

@ -105,7 +105,7 @@ void InternalTextLogs::writeProfileEvents(const Block & block)
const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData(); const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
const auto & array_type = typeid_cast<const ColumnInt8 &>(*block.getByName("type").column).getData(); const auto & array_type = typeid_cast<const ColumnInt8 &>(*block.getByName("type").column).getData();
const auto & column_name = typeid_cast<const ColumnString &>(*block.getByName("name").column); const auto & column_name = typeid_cast<const ColumnString &>(*block.getByName("name").column);
const auto & array_value = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData(); const auto & array_value = typeid_cast<const ColumnInt64 &>(*block.getByName("value").column).getData();
for (size_t row_num = 0; row_num < block.rows(); ++row_num) for (size_t row_num = 0; row_num < block.rows(); ++row_num)
{ {
@ -146,7 +146,7 @@ void InternalTextLogs::writeProfileEvents(const Block & block)
writeCString(": ", wb); writeCString(": ", wb);
/// value /// value
UInt64 value = array_value[row_num]; Int64 value = array_value[row_num];
writeIntText(value, wb); writeIntText(value, wb);
//// type //// type

View File

@ -119,6 +119,24 @@ std::optional<Elf::Section> Elf::findSectionByName(const char * name) const
String Elf::getBuildID() const String Elf::getBuildID() const
{ {
/// Section headers are the first choice for a debuginfo file
if (String build_id; iterateSections([&build_id](const Section & section, size_t)
{
if (section.header.sh_type == SHT_NOTE)
{
build_id = Elf::getBuildID(section.begin(), section.size());
if (!build_id.empty())
{
return true;
}
}
return false;
}))
{
return build_id;
}
/// fallback to PHDR
for (size_t idx = 0; idx < header->e_phnum; ++idx) for (size_t idx = 0; idx < header->e_phnum; ++idx)
{ {
const ElfPhdr & phdr = program_headers[idx]; const ElfPhdr & phdr = program_headers[idx];
@ -126,6 +144,7 @@ String Elf::getBuildID() const
if (phdr.p_type == PT_NOTE) if (phdr.p_type == PT_NOTE)
return getBuildID(mapped + phdr.p_offset, phdr.p_filesz); return getBuildID(mapped + phdr.p_offset, phdr.p_filesz);
} }
return {}; return {};
} }
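A small hedged sketch of the lookup order introduced above: prefer a build id found via section headers (debuginfo files often keep only those), and fall back to the program headers otherwise. The callables stand in for the real iteration code.
#include <string>
#include <functional>
std::string findBuildId(const std::function<std::string()> & from_section_headers,
                        const std::function<std::string()> & from_program_headers)
{
    if (std::string id = from_section_headers(); !id.empty())
        return id;                      /// SHT_NOTE sections are the first choice
    return from_program_headers();      /// PT_NOTE fallback; may also be empty
}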

View File

@ -54,7 +54,8 @@ public:
const char * end() const { return mapped + elf_size; } const char * end() const { return mapped + elf_size; }
size_t size() const { return elf_size; } size_t size() const { return elf_size; }
/// Obtain build id from PT_NOTES section of program headers. Return empty string if does not exist. /// Obtain build id from SHT_NOTE of section headers (fallback to PT_NOTES section of program headers).
/// Return empty string if does not exist.
/// The string is returned in binary. Note that "readelf -n ./clickhouse-server" prints it in hex. /// The string is returned in binary. Note that "readelf -n ./clickhouse-server" prints it in hex.
String getBuildID() const; String getBuildID() const;
static String getBuildID(const char * nhdr_pos, size_t size); static String getBuildID(const char * nhdr_pos, size_t size);

View File

@ -600,6 +600,7 @@
M(630, HAVE_DEPENDENT_OBJECTS) \ M(630, HAVE_DEPENDENT_OBJECTS) \
M(631, UNKNOWN_FILE_SIZE) \ M(631, UNKNOWN_FILE_SIZE) \
M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \ M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \
M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \
\ \
M(999, KEEPER_EXCEPTION) \ M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \ M(1000, POCO_EXCEPTION) \

View File

@ -360,6 +360,24 @@ void increment(Event event, Count amount)
DB::CurrentThread::getProfileEvents().increment(event, amount); DB::CurrentThread::getProfileEvents().increment(event, amount);
} }
CountersIncrement::CountersIncrement(Counters::Snapshot const & snapshot)
{
init();
std::memcpy(increment_holder.get(), snapshot.counters_holder.get(), Counters::num_counters * sizeof(Increment));
}
CountersIncrement::CountersIncrement(Counters::Snapshot const & after, Counters::Snapshot const & before)
{
init();
for (Event i = 0; i < Counters::num_counters; ++i)
increment_holder[i] = static_cast<Increment>(after[i]) - static_cast<Increment>(before[i]);
}
void CountersIncrement::init()
{
increment_holder = std::make_unique<Increment[]>(Counters::num_counters);
}
} }
#undef APPLY_FOR_EVENTS #undef APPLY_FOR_EVENTS

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <Common/VariableContext.h> #include <Common/VariableContext.h>
#include "base/types.h"
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include <stddef.h> #include <stddef.h>
@ -15,6 +16,7 @@ namespace ProfileEvents
/// Event identifier (index in array). /// Event identifier (index in array).
using Event = size_t; using Event = size_t;
using Count = size_t; using Count = size_t;
using Increment = Int64;
using Counter = std::atomic<Count>; using Counter = std::atomic<Count>;
class Counters; class Counters;
@ -62,16 +64,19 @@ namespace ProfileEvents
struct Snapshot struct Snapshot
{ {
Snapshot(); Snapshot();
Snapshot(Snapshot &&) = default;
const Count & operator[] (Event event) const Count operator[] (Event event) const noexcept
{ {
return counters_holder[event]; return counters_holder[event];
} }
Snapshot & operator=(Snapshot &&) = default;
private: private:
std::unique_ptr<Count[]> counters_holder; std::unique_ptr<Count[]> counters_holder;
friend class Counters; friend class Counters;
friend struct CountersIncrement;
}; };
/// Every single value is fetched atomically, but not all values as a whole. /// Every single value is fetched atomically, but not all values as a whole.
@ -109,4 +114,25 @@ namespace ProfileEvents
/// Get index just after last event identifier. /// Get index just after last event identifier.
Event end(); Event end();
struct CountersIncrement
{
CountersIncrement() noexcept = default;
explicit CountersIncrement(Counters::Snapshot const & snapshot);
CountersIncrement(Counters::Snapshot const & after, Counters::Snapshot const & before);
CountersIncrement(CountersIncrement &&) = default;
CountersIncrement & operator=(CountersIncrement &&) = default;
Increment operator[](Event event) const noexcept
{
return increment_holder[event];
}
private:
void init();
static_assert(sizeof(Count) == sizeof(Increment), "Sizes of counter and increment differ");
std::unique_ptr<Increment[]> increment_holder;
};
} }
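Conceptually, CountersIncrement stores per-event deltas between two snapshots; a minimal sketch of that computation (plain arrays rather than the ClickHouse types) looks like this.
#include <cstdint>
#include <cstddef>
#include <memory>
using Count = size_t;
using Increment = int64_t;
/// Per-counter delta between two snapshots, mirroring CountersIncrement(after, before).
std::unique_ptr<Increment[]> diffSnapshots(const Count * after, const Count * before, size_t num_counters)
{
    auto delta = std::make_unique<Increment[]>(num_counters);
    for (size_t i = 0; i < num_counters; ++i)
        delta[i] = static_cast<Increment>(after[i]) - static_cast<Increment>(before[i]);
    return delta;
}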

View File

@ -16,28 +16,16 @@ namespace
{ {
constexpr UInt64 ALL_THREADS = 0; constexpr UInt64 ALL_THREADS = 0;
UInt64 calculateNewCoresNumber(DB::ThreadIdToTimeMap const & prev, DB::ThreadIdToTimeMap const& next) UInt64 calculateCoresNumber(DB::ThreadIdToTimeMap times, UInt64 elapsed)
{ {
if (next.find(ALL_THREADS) == next.end()) auto accumulated = std::accumulate(times.begin(), times.end(), 0,
return 0; [](Int64 acc, const auto & elem)
{
auto accumulated = std::accumulate(next.cbegin(), next.cend(), 0, if (elem.first == ALL_THREADS)
[&prev](UInt64 acc, const auto & elem) return acc;
{ return acc + elem.second.time();
if (elem.first == ALL_THREADS) });
return acc; return (static_cast<UInt64>(accumulated) + elapsed - 1) / elapsed;
auto thread_time = elem.second.time();
auto it = prev.find(elem.first);
if (it != prev.end())
thread_time -= it->second.time();
return acc + thread_time;
});
auto elapsed = next.at(ALL_THREADS).time() - (prev.contains(ALL_THREADS) ? prev.at(ALL_THREADS).time() : 0);
if (elapsed == 0)
return 0;
return (accumulated + elapsed - 1) / elapsed;
} }
} }
@ -89,14 +77,13 @@ void ProgressIndication::addThreadIdToList(String const & host, UInt64 thread_id
thread_to_times[thread_id] = {}; thread_to_times[thread_id] = {};
} }
void ProgressIndication::updateThreadEventData(HostToThreadTimesMap & new_thread_data) void ProgressIndication::updateThreadEventData(HostToThreadTimesMap & new_thread_data, UInt64 elapsed_time)
{ {
for (auto & new_host_map : new_thread_data) for (auto & new_host_map : new_thread_data)
{ {
auto & host_map = thread_data[new_host_map.first]; auto new_cores = calculateCoresNumber(new_host_map.second, elapsed_time);
auto new_cores = calculateNewCoresNumber(host_map, new_host_map.second);
host_active_cores[new_host_map.first] = new_cores; host_active_cores[new_host_map.first] = new_cores;
host_map = std::move(new_host_map.second); thread_data[new_host_map.first] = std::move(new_host_map.second);
} }
} }
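The new calculateCoresNumber boils down to a ceiling division of accumulated per-thread CPU time by the elapsed wall-clock interval. A self-contained sketch with a guard against a zero interval (which the code above leaves to the caller):
#include <cstdint>
uint64_t estimateActiveCores(uint64_t accumulated_thread_time_us, uint64_t elapsed_us)
{
    if (elapsed_us == 0)
        return 0;                                                        /// nothing elapsed yet
    return (accumulated_thread_time_us + elapsed_us - 1) / elapsed_us;   /// integer ceiling division
}
/// Example: 3'500'000 us of CPU time over a 1'000'000 us interval -> 4 "active cores".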

View File

@ -16,11 +16,11 @@ namespace DB
struct ThreadEventData struct ThreadEventData
{ {
UInt64 time() const noexcept { return user_ms + system_ms; } Int64 time() const noexcept { return user_ms + system_ms; }
UInt64 user_ms = 0; Int64 user_ms = 0;
UInt64 system_ms = 0; Int64 system_ms = 0;
UInt64 memory_usage = 0; Int64 memory_usage = 0;
}; };
using ThreadIdToTimeMap = std::unordered_map<UInt64, ThreadEventData>; using ThreadIdToTimeMap = std::unordered_map<UInt64, ThreadEventData>;
@ -58,7 +58,7 @@ public:
void addThreadIdToList(String const & host, UInt64 thread_id); void addThreadIdToList(String const & host, UInt64 thread_id);
void updateThreadEventData(HostToThreadTimesMap & new_thread_data); void updateThreadEventData(HostToThreadTimesMap & new_thread_data, UInt64 elapsed_time);
bool print_hardware_utilization = false; bool print_hardware_utilization = false;

View File

@ -117,8 +117,10 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
if (sigaddset(&sa.sa_mask, pause_signal)) if (sigaddset(&sa.sa_mask, pause_signal))
throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET); throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
if (sigaction(pause_signal, &sa, previous_handler)) struct sigaction local_previous_handler;
if (sigaction(pause_signal, &sa, &local_previous_handler))
throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
previous_handler.emplace(local_previous_handler);
try try
{ {
@ -133,7 +135,8 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
#else #else
sev._sigev_un._tid = thread_id; sev._sigev_un._tid = thread_id;
#endif #endif
if (timer_create(clock_type, &sev, &timer_id)) timer_t local_timer_id;
if (timer_create(clock_type, &sev, &local_timer_id))
{ {
/// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25. /// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25.
/// https://mybranch.dev/posts/clickhouse-on-cloud-run/ /// https://mybranch.dev/posts/clickhouse-on-cloud-run/
@ -143,6 +146,7 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER); throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER);
} }
timer_id.emplace(local_timer_id);
/// Randomize offset as uniform random value from 0 to period - 1. /// Randomize offset as uniform random value from 0 to period - 1.
/// It will allow to sample short queries even if timer period is large. /// It will allow to sample short queries even if timer period is large.
@ -154,7 +158,7 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const UInt64 thread_id, const
struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION}; struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION};
struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset}; struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset};
if (timer_settime(timer_id, 0, &timer_spec, nullptr)) if (timer_settime(*timer_id, 0, &timer_spec, nullptr))
throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD); throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
} }
catch (...) catch (...)
@ -175,10 +179,10 @@ template <typename ProfilerImpl>
void QueryProfilerBase<ProfilerImpl>::tryCleanup() void QueryProfilerBase<ProfilerImpl>::tryCleanup()
{ {
#if USE_UNWIND #if USE_UNWIND
if (timer_id != nullptr && timer_delete(timer_id)) if (timer_id.has_value() && timer_delete(*timer_id))
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString(ErrorCodes::CANNOT_DELETE_TIMER)); LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString(ErrorCodes::CANNOT_DELETE_TIMER));
if (previous_handler != nullptr && sigaction(pause_signal, previous_handler, nullptr)) if (previous_handler.has_value() && sigaction(pause_signal, &*previous_handler, nullptr))
LOG_ERROR(log, "Failed to restore signal handler after query profiler {}", errnoToString(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER)); LOG_ERROR(log, "Failed to restore signal handler after query profiler {}", errnoToString(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER));
#endif #endif
} }
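The switch from sentinel values (a nullptr timer_t, a raw sigaction pointer) to std::optional means cleanup only touches resources that were actually acquired. A hedged sketch of that pattern using POSIX timers (not the exact ClickHouse class):
#include <optional>
#include <signal.h>
#include <time.h>
struct ProfilerResources
{
    std::optional<timer_t> timer_id;
    std::optional<struct sigaction> previous_handler;
    void tryCleanup(int pause_signal)
    {
        if (timer_id.has_value())
            timer_delete(*timer_id);                                /// only if timer_create succeeded
        if (previous_handler.has_value())
            sigaction(pause_signal, &*previous_handler, nullptr);   /// only if the handler was replaced
    }
};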

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <optional>
#include <base/types.h> #include <base/types.h>
#include <signal.h> #include <signal.h>
#include <time.h> #include <time.h>
@ -40,14 +41,14 @@ private:
#if USE_UNWIND #if USE_UNWIND
/// Timer id from timer_create(2) /// Timer id from timer_create(2)
timer_t timer_id = nullptr; std::optional<timer_t> timer_id;
#endif #endif
/// Pause signal to interrupt threads to get traces /// Pause signal to interrupt threads to get traces
int pause_signal; int pause_signal;
/// Previous signal handler to restore after query profiler exits /// Previous signal handler to restore after query profiler exits
struct sigaction * previous_handler = nullptr; std::optional<struct sigaction> previous_handler;
}; };
/// Query profiler with timer based on real clock /// Query profiler with timer based on real clock

View File

@ -165,7 +165,7 @@ protected:
std::function<void()> fatal_error_callback; std::function<void()> fatal_error_callback;
/// It is used to avoid enabling the query profiler when you have multiple ThreadStatus in the same thread /// It is used to avoid enabling the query profiler when you have multiple ThreadStatus in the same thread
bool query_profiled_enabled = true; bool query_profiler_enabled = true;
/// Requires access to query_id. /// Requires access to query_id.
friend class MemoryTrackerThreadSwitcher; friend class MemoryTrackerThreadSwitcher;
@ -207,7 +207,8 @@ public:
void disableProfiling() void disableProfiling()
{ {
query_profiled_enabled = false; assert(!query_profiler_real && !query_profiler_cpu);
query_profiler_enabled = false;
} }
/// Starts new query and create new thread group for it, current thread becomes master thread of the query /// Starts new query and create new thread group for it, current thread becomes master thread of the query

View File

@ -33,6 +33,8 @@
#define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5 #define DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC 5
#define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 #define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60
#define DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC 5
#define DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC 15
#define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160)
#define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3
/// each period reduces the error counter by 2 times /// each period reduces the error counter by 2 times

View File

@ -501,6 +501,9 @@ namespace MySQLReplication
UInt32 mask = 0; UInt32 mask = 0;
DecimalType res(0); DecimalType res(0);
if (payload.eof())
throw Exception("Attempt to read after EOF.", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
if ((*payload.position() & 0x80) == 0) if ((*payload.position() & 0x80) == 0)
mask = UInt32(-1); mask = UInt32(-1);

View File

@ -38,7 +38,7 @@
#define DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH 54448 #define DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH 54448
#define DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS 54450 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS 54451
/// Version of ClickHouse TCP protocol. /// Version of ClickHouse TCP protocol.
/// ///
@ -47,6 +47,6 @@
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
/// later is just a number for server version (one number instead of commit SHA) /// later is just a number for server version (one number instead of commit SHA)
/// for simplicity (sometimes it may be more convenient in some use cases). /// for simplicity (sometimes it may be more convenient in some use cases).
#define DBMS_TCP_PROTOCOL_VERSION 54450 #define DBMS_TCP_PROTOCOL_VERSION 54451
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449

View File

@ -90,6 +90,7 @@ class IColumn;
M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \
M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ and FileLog engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
\ \
M(Milliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \ M(Milliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \
M(Milliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \ M(Milliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \
@ -411,6 +412,9 @@ class IColumn;
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \
M(Seconds, window_view_clean_interval, DEFAULT_WINDOW_VIEW_CLEAN_INTERVAL_SEC, "The clean interval of window view in seconds to free outdated data.", 0) \
M(Seconds, window_view_heartbeat_interval, DEFAULT_WINDOW_VIEW_HEARTBEAT_INTERVAL_SEC, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\ \
M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \ M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \
@ -588,6 +592,9 @@ class IColumn;
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
\ \
M(String, bool_true_representation, "true", "Text to represent bool value in TSV/CSV formats.", 0) \
M(String, bool_false_representation, "false", "Text to represent bool value in TSV/CSV formats.", 0) \
\
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \

View File

@ -0,0 +1,401 @@
#include <DataStreams/ConvertingBlockInputStream.h>
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockInputStream.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/MaterializingBlockInputStream.h>
#include <DataStreams/copyData.h>
#include <DataTypes/NestedUtils.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <Common/ThreadPool.h>
#include <Common/checkStackSize.h>
#include <Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h>
#include <Storages/StorageValues.h>
#include <Storages/WindowView/StorageWindowView.h>
#include <Storages/LiveView/StorageLiveView.h>
#include <Storages/StorageMaterializedView.h>
#include <common/logger_useful.h>
namespace DB
{
PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
const StoragePtr & storage_,
const StorageMetadataPtr & metadata_snapshot_,
ContextPtr context_,
const ASTPtr & query_ptr_,
bool no_destination)
: WithContext(context_)
, storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, log(&Poco::Logger::get("PushingToViewsBlockOutputStream"))
, query_ptr(query_ptr_)
{
checkStackSize();
/** TODO This is a very important line. At any insertion into the table, one of the streams should own the lock.
* Although any insertion into the table is currently done via PushingToViewsBlockOutputStream,
* it's clear that this is not the best place for this functionality.
*/
addTableLock(
storage->lockForShare(getContext()->getInitialQueryId(), getContext()->getSettingsRef().lock_acquire_timeout));
/// If the "root" table deduplicates blocks, there are no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
bool disable_deduplication_for_children = false;
if (!getContext()->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
disable_deduplication_for_children = !no_destination && storage->supportsDeduplication();
auto table_id = storage->getStorageID();
Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id);
/// We need special context for materialized views insertions
if (!dependencies.empty())
{
select_context = Context::createCopy(context);
insert_context = Context::createCopy(context);
const auto & insert_settings = insert_context->getSettingsRef();
// Do not deduplicate insertions into MV if the main insertion is Ok
if (disable_deduplication_for_children)
insert_context->setSetting("insert_deduplicate", Field{false});
// Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
if (insert_settings.min_insert_block_size_rows_for_materialized_views)
insert_context->setSetting("min_insert_block_size_rows", insert_settings.min_insert_block_size_rows_for_materialized_views.value);
if (insert_settings.min_insert_block_size_bytes_for_materialized_views)
insert_context->setSetting("min_insert_block_size_bytes", insert_settings.min_insert_block_size_bytes_for_materialized_views.value);
}
for (const auto & database_table : dependencies)
{
auto dependent_table = DatabaseCatalog::instance().getTable(database_table, getContext());
auto dependent_metadata_snapshot = dependent_table->getInMemoryMetadataPtr();
ASTPtr query;
BlockOutputStreamPtr out;
if (auto * materialized_view = dynamic_cast<StorageMaterializedView *>(dependent_table.get()))
{
addTableLock(
materialized_view->lockForShare(getContext()->getInitialQueryId(), getContext()->getSettingsRef().lock_acquire_timeout));
StoragePtr inner_table = materialized_view->getTargetTable();
auto inner_table_id = inner_table->getStorageID();
auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
query = dependent_metadata_snapshot->getSelectQuery().inner_query;
std::unique_ptr<ASTInsertQuery> insert = std::make_unique<ASTInsertQuery>();
insert->table_id = inner_table_id;
/// Get list of columns we get from select query.
auto header = InterpreterSelectQuery(query, select_context, SelectQueryOptions().analyze())
.getSampleBlock();
/// Insert only columns returned by select.
auto list = std::make_shared<ASTExpressionList>();
const auto & inner_table_columns = inner_metadata_snapshot->getColumns();
for (const auto & column : header)
{
/// But skip columns which storage doesn't have.
if (inner_table_columns.hasPhysical(column.name))
list->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
}
insert->columns = std::move(list);
ASTPtr insert_query_ptr(insert.release());
InterpreterInsertQuery interpreter(insert_query_ptr, insert_context);
BlockIO io = interpreter.execute();
out = io.out;
}
else if (
dynamic_cast<const StorageLiveView *>(dependent_table.get()) || dynamic_cast<const StorageWindowView *>(dependent_table.get()))
out = std::make_shared<PushingToViewsBlockOutputStream>(
dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true);
else
out = std::make_shared<PushingToViewsBlockOutputStream>(
dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr());
views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr, 0 /* elapsed_ms */});
}
/// Do not push to destination table if the flag is set
if (!no_destination)
{
output = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), getContext());
replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
}
Block PushingToViewsBlockOutputStream::getHeader() const
{
/// If we don't write directly to the destination
/// then expect that we're inserting with precalculated virtual columns
if (output)
return metadata_snapshot->getSampleBlock();
else
return metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals());
}
void PushingToViewsBlockOutputStream::write(const Block & block)
{
/** Throw an exception if the sizes of arrays (elements of nested data structures) don't match.
* We have to make this assertion before writing to the table, because the storage engine may assume that they have equal sizes.
* NOTE It'd be better to do this check during serialization of nested structures (in the place where this assumption is required),
* but currently we don't have methods to serialize nested structures "as a whole".
*/
*/
Nested::validateArraySizes(block);
if (auto * live_view = dynamic_cast<StorageLiveView *>(storage.get()))
{
StorageLiveView::writeIntoLiveView(*live_view, block, getContext());
}
else if (auto * window_view = dynamic_cast<StorageWindowView *>(storage.get()))
{
StorageWindowView::writeIntoWindowView(*window_view, block, getContext());
}
else
{
if (output)
/// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended
/// with additional columns directly from storage and pass it to MVs instead of raw block.
output->write(block);
}
/// Don't process materialized views if this block is a duplicate
if (!getContext()->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views && replicated_output && replicated_output->lastBlockIsDuplicate())
return;
// Insert data into materialized views only after successful insert into main table
const Settings & settings = getContext()->getSettingsRef();
if (settings.parallel_view_processing && views.size() > 1)
{
// Push to views concurrently if enabled and more than one view is attached
ThreadPool pool(std::min(size_t(settings.max_threads), views.size()));
for (auto & view : views)
{
auto thread_group = CurrentThread::getGroup();
pool.scheduleOrThrowOnError([=, &view, this]
{
setThreadName("PushingToViews");
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
process(block, view);
});
}
// Wait for concurrent view processing
pool.wait();
}
else
{
// Process sequentially
for (auto & view : views)
{
process(block, view);
if (view.exception)
std::rethrow_exception(view.exception);
}
}
}
void PushingToViewsBlockOutputStream::writePrefix()
{
if (output)
output->writePrefix();
for (auto & view : views)
{
try
{
view.out->writePrefix();
}
catch (Exception & ex)
{
ex.addMessage("while write prefix to view " + view.table_id.getNameForLogs());
throw;
}
}
}
void PushingToViewsBlockOutputStream::writeSuffix()
{
if (output)
output->writeSuffix();
std::exception_ptr first_exception;
const Settings & settings = getContext()->getSettingsRef();
bool parallel_processing = false;
/// Run writeSuffix() for views in a separate thread pool.
/// It could have been done in PushingToViewsBlockOutputStream::process; however,
/// it is not good if the insert into the main table fails while the insert into a view succeeds.
if (settings.parallel_view_processing && views.size() > 1)
{
parallel_processing = true;
// Push to views concurrently if enabled and more than one view is attached
ThreadPool pool(std::min(size_t(settings.max_threads), views.size()));
auto thread_group = CurrentThread::getGroup();
for (auto & view : views)
{
if (view.exception)
continue;
pool.scheduleOrThrowOnError([thread_group, &view, this]
{
setThreadName("PushingToViews");
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
Stopwatch watch;
try
{
view.out->writeSuffix();
}
catch (...)
{
view.exception = std::current_exception();
}
view.elapsed_ms += watch.elapsedMilliseconds();
LOG_TRACE(log, "Pushing from {} to {} took {} ms.",
storage->getStorageID().getNameForLogs(),
view.table_id.getNameForLogs(),
view.elapsed_ms);
});
}
// Wait for concurrent view processing
pool.wait();
}
for (auto & view : views)
{
if (view.exception)
{
if (!first_exception)
first_exception = view.exception;
continue;
}
if (parallel_processing)
continue;
Stopwatch watch;
try
{
view.out->writeSuffix();
}
catch (Exception & ex)
{
ex.addMessage("while write prefix to view " + view.table_id.getNameForLogs());
throw;
}
view.elapsed_ms += watch.elapsedMilliseconds();
LOG_TRACE(log, "Pushing from {} to {} took {} ms.",
storage->getStorageID().getNameForLogs(),
view.table_id.getNameForLogs(),
view.elapsed_ms);
}
if (first_exception)
std::rethrow_exception(first_exception);
UInt64 milliseconds = main_watch.elapsedMilliseconds();
if (views.size() > 1)
{
LOG_DEBUG(log, "Pushing from {} to {} views took {} ms.",
storage->getStorageID().getNameForLogs(), views.size(),
milliseconds);
}
}
void PushingToViewsBlockOutputStream::flush()
{
if (output)
output->flush();
for (auto & view : views)
view.out->flush();
}
void PushingToViewsBlockOutputStream::process(const Block & block, ViewInfo & view)
{
Stopwatch watch;
try
{
BlockInputStreamPtr in;
/// We need to keep the InterpreterSelectQuery alive until processing is finished, since:
///
/// - We copy the Context inside InterpreterSelectQuery to support
///   modification of the context (Settings) for subqueries
/// - InterpreterSelectQuery lives shorter than the query pipeline.
///   It's used just to build the query pipeline and is no longer needed afterwards
/// - ExpressionAnalyzer and, later, Functions that are created in InterpreterSelectQuery
///   **can** take a reference to the Context from InterpreterSelectQuery
///   (the problem arises only when a function uses the context from the
///   execute*() method, like FunctionDictGet does)
/// - These objects live inside the query pipeline (DataStreams), so the reference would become dangling.
std::optional<InterpreterSelectQuery> select;
if (view.query)
{
/// We create a table with the same name as original table and the same alias columns,
/// but it will contain single block (that is INSERT-ed into main table).
/// InterpreterSelectQuery will do processing of alias columns.
auto local_context = Context::createCopy(select_context);
local_context->addViewSource(
StorageValues::create(storage->getStorageID(), metadata_snapshot->getColumns(), block, storage->getVirtuals()));
select.emplace(view.query, local_context, SelectQueryOptions());
in = std::make_shared<MaterializingBlockInputStream>(select->execute().getInputStream());
/// Squashing is needed here because the materialized view query can generate a lot of blocks
/// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
/// and two-level aggregation is triggered).
in = std::make_shared<SquashingBlockInputStream>(
in, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes);
in = std::make_shared<ConvertingBlockInputStream>(in, view.out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name);
}
else
in = std::make_shared<OneBlockInputStream>(block);
in->readPrefix();
while (Block result_block = in->read())
{
Nested::validateArraySizes(result_block);
view.out->write(result_block);
}
in->readSuffix();
}
catch (Exception & ex)
{
ex.addMessage("while pushing to view " + view.table_id.getNameForLogs());
view.exception = std::current_exception();
}
catch (...)
{
view.exception = std::current_exception();
}
view.elapsed_ms += watch.elapsedMilliseconds();
}
}
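The writeSuffix logic above follows a common pattern: fan out work per view, remember each view's exception, and rethrow the first one only after all views have finished. A standalone sketch of that pattern using std::thread instead of the ThreadPool:
#include <vector>
#include <thread>
#include <exception>
#include <functional>
void pushToViews(std::vector<std::function<void()>> & view_writers)
{
    std::vector<std::exception_ptr> exceptions(view_writers.size());
    std::vector<std::thread> workers;
    for (size_t i = 0; i < view_writers.size(); ++i)
        workers.emplace_back([&, i]
        {
            try { view_writers[i](); }                 /// push the block / finalize one view
            catch (...) { exceptions[i] = std::current_exception(); }
        });
    for (auto & w : workers)
        w.join();                                      /// wait for all views before rethrowing
    for (auto & e : exceptions)
        if (e)
            std::rethrow_exception(e);                 /// first captured exception wins
}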

View File

@ -0,0 +1,21 @@
#include <DataTypes/Serializations/SerializationBool.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>
namespace DB
{
void registerDataTypeDomainBool(DataTypeFactory & factory)
{
factory.registerSimpleDataTypeCustom("Bool", []
{
auto type = DataTypeFactory::instance().get("UInt8");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("Bool"), std::make_unique<SerializationBool>(type->getDefaultSerialization())));
});
factory.registerAlias("bool", "Bool", DataTypeFactory::CaseInsensitive);
factory.registerAlias("boolean", "Bool", DataTypeFactory::CaseInsensitive);
}
}

View File

@ -209,6 +209,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeInterval(*this); registerDataTypeInterval(*this);
registerDataTypeLowCardinality(*this); registerDataTypeLowCardinality(*this);
registerDataTypeDomainIPv4AndIPv6(*this); registerDataTypeDomainIPv4AndIPv6(*this);
registerDataTypeDomainBool(*this);
registerDataTypeDomainSimpleAggregateFunction(*this); registerDataTypeDomainSimpleAggregateFunction(*this);
registerDataTypeDomainGeo(*this); registerDataTypeDomainGeo(*this);
registerDataTypeMap(*this); registerDataTypeMap(*this);

View File

@ -85,6 +85,7 @@ void registerDataTypeNested(DataTypeFactory & factory);
void registerDataTypeInterval(DataTypeFactory & factory); void registerDataTypeInterval(DataTypeFactory & factory);
void registerDataTypeLowCardinality(DataTypeFactory & factory); void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory); void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainBool(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory);

View File

@ -57,8 +57,6 @@ void registerDataTypeNumbers(DataTypeFactory & factory)
/// These synonyms are added for compatibility. /// These synonyms are added for compatibility.
factory.registerAlias("TINYINT", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYINT", "Int8", DataTypeFactory::CaseInsensitive);
factory.registerAlias("BOOL", "Int8", DataTypeFactory::CaseInsensitive);
factory.registerAlias("BOOLEAN", "Int8", DataTypeFactory::CaseInsensitive);
factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); /// MySQL factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); /// MySQL
factory.registerAlias("BYTE", "Int8", DataTypeFactory::CaseInsensitive); /// MS Access factory.registerAlias("BYTE", "Int8", DataTypeFactory::CaseInsensitive); /// MS Access
factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::CaseInsensitive); factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::CaseInsensitive);

View File

@ -0,0 +1,169 @@
#include <DataTypes/Serializations/SerializationBool.h>
#include <Columns/ColumnsNumber.h>
#include <Common/Exception.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
extern const int ILLEGAL_COLUMN;
}
SerializationBool::SerializationBool(const SerializationPtr &nested_)
: SerializationCustomSimpleText(nested_)
{
}
void SerializationBool::serializeText(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &) const
{
const auto *col = checkAndGetColumn<ColumnUInt8>(&column);
if (!col)
throw Exception("Bool type can only serialize columns of type UInt8." + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
if (col->getData()[row_num])
ostr.write(str_true, sizeof(str_true) - 1);
else
ostr.write(str_false, sizeof(str_false) - 1);
}
void SerializationBool::deserializeText(IColumn &column, ReadBuffer &istr, const FormatSettings & settings, bool whole) const
{
ColumnUInt8 *col = typeid_cast<ColumnUInt8 *>(&column);
if (!col)
{
throw Exception("Bool type can only deserialize columns of type UInt8." + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
if (!istr.eof())
{
bool value = false;
if (*istr.position() == 't' || *istr.position() == 'f' || *istr.position() == 'T' || *istr.position() == 'F')
readBoolTextWord(value, istr, true);
else if (*istr.position() == '1' || *istr.position() == '0')
readBoolText(value, istr);
else
throw Exception("Invalid boolean value, should be true/false, TRUE/FALSE, 1/0.",
ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
col->insert(value);
}
else
throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
if (whole && !istr.eof())
throwUnexpectedDataAfterParsedValue(column, istr, settings, "Bool");
}
void SerializationBool::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const auto *col = checkAndGetColumn<ColumnUInt8>(&column);
if (!col)
throw Exception("Bool type can only serialize columns of type UInt8." + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
if (col->getData()[row_num])
{
writeString(settings.bool_true_representation, ostr);
}
else
{
writeString(settings.bool_false_representation, ostr);
}
}
void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (istr.eof())
throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
String input;
readEscapedString(input, istr);
deserializeFromString(column, input, settings);
}
void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const
{
serializeText(column, row_num, ostr, settings);
}
void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, const FormatSettings &) const
{
ColumnUInt8 *col = typeid_cast<ColumnUInt8 *>(&column);
if (!col)
{
throw Exception("Bool type can only deserialize columns of type UInt8." + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
if (!istr.eof())
{
bool value = false;
if (*istr.position() == 't' || *istr.position() == 'f')
readBoolTextWord(value, istr);
else if (*istr.position() == '1' || *istr.position() == '0')
readBoolText(value, istr);
else
throw Exception("Invalid boolean value, should be true/false, 1/0.",
ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
col->insert(value);
}
else
throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
void SerializationBool::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextEscaped(column, row_num, ostr, settings);
}
void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (istr.eof())
throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
String input;
readCSVString(input, istr, settings.csv);
deserializeFromString(column, input, settings);
}
void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextEscaped(column, row_num, ostr, settings);
}
void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (istr.eof())
throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
String input;
readString(input, istr);
deserializeFromString(column, input, settings);
}
void SerializationBool::deserializeFromString(IColumn & column, String & input, const FormatSettings & settings)
{
ColumnUInt8 * col = typeid_cast<ColumnUInt8 *>(&column);
if (!col)
{
throw Exception("Bool type can only deserialize columns of type UInt8." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
if (settings.bool_true_representation == input)
{
col->insert(true);
}
else if (settings.bool_false_representation == input)
{
col->insert(false);
}
else
throw Exception("Invalid boolean value, should be " + settings.bool_true_representation + " or " + settings.bool_false_representation + " controlled by setting bool_true_representation and bool_false_representation.", ErrorCodes::ILLEGAL_COLUMN);
}
}
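A minimal sketch of the matching rule implemented by deserializeFromString: a value read from TSV/CSV must be exactly equal to bool_true_representation or bool_false_representation (the settings shown in the diff above); anything else is rejected. The function below is illustrative, not the ClickHouse API.
#include <string>
#include <stdexcept>
bool parseBoolRepresentation(const std::string & input,
                             const std::string & true_repr,   /// e.g. settings.bool_true_representation
                             const std::string & false_repr)  /// e.g. settings.bool_false_representation
{
    if (input == true_repr)
        return true;
    if (input == false_repr)
        return false;
    throw std::runtime_error("Invalid boolean value '" + input + "', expected '" + true_repr + "' or '" + false_repr + "'");
}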

View File

@ -0,0 +1,36 @@
#pragma once
#include <DataTypes/Serializations/SerializationCustomSimpleText.h>
namespace DB
{
class SerializationBool final : public SerializationCustomSimpleText
{
private:
static constexpr char str_true[5] = "true";
static constexpr char str_false[6] = "false";
public:
SerializationBool(const SerializationPtr & nested_);
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,bool whole) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
protected:
static void deserializeFromString(IColumn & column, String & input, const FormatSettings & settings);
};
}

View File

@ -137,7 +137,9 @@ StoragePtr DatabaseMaterializedMySQL::tryGetTable(const String & name, ContextPt
StoragePtr nested_storage = DatabaseAtomic::tryGetTable(name, context_); StoragePtr nested_storage = DatabaseAtomic::tryGetTable(name, context_);
if (context_->isInternalQuery()) if (context_->isInternalQuery())
return nested_storage; return nested_storage;
return std::make_shared<StorageMaterializedMySQL>(std::move(nested_storage), this); if (nested_storage)
return std::make_shared<StorageMaterializedMySQL>(std::move(nested_storage), this);
return nullptr;
} }
DatabaseTablesIteratorPtr DatabaseTablesIteratorPtr

View File

@ -309,7 +309,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM
String comment = "Materialize MySQL step 1: execute dump data"; String comment = "Materialize MySQL step 1: execute dump data";
BlockIO res = tryToExecuteQuery("INSERT INTO " + backQuoteIfNeed(table_name) + "(" + insert_columns_str.str() + ")" + " VALUES", BlockIO res = tryToExecuteQuery("INSERT INTO " + backQuote(table_name) + " (" + insert_columns_str.str() + ")" + " VALUES",
query_context, database_name, comment); query_context, database_name, comment);
return std::move(res.pipeline); return std::move(res.pipeline);

View File

@ -78,7 +78,7 @@ void DatabaseMaterializedPostgreSQL::startSynchronization()
} }
if (tables_to_replicate.empty()) if (tables_to_replicate.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of tables to replicate"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty list of tables to replicate");
for (const auto & table_name : tables_to_replicate) for (const auto & table_name : tables_to_replicate)
{ {

View File

@ -30,13 +30,34 @@ namespace ErrorCodes
template<typename T> template<typename T>
std::set<String> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema) std::set<String> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema)
{ {
std::set<String> tables; Names schemas;
std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables " boost::split(schemas, postgres_schema, [](char c){ return c == ','; });
"WHERE schemaname != 'pg_catalog' AND {}", for (String & key : schemas)
postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema)); boost::trim(key);
for (auto table_name : tx.template stream<std::string>(query)) std::set<std::string> tables;
tables.insert(std::get<0>(table_name)); if (schemas.size() <= 1)
{
std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND {}",
postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema));
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(std::get<0>(table_name));
return tables;
}
/// We add the schema to the table name only when multiple schemas are configured for the whole database engine,
/// because there is no need to add it if there is only one schema.
/// If we add the schema to the table name, the table can be accessed only this way: database_name.`schema_name.table_name`
for (const auto & schema : schemas)
{
std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND {}",
postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(schema));
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(schema + '.' + std::get<0>(table_name));
}
return tables; return tables;
} }
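The schema argument above is a comma-separated list, and with more than one schema every table name gets the schema prefix, so on the ClickHouse side it is addressed as database_name.`schema_name.table_name`. A standalone sketch of the split/trim/qualify behaviour using only the standard library (the real code uses boost::split, boost::trim and pqxx; the table name "events" is made up):

#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> splitAndTrim(const std::string & csv)
{
    std::vector<std::string> parts;
    std::stringstream ss(csv);
    std::string item;
    while (std::getline(ss, item, ','))
    {
        const auto begin = item.find_first_not_of(" \t");
        const auto end = item.find_last_not_of(" \t");
        if (begin != std::string::npos)
            parts.push_back(item.substr(begin, end - begin + 1));
    }
    return parts;
}

int main()
{
    const auto schemas = splitAndTrim("schema1, schema2");
    const std::string table = "events";   /// hypothetical table name
    std::set<std::string> tables;
    for (const auto & schema : schemas)
        tables.insert(schemas.size() <= 1 ? table : schema + '.' + table);
    for (const auto & qualified : tables)
        std::cout << qualified << '\n';   /// prints schema1.events and schema2.events
}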
@@ -308,7 +329,6 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
         pqxx::nontransaction & tx, const String & postgres_table, const String & postrges_schema,
         bool use_nulls, bool with_primary_key, bool with_replica_identity_index);

-template
 std::set<String> fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema);

 template

View File

@@ -19,6 +19,7 @@ struct PostgreSQLTableStructure
 using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;

+/// We need order for materialized version.
 std::set<String> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema);

 PostgreSQLTableStructure fetchPostgreSQLTableStructure(

View File

@@ -70,6 +70,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
     format_settings.date_time_input_format = settings.date_time_input_format;
     format_settings.date_time_output_format = settings.date_time_output_format;
+    format_settings.bool_true_representation = settings.bool_true_representation;
+    format_settings.bool_false_representation = settings.bool_false_representation;
     format_settings.enable_streaming = settings.output_format_enable_streaming;
     format_settings.import_nested_json = settings.input_format_import_nested_json;
     format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;

View File

@@ -82,6 +82,9 @@ struct FormatSettings
         UInt64 output_rows_in_file = 1;
     } avro;

+    String bool_true_representation = "true";
+    String bool_false_representation = "false";
+
     struct CSV
     {
         char delimiter = ',';

View File

@@ -662,18 +662,13 @@ private:
     {
         auto return_type = impl.getReturnTypeImpl(arguments);

-        if (!areTypesEqual(return_type, result_type))
+        if (!return_type->equals(*result_type))
             throw Exception{"Dictionary attribute has different type " + return_type->getName() + " expected " + result_type->getName(),
                 ErrorCodes::TYPE_MISMATCH};

         return impl.executeImpl(arguments, return_type, input_rows_count);
     }

-    static bool areTypesEqual(const DataTypePtr & lhs, const DataTypePtr & rhs)
-    {
-        return removeNullable(recursiveRemoveLowCardinality(lhs))->equals(*removeNullable(recursiveRemoveLowCardinality(rhs)));
-    }
-
     const FunctionDictGetNoType<dictionary_get_function_type> impl;
 };
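The removed helper treated types as equal after stripping Nullable and LowCardinality, whereas return_type->equals(*result_type) now requires the exact same type. A hypothetical illustration of that difference with toy stand-in types (not the ClickHouse IDataType hierarchy):

#include <iostream>
#include <memory>
#include <string>

struct Type
{
    std::string name;               /// e.g. "UInt64" or "Nullable(UInt64)"
    std::shared_ptr<Type> nested;   /// set for wrapper types such as Nullable
    bool equals(const Type & other) const { return name == other.name; }
};

static const Type & unwrap(const Type & type)
{
    return type.nested ? unwrap(*type.nested) : type;
}

int main()
{
    auto uint64 = std::make_shared<Type>(Type{"UInt64", nullptr});
    Type nullable_uint64{"Nullable(UInt64)", uint64};

    std::cout << "strict:  " << nullable_uint64.equals(*uint64) << '\n';                  /// 0
    std::cout << "relaxed: " << unwrap(nullable_uint64).equals(unwrap(*uint64)) << '\n';  /// 1
}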

View File

@@ -0,0 +1,651 @@
#include <numeric>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsWindow.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
{
std::tuple<IntervalKind::Kind, Int64>
dispatchForIntervalColumns(const ColumnWithTypeAndName & interval_column, const String & function_name)
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type)
throw Exception(
"Illegal column " + interval_column.name + " of argument of function " + function_name, ErrorCodes::ILLEGAL_COLUMN);
const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
if (!interval_column_const_int64)
throw Exception(
"Illegal column " + interval_column.name + " of argument of function " + function_name, ErrorCodes::ILLEGAL_COLUMN);
Int64 num_units = interval_column_const_int64->getValue<Int64>();
if (num_units <= 0)
throw Exception(
"Value for column " + interval_column.name + " of function " + function_name + " must be positive",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
return {interval_type->getKind(), num_units};
}
ColumnPtr executeWindowBound(const ColumnPtr & column, int index, const String & function_name)
{
if (const ColumnTuple * col_tuple = checkAndGetColumn<ColumnTuple>(column.get()); col_tuple)
{
if (!checkColumn<ColumnVector<UInt32>>(*col_tuple->getColumnPtr(index)))
throw Exception(
"Illegal column for first argument of function " + function_name + ". Must be a Tuple(DataTime, DataTime)",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return col_tuple->getColumnPtr(index);
}
else
{
throw Exception(
"Illegal column for first argument of function " + function_name + ". Must be Tuple",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
void checkFirstArgument(const ColumnWithTypeAndName & argument, const String & function_name)
{
if (!isDateTime(argument.type))
throw Exception(
"Illegal type " + argument.type->getName() + " of argument of function " + function_name
+ ". Should be a date with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void checkIntervalArgument(const ColumnWithTypeAndName & argument, const String & function_name, IntervalKind & interval_kind, bool & result_type_is_date)
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(argument.type.get());
if (!interval_type)
throw Exception(
"Illegal type " + argument.type->getName() + " of argument of function " + function_name
+ ". Should be an interval of time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
interval_kind = interval_type->getKind();
result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter)
|| (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week);
}
void checkIntervalArgument(const ColumnWithTypeAndName & argument, const String & function_name, bool & result_type_is_date)
{
IntervalKind interval_kind;
checkIntervalArgument(argument, function_name, interval_kind, result_type_is_date);
}
void checkTimeZoneArgument(
const ColumnWithTypeAndName & argument,
const String & function_name)
{
if (!WhichDataType(argument.type).isString())
throw Exception(
"Illegal type " + argument.type->getName() + " of argument of function " + function_name
+ ". This argument is optional and must be a constant string with timezone name",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
bool checkIntervalOrTimeZoneArgument(const ColumnWithTypeAndName & argument, const String & function_name, IntervalKind & interval_kind, bool & result_type_is_date)
{
if (WhichDataType(argument.type).isString())
{
checkTimeZoneArgument(argument, function_name);
return false;
}
checkIntervalArgument(argument, function_name, interval_kind, result_type_is_date);
return true;
}
}
template <>
struct WindowImpl<TUMBLE>
{
static constexpr auto name = "TUMBLE";
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
if (arguments.size() == 2)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, result_type_is_date);
}
else if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, result_type_is_date);
checkTimeZoneArgument(arguments.at(2), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
DataTypePtr data_type = nullptr;
if (result_type_is_date)
data_type = std::make_shared<DataTypeDate>();
else
data_type = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & interval_column = arguments[1];
const auto & from_datatype = *time_column.type.get();
const auto which_type = WhichDataType(from_datatype);
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
if (!which_type.isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " of function " + function_name + ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto interval = dispatchForIntervalColumns(interval_column, function_name);
switch (std::get<0>(interval))
{
case IntervalKind::Second:
return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Minute:
return executeTumble<UInt32, IntervalKind::Minute>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Hour:
return executeTumble<UInt32, IntervalKind::Hour>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Day:
return executeTumble<UInt32, IntervalKind::Day>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Week:
return executeTumble<UInt16, IntervalKind::Week>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Month:
return executeTumble<UInt16, IntervalKind::Month>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Quarter:
return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Year:
return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind unit>
static ColumnPtr executeTumble(const ColumnUInt32 & time_column, UInt64 num_units, const DateLUTImpl & time_zone)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i != size; ++i)
{
start_data[i] = ToStartOfTransform<unit>::execute(time_data[i], num_units, time_zone);
end_data[i] = AddTime<unit>::execute(start_data[i], num_units, time_zone);
}
MutableColumns result;
result.emplace_back(std::move(start));
result.emplace_back(std::move(end));
return ColumnTuple::create(std::move(result));
}
};
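/// Worked example for executeTumble above: with TUMBLE(time_attr, INTERVAL 15 MINUTE)
/// and time_attr = 2021-01-01 00:20:00, ToStartOfTransform<Minute> aligns the start to
/// 2021-01-01 00:15:00 and AddTime<Minute> adds 15 minutes, so the returned tuple is
/// (2021-01-01 00:15:00, 2021-01-01 00:30:00).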
template <>
struct WindowImpl<TUMBLE_START>
{
static constexpr auto name = "TUMBLE_START";
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 1)
{
auto type = WhichDataType(arguments[0].type);
if (type.isTuple())
return std::static_pointer_cast<const DataTypeTuple>(arguments[0].type)->getElement(0);
else if (type.isUInt32())
return std::make_shared<DataTypeDateTime>();
else
throw Exception(
"Illegal type of first argument of function " + function_name + " should be DateTime, Tuple or UInt32",
ErrorCodes::ILLEGAL_COLUMN);
}
else
{
return std::static_pointer_cast<const DataTypeTuple>(WindowImpl<TUMBLE>::getReturnType(arguments, function_name))
->getElement(0);
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto which_type = WhichDataType(arguments[0].type);
ColumnPtr result_column;
if (which_type.isDateTime())
result_column = WindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
else
result_column = arguments[0].column;
return executeWindowBound(result_column, 0, function_name);
}
};
template <>
struct WindowImpl<TUMBLE_END>
{
static constexpr auto name = "TUMBLE_END";
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
return WindowImpl<TUMBLE_START>::getReturnType(arguments, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name)
{
const auto which_type = WhichDataType(arguments[0].type);
ColumnPtr result_column;
if (which_type.isDateTime())
result_column = WindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
else
result_column = arguments[0].column;
return executeWindowBound(result_column, 1, function_name);
}
};
template <>
struct WindowImpl<HOP>
{
static constexpr auto name = "HOP";
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
IntervalKind interval_kind_1;
IntervalKind interval_kind_2;
if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
}
else if (arguments.size() == 4)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
checkTimeZoneArgument(arguments.at(3), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 3 or 4",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (interval_kind_1 != interval_kind_2)
throw Exception(
"Illegal type of window and hop column of function " + function_name + ", must be same", ErrorCodes::ILLEGAL_COLUMN);
DataTypePtr data_type = nullptr;
if (result_type_is_date)
data_type = std::make_shared<DataTypeDate>();
else
data_type = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 3, 0));
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
const auto & window_interval_column = arguments[2];
const auto & from_datatype = *time_column.type.get();
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0);
if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " argument of function " + function_name
+ ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto hop_interval = dispatchForIntervalColumns(hop_interval_column, function_name);
auto window_interval = dispatchForIntervalColumns(window_interval_column, function_name);
if (std::get<1>(hop_interval) > std::get<1>(window_interval))
throw Exception(
"Value for hop interval of function " + function_name + " must not larger than window interval",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
switch (std::get<0>(window_interval))
{
case IntervalKind::Second:
return executeHop<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Minute:
return executeHop<UInt32, IntervalKind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Hour:
return executeHop<UInt32, IntervalKind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Day:
return executeHop<UInt32, IntervalKind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Week:
return executeHop<UInt16, IntervalKind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Month:
return executeHop<UInt16, IntervalKind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Quarter:
return executeHop<UInt16, IntervalKind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Year:
return executeHop<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHop(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
wstart = AddTime<kind>::execute(wend, -1 * window_num_units, time_zone);
ToType wend_latest;
do
{
wend_latest = wend;
wend = AddTime<kind>::execute(wend, -1 * hop_num_units, time_zone);
} while (wend > time_data[i]);
end_data[i] = wend_latest;
start_data[i] = AddTime<kind>::execute(wend_latest, -1 * window_num_units, time_zone);
}
MutableColumns result;
result.emplace_back(std::move(start));
result.emplace_back(std::move(end));
return ColumnTuple::create(std::move(result));
}
};
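/// Worked example for executeHop above: with HOP(time_attr, INTERVAL 1 MINUTE, INTERVAL 3 MINUTE)
/// and time_attr = 00:04:30, the initial bound is wend = 00:05:00; the do/while steps wend back by
/// the hop size until it is no longer greater than time_attr, leaving wend_latest = 00:05:00.
/// The returned window is therefore (00:02:00, 00:05:00), the earliest 3-minute window that
/// still contains time_attr.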
template <>
struct WindowImpl<WINDOW_ID>
{
static constexpr auto name = "WINDOW_ID";
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
bool result_type_is_date;
IntervalKind interval_kind_1;
IntervalKind interval_kind_2;
if (arguments.size() == 2)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
}
else if (arguments.size() == 3)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
if (checkIntervalOrTimeZoneArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date))
{
if (interval_kind_1 != interval_kind_2)
throw Exception(
"Illegal type of window and hop column of function " + function_name + ", must be same", ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (arguments.size() == 4)
{
checkFirstArgument(arguments.at(0), function_name);
checkIntervalArgument(arguments.at(1), function_name, interval_kind_1, result_type_is_date);
checkIntervalArgument(arguments.at(2), function_name, interval_kind_2, result_type_is_date);
checkTimeZoneArgument(arguments.at(3), function_name);
}
else
{
throw Exception(
"Number of arguments for function " + function_name + " doesn't match: passed " + toString(arguments.size())
+ ", should be 2, 3 or 4",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (result_type_is_date)
return std::make_shared<DataTypeUInt16>();
else
return std::make_shared<DataTypeUInt32>();
}
[[maybe_unused]] static ColumnPtr
dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
const auto & window_interval_column = arguments[2];
const auto & from_datatype = *time_column.type.get();
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0);
if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec)
throw Exception(
"Illegal column " + time_column.name + " argument of function " + function_name
+ ". Must contain dates or dates with time",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto hop_interval = dispatchForIntervalColumns(hop_interval_column, function_name);
auto window_interval = dispatchForIntervalColumns(window_interval_column, function_name);
if (std::get<1>(hop_interval) > std::get<1>(window_interval))
throw Exception(
"Value for hop interval of function " + function_name + " must not larger than window interval",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
switch (std::get<0>(window_interval))
{
case IntervalKind::Second:
return executeHopSlice<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Minute:
return executeHopSlice<UInt32, IntervalKind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Hour:
return executeHopSlice<UInt32, IntervalKind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Day:
return executeHopSlice<UInt32, IntervalKind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Week:
return executeHopSlice<UInt16, IntervalKind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Month:
return executeHopSlice<UInt16, IntervalKind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Quarter:
return executeHopSlice<UInt16, IntervalKind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Year:
return executeHopSlice<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
}
__builtin_unreachable();
}
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHopSlice(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
{
Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units);
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto end = ColumnVector<ToType>::create();
auto & end_data = end->getData();
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
ToType wend_latest;
do
{
wend_latest = wend;
wend = AddTime<kind>::execute(wend, -1 * gcd_num_units, time_zone);
} while (wend > time_data[i]);
end_data[i] = wend_latest;
}
return end;
}
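/// Worked example for executeHopSlice above: WINDOW_ID identifies the gcd(hop, window)-sized
/// slice that contains the timestamp. With hop = 2 minutes and window = 3 minutes the slice
/// size is gcd(2, 3) = 1 minute; for time_attr = 00:05:30 the initial 2-minute alignment gives
/// wend = 00:06:00, the do/while settles on wend_latest = 00:06:00, and that is the returned id.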
[[maybe_unused]] static ColumnPtr
dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
ColumnPtr column = WindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
return executeWindowBound(column, 1, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 2)
return dispatchForTumbleColumns(arguments, function_name);
else
{
const auto & third_column = arguments[2];
if (arguments.size() == 3 && WhichDataType(third_column.type).isString())
return dispatchForTumbleColumns(arguments, function_name);
else
return dispatchForHopColumns(arguments, function_name);
}
}
};
template <>
struct WindowImpl<HOP_START>
{
static constexpr auto name = "HOP_START";
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
if (arguments.size() == 1)
{
auto type = WhichDataType(arguments[0].type);
if (type.isTuple())
return std::static_pointer_cast<const DataTypeTuple>(arguments[0].type)->getElement(0);
else if (type.isUInt32())
return std::make_shared<DataTypeDateTime>();
else
throw Exception(
"Illegal type of first argument of function " + function_name + " should be DateTime, Tuple or UInt32",
ErrorCodes::ILLEGAL_COLUMN);
}
else
{
return std::static_pointer_cast<const DataTypeTuple>(WindowImpl<HOP>::getReturnType(arguments, function_name))->getElement(0);
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
ColumnPtr result_column;
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
result_column = time_column.column;
}
else
result_column = WindowImpl<HOP>::dispatchForColumns(arguments, function_name);
return executeWindowBound(result_column, 0, function_name);
}
};
template <>
struct WindowImpl<HOP_END>
{
static constexpr auto name = "HOP_END";
[[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
return WindowImpl<HOP_START>::getReturnType(arguments, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
ColumnPtr result_column;
if (arguments.size() == 1)
{
if (which_type.isUInt32())
return time_column.column;
else //isTuple
result_column = time_column.column;
}
else
result_column = WindowImpl<HOP>::dispatchForColumns(arguments, function_name);
return executeWindowBound(result_column, 1, function_name);
}
};
template <WindowFunctionName type>
DataTypePtr FunctionWindow<type>::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
{
return WindowImpl<type>::getReturnType(arguments, name);
}
template <WindowFunctionName type>
ColumnPtr FunctionWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const
{
return WindowImpl<type>::dispatchForColumns(arguments, name);
}
void registerFunctionsWindow(FunctionFactory& factory)
{
factory.registerFunction<FunctionTumble>();
factory.registerFunction<FunctionHop>();
factory.registerFunction<FunctionTumbleStart>();
factory.registerFunction<FunctionTumbleEnd>();
factory.registerFunction<FunctionHopStart>();
factory.registerFunction<FunctionHopEnd>();
factory.registerFunction<FunctionWindowId>();
}
}

View File

@@ -0,0 +1,155 @@
#pragma once
#include <base/DateLUT.h>
#include <DataTypes/DataTypeInterval.h>
#include <Functions/IFunction.h>
namespace DB
{
/** Window functions:
*
* TUMBLE(time_attr, interval [, timezone])
*
* TUMBLE_START(window_id)
*
* TUMBLE_START(time_attr, interval [, timezone])
*
* TUMBLE_END(window_id)
*
* TUMBLE_END(time_attr, interval [, timezone])
*
* HOP(time_attr, hop_interval, window_interval [, timezone])
*
* HOP_START(window_id)
*
* HOP_START(time_attr, hop_interval, window_interval [, timezone])
*
* HOP_END(window_id)
*
* HOP_END(time_attr, hop_interval, window_interval [, timezone])
*
*/
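/// For example, TUMBLE_START(window_id) simply extracts the first element of the
/// (start, end) tuple produced by TUMBLE, while TUMBLE_START(time_attr, interval [, timezone])
/// recomputes the window from the raw arguments; HOP_START and HOP_END treat the HOP tuple
/// the same way (see the WindowImpl specializations in FunctionsWindow.cpp).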
enum WindowFunctionName
{
TUMBLE,
TUMBLE_START,
TUMBLE_END,
HOP,
HOP_START,
HOP_END,
WINDOW_ID
};
template <IntervalKind::Kind unit>
struct ToStartOfTransform;
#define TRANSFORM_DATE(INTERVAL_KIND) \
template <> \
struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
{ \
static ExtendedDayNum execute(UInt32 t, UInt64 delta, const DateLUTImpl & time_zone) \
{ \
return time_zone.toStartOf##INTERVAL_KIND##Interval(time_zone.toDayNum(t), delta); \
} \
};
TRANSFORM_DATE(Year)
TRANSFORM_DATE(Quarter)
TRANSFORM_DATE(Month)
TRANSFORM_DATE(Week)
#undef TRANSFORM_DATE
template <>
struct ToStartOfTransform<IntervalKind::Day>
{
static UInt32 execute(UInt32 t, UInt64 delta, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), delta);
}
};
#define TRANSFORM_TIME(INTERVAL_KIND) \
template <> \
struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
{ \
static UInt32 execute(UInt32 t, UInt64 delta, const DateLUTImpl & time_zone) \
{ \
return time_zone.toStartOf##INTERVAL_KIND##Interval(t, delta); \
} \
};
TRANSFORM_TIME(Hour)
TRANSFORM_TIME(Minute)
TRANSFORM_TIME(Second)
#undef TRANSFORM_TIME
template <IntervalKind::Kind unit>
struct AddTime;
#define ADD_DATE(INTERVAL_KIND) \
template <> \
struct AddTime<IntervalKind::INTERVAL_KIND> \
{ \
static inline ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl & time_zone) \
{ \
return time_zone.add##INTERVAL_KIND##s(ExtendedDayNum(d), delta); \
} \
};
ADD_DATE(Year)
ADD_DATE(Quarter)
ADD_DATE(Month)
#undef ADD_DATE
template <>
struct AddTime<IntervalKind::Week>
{
static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) { return ExtendedDayNum(d + delta * 7);}
};
#define ADD_TIME(INTERVAL_KIND, INTERVAL) \
template <> \
struct AddTime<IntervalKind::INTERVAL_KIND> \
{ \
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) { return t + delta * INTERVAL; } \
};
ADD_TIME(Day, 86400)
ADD_TIME(Hour, 3600)
ADD_TIME(Minute, 60)
ADD_TIME(Second, 1)
#undef ADD_TIME
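/// The date-based AddTime specializations above go through DateLUT calendar arithmetic
/// (addYears/addQuarters/addMonths), while the time-based ones are plain offsets in seconds:
/// for example AddTime<IntervalKind::Hour>::execute(t, 2, tz) returns t + 2 * 3600.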
template <WindowFunctionName type>
struct WindowImpl
{
static constexpr auto name = "UNKNOWN";
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name);
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name);
};
template <WindowFunctionName type>
class FunctionWindow : public IFunction
{
public:
static constexpr auto name = WindowImpl<type>::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionWindow>(); }
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override;
};
using FunctionTumble = FunctionWindow<TUMBLE>;
using FunctionTumbleStart = FunctionWindow<TUMBLE_START>;
using FunctionTumbleEnd = FunctionWindow<TUMBLE_END>;
using FunctionHop = FunctionWindow<HOP>;
using FunctionWindowId = FunctionWindow<WINDOW_ID>;
using FunctionHopStart = FunctionWindow<HOP_START>;
using FunctionHopEnd = FunctionWindow<HOP_END>;
}

Some files were not shown because too many files have changed in this diff.