Merge branch 'master' into fix_rename_dictionary

Commit 667dbef4a0 by tavplubix, 2021-11-22 18:48:22 +03:00, committed by GitHub.
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database).
182 changed files with 8889 additions and 1087 deletions


@ -1,7 +1,7 @@
name: Cancel
on: # yamllint disable-line rule:truthy
workflow_run:
workflows: ["CIGithubActions"]
workflows: ["CIGithubActions", "ReleaseCI"]
types:
- requested
jobs:

.github/workflows/master.yml (vendored, 1384 lines): file diff suppressed because it is too large.

.github/workflows/release_branches.yml (vendored, 933 lines):

@ -0,0 +1,933 @@
name: ReleaseCI
on: # yamllint disable-line rule:truthy
push:
branches:
- '21.**'
- '22.**'
- '23.**'
- '24.**'
- 'backport/**'
jobs:
DockerHubPush:
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Images check
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 docker_images_check.py
- name: Upload images files to artifacts
uses: actions/upload-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/docker_images_check/changed_images.json
CompatibilityCheck:
needs: [BuilderDebRelease]
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: CompatibilityCheck
env:
TEMP_PATH: ${{runner.temp}}/compatibility_check
REPO_COPY: ${{runner.temp}}/compatibility_check/ClickHouse
REPORTS_PATH: ${{runner.temp}}/reports_dir
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 compatibility_check.py 0
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#########################################################################################
#################################### ORDINARY BUILDS ####################################
#########################################################################################
BuilderDebRelease:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 0
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 3
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebUBsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 4
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebTsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 5
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebMsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 6
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
BuilderDebDebug:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'recursive'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
env:
TEMP_PATH: ${{runner.temp}}/build_check
IMAGES_PATH: ${{runner.temp}}/images_path
REPO_COPY: ${{runner.temp}}/build_check/ClickHouse
CACHES_PATH: ${{runner.temp}}/../ccaches
CHECK_NAME: 'ClickHouse build check (actions)'
BUILD_NUMBER: 7
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NUMBER
- name: Upload build URLs to artifacts
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
needs:
- BuilderDebRelease
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
env:
TEMP_PATH: ${{runner.temp}}/report_check
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'ClickHouse build check (actions)'
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cd $GITHUB_WORKSPACE/tests/ci
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
FunctionalStatelessTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (address, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_tsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (thread, actions)'
REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestUBsan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_ubsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (ubsan, actions)'
REPO_COPY: ${{runner.temp}}/stateless_ubsan/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_memory
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (memory, actions)'
REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatelessTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateless_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateless tests (debug, actions)'
REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse
KILL_TIMEOUT: 10800
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
############################ FUNCTIONAL STATEFUL TESTS #######################################
##############################################################################################
FunctionalStatefulTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (release, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (address, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_tsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (thread, actions)'
REPO_COPY: ${{runner.temp}}/stateful_tsan/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_msan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (memory, actions)'
REPO_COPY: ${{runner.temp}}/stateful_msan/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestUBsan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_ubsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (ubsan, actions)'
REPO_COPY: ${{runner.temp}}/stateful_ubsan/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FunctionalStatefulTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, func-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Functional test
env:
TEMP_PATH: ${{runner.temp}}/stateful_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stateful tests (debug, actions)'
REPO_COPY: ${{runner.temp}}/stateful_debug/ClickHouse
KILL_TIMEOUT: 3600
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
StressTestAsan:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_thread
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (address, actions)'
REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
StressTestTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_thread
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (thread, actions)'
REPO_COPY: ${{runner.temp}}/stress_thread/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
StressTestMsan:
needs: [BuilderDebMsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_memory
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (memory, actions)'
REPO_COPY: ${{runner.temp}}/stress_memory/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
StressTestUBsan:
needs: [BuilderDebUBsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_undefined
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (undefined, actions)'
REPO_COPY: ${{runner.temp}}/stress_undefined/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
StressTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Stress test
env:
TEMP_PATH: ${{runner.temp}}/stress_debug
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Stress test (debug, actions)'
REPO_COPY: ${{runner.temp}}/stress_debug/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 stress_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
#############################################################################################
############################# INTEGRATION TESTS #############################################
#############################################################################################
IntegrationTestsAsan:
needs: [BuilderDebAsan, FunctionalStatelessTestAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_asan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (asan, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsTsan:
needs: [BuilderDebTsan, FunctionalStatelessTestTsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_tsan
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (thread, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
IntegrationTestsRelease:
needs: [BuilderDebRelease, FunctionalStatelessTestRelease]
runs-on: [self-hosted, stress-tester]
steps:
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{runner.temp}}/reports_dir
- name: Check out repository code
uses: actions/checkout@v2
- name: Integration test
env:
TEMP_PATH: ${{runner.temp}}/integration_tests_release
REPORTS_PATH: ${{runner.temp}}/reports_dir
CHECK_NAME: 'Integration tests (release, actions)'
REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse
run: |
sudo rm -fr $TEMP_PATH
mkdir -p $TEMP_PATH
cp -r $GITHUB_WORKSPACE $TEMP_PATH
cd $REPO_COPY/tests/ci
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill $(docker ps -q) ||:
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr $TEMP_PATH
FinishCheck:
needs:
- DockerHubPush
- BuilderReport
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
- FunctionalStatelessTestAsan
- FunctionalStatelessTestTsan
- FunctionalStatelessTestMsan
- FunctionalStatelessTestUBsan
- FunctionalStatefulTestDebug
- FunctionalStatefulTestRelease
- FunctionalStatefulTestAsan
- FunctionalStatefulTestTsan
- FunctionalStatefulTestMsan
- FunctionalStatefulTestUBsan
- StressTestDebug
- StressTestAsan
- StressTestTsan
- StressTestMsan
- StressTestUBsan
- IntegrationTestsAsan
- IntegrationTestsRelease
- IntegrationTestsTsan
- CompatibilityCheck
runs-on: [self-hosted, style-checker]
steps:
- name: Check out repository code
uses: actions/checkout@v2
- name: Finish label
run: |
cd $GITHUB_WORKSPACE/tests/ci
python3 finish_check.py


@ -9,14 +9,14 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-8 {#install-clang-8}
## Install Clang-13
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, the commands for Bionic are:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
sudo apt-get install clang-8
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-13 main" >> /etc/apt/sources.list
sudo apt-get install clang-13
```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -25,6 +25,7 @@ Let's remember the path where we install `cctools` as ${CCTOOLS}
``` bash
mkdir ${CCTOOLS}
cd ${CCTOOLS}
git clone https://github.com/tpoechtrager/apple-libtapi.git
cd apple-libtapi
@ -34,7 +35,7 @@ cd ..
git clone https://github.com/tpoechtrager/cctools-port.git
cd cctools-port/cctools
./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin
./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=x86_64-apple-darwin
make install
```
@ -51,12 +52,10 @@ tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --s
``` bash
cd ClickHouse
mkdir build-osx
CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld
ninja -C build-osx
mkdir build-darwin
cd build-darwin
CC=clang-13 CXX=clang++-13 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/aarch64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/aarch64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/aarch64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/aarch64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja
```
The resulting binary will have a Mach-O executable format and can't be run on Linux.


@ -37,7 +37,7 @@ Next, you need to download the source files onto your working machine. This is c
In the command line terminal run:
git clone git@github.com:your_github_username/ClickHouse.git
git clone --recursive git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Note: please substitute *your_github_username* with what is appropriate!
@ -65,7 +65,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These
You can also clone the repository via https protocol:
git clone https://github.com/ClickHouse/ClickHouse.git
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later, replacing the remote address of the repository with the `git remote` command.


@ -1460,4 +1460,53 @@ To add an LDAP server as a remote user directory of users that are not defined l
</ldap>
```
[Original article](https://clickhouse.com/docs/en/operations/server_configuration_parameters/settings/) <!--hide-->
## total_memory_profiler_step {#total-memory-profiler-step}
Sets the memory size (in bytes) for a stack trace at every peak allocation step. The data is stored in the [system.trace_log](../../operations/system-tables/trace_log.md) system table with `query_id` equal to an empty string.
Possible values:
- Positive integer.
Default value: `4194304`.
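For illustration, a minimal query sketch for inspecting the traces this profiler collects; as noted above, these rows are written with an empty `query_id`:
``` sql
-- Sketch: most recent stack traces recorded by the total memory profiler.
SELECT event_time, trace
FROM system.trace_log
WHERE query_id = ''
ORDER BY event_time DESC
LIMIT 5;
```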
## total_memory_tracker_sample_probability {#total-memory-tracker-sample-probability}
Allows collecting random allocations and deallocations and writes them to the [system.trace_log](../../operations/system-tables/trace_log.md) system table with `trace_type` equal to `MemorySample`, with the specified probability. The probability applies to every allocation or deallocation, regardless of the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds the untracked memory limit (default value is `4` MiB). It can be lowered if [total_memory_profiler_step](#total-memory-profiler-step) is lowered. You can set `total_memory_profiler_step` equal to `1` for extra fine-grained sampling.
Possible values:
- Positive integer.
- 0 — Writing of random allocations and deallocations in the `system.trace_log` system table is disabled.
Default value: `0`.
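A minimal sketch (assuming the probability is set to a non-zero value in the server configuration) for checking that sampled allocations are being recorded, using the `trace_type` value described above:
``` sql
-- Sketch: count sampled allocation/deallocation events and their total recorded size.
SELECT count() AS samples, sum(size) AS sampled_bytes
FROM system.trace_log
WHERE trace_type = 'MemorySample';
```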
## mmap_cache_size {#mmap-cache-size}
Sets the cache size (in bytes) for mapped files. This setting allows avoiding frequent open/[mmap/munmap](https://en.wikipedia.org/wiki/Mmap)/close calls (which are very expensive due to subsequent page faults) and reusing mappings across several threads and queries. The setting value is the number of mapped regions (usually equal to the number of mapped files). The amount of data in mapped files can be monitored in the [system.metrics](../../operations/system-tables/metrics.md) and [system.metric_log](../../operations/system-tables/metric_log.md) system tables by the `MMappedFiles` and `MMappedFileBytes` metrics, in [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) and [system.asynchronous_metrics_log](../../operations/system-tables/asynchronous_metric_log.md) by the `MMapCacheCells` metric, and also in [system.events](../../operations/system-tables/events.md), [system.processes](../../operations/system-tables/processes.md), [system.query_log](../../operations/system-tables/query_log.md), [system.query_thread_log](../../operations/system-tables/query_thread_log.md), and [system.query_views_log](../../operations/system-tables/query_views_log.md) by the `CreatedReadBufferMMap`, `CreatedReadBufferMMapFailed`, `MMappedFileCacheHits`, and `MMappedFileCacheMisses` events. Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage, because this memory can be discarded similarly to the OS page cache. The cache is dropped (the files are closed) automatically on removal of old parts in tables of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family; it can also be dropped manually by the `SYSTEM DROP MMAP CACHE` query.
Possible values:
- Positive integer.
Default value: `1000`.
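For illustration, a short sketch of the monitoring query and the manual drop mentioned above (metric names as listed in the text):
``` sql
-- Sketch: current number of mapped files and bytes held by the mmap cache.
SELECT metric, value
FROM system.metrics
WHERE metric IN ('MMappedFiles', 'MMappedFileBytes');

-- Drop the cache manually (the mapped files are closed).
SYSTEM DROP MMAP CACHE;
```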
## compiled_expression_cache_size {#compiled-expression-cache-size}
Sets the cache size (in bytes) for [compiled expressions](../../operations/caches.md).
Possible values:
- Positive integer.
Default value: `134217728`.
## compiled_expression_cache_elements_size {#compiled_expression_cache_elements_size}
Sets the cache size (in elements) for [compiled expressions](../../operations/caches.md).
Possible values:
- Positive integer.
Default value: `10000`.


@ -4048,3 +4048,14 @@ Possible values:
- 0 — Timeout disabled.
Default value: `0`.
## min_bytes_to_use_mmap_io {#min-bytes-to-use-mmap-io}
This is an experimental setting. Sets the minimum amount of memory for reading large files without copying data from the kernel to userspace. The recommended threshold is about 64 MB, because [mmap/munmap](https://en.wikipedia.org/wiki/Mmap) is slow. It makes sense only for large files and helps only if data reside in the page cache.
Possible values:
- Positive integer.
- 0 — Big files are read only by copying data from the kernel to userspace (mmap is not used).
Default value: `0`.
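A hedged usage sketch: `67108864` bytes corresponds to the ~64 MB threshold recommended above, and `big_table` is a hypothetical table name used only for illustration:
``` sql
-- Sketch: enable mmap-based reads for sufficiently large files in this session.
SET min_bytes_to_use_mmap_io = 67108864;

-- Or per query (big_table is hypothetical):
SELECT count() FROM big_table SETTINGS min_bytes_to_use_mmap_io = 67108864;
```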


@ -0,0 +1,44 @@
# EXISTS {#exists-operator}
The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`.
`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
!!! warning "Warning"
References to main query tables and columns are not supported in a subquery.
**Syntax**
```sql
WHERE EXISTS(subquery)
```
**Example**
Query with a subquery returning several rows:
``` sql
SELECT count() FROM numbers(10) WHERE EXISTS(SELECT number FROM numbers(10) WHERE number > 8);
```
Result:
``` text
┌─count()─┐
│ 10 │
└─────────┘
```
Query with a subquery that returns an empty result:
``` sql
SELECT count() FROM numbers(10) WHERE EXISTS(SELECT number FROM numbers(10) WHERE number > 11);
```
Result:
``` text
┌─count()─┐
│ 0 │
└─────────┘
```


@ -71,7 +71,7 @@ For tuple subtraction: [tupleMinus](../../sql-reference/functions/tuple-function
## Operators for Working with Data Sets {#operators-for-working-with-data-sets}
*See [IN operators](../../sql-reference/operators/in.md).*
See [IN operators](../../sql-reference/operators/in.md) and [EXISTS](../../sql-reference/operators/exists.md) operator.
`a IN ...` - The `in(a, b)` function.


@ -6,9 +6,51 @@ toc_title: WHERE
The `WHERE` clause allows filtering the data coming from the [FROM](../../../sql-reference/statements/select/from.md) clause of `SELECT`.
If there is a `WHERE` clause, it must contain an expression with the `UInt8` type. This is usually an expression with comparison and logical operators. Rows where this expression evaluates to 0 are excluded from further transformations or result.
If there is a `WHERE` clause, it must contain an expression with the `UInt8` type. This is usually an expression with comparison and logical operators. Rows where this expression evaluates to `0` are excluded from further transformations or result.
The `WHERE` expression is evaluated for the ability to use indexes and partition pruning, if the underlying table engine supports that.
!!! note "Note"
There's a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md).
There is a filtering optimization called [PREWHERE](../../../sql-reference/statements/select/prewhere.md).
If you need to test a value for [NULL](../../../sql-reference/syntax.md#null-literal), use [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators or [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions.
Otherwise an expression with `NULL` never passes.
**Example**
To find numbers that are multiples of 3 and greater than 10, execute the following query on the [numbers table](../../../sql-reference/table-functions/numbers.md):
``` sql
SELECT number FROM numbers(20) WHERE (number > 10) AND (number % 3 == 0);
```
Result:
``` text
┌─number─┐
│ 12 │
│ 15 │
│ 18 │
└────────┘
```
Queries with `NULL` values:
``` sql
CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE=MergeTree() ORDER BY x;
INSERT INTO t_null VALUES (1, NULL), (2, 3);
SELECT * FROM t_null WHERE y IS NULL;
SELECT * FROM t_null WHERE y != 0;
```
Result:
``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
└───┴──────┘
┌─x─┬─y─┐
│ 2 │ 3 │
└───┴───┘
```


@ -1432,3 +1432,54 @@ ClickHouse uses ZooKeeper to store metadata
</roles>
</ldap>
```
## total_memory_profiler_step {#total-memory-profiler-step}
Sets the memory size (in bytes) for a stack trace at every peak allocation step. The data is stored in the [system.trace_log](../../operations/system-tables/trace_log.md) system table with `query_id` equal to an empty string.
Possible values:
- Positive integer.
Default value: `4194304`.
## total_memory_tracker_sample_probability {#total-memory-tracker-sample-probability}
Allows collecting random allocations and deallocations and writes them to the [system.trace_log](../../operations/system-tables/trace_log.md) system table with `trace_type` equal to `MemorySample`, with the specified probability. The probability applies to every allocation or deallocation, regardless of the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds the untracked memory limit (default value is `4` MiB). It can be lowered if [total_memory_profiler_step](#total-memory-profiler-step) is lowered. You can set `total_memory_profiler_step` equal to `1` for extra fine-grained sampling.
Possible values:
- Positive integer.
- 0 — Writing of random allocations and deallocations to the `system.trace_log` system table is disabled.
Default value: `0`.
## mmap_cache_size {#mmap-cache-size}
Sets the cache size (in bytes) for mapped files. This setting allows avoiding frequent open/[mmap/munmap](https://en.wikipedia.org/wiki/Mmap)/close calls (which are very expensive due to subsequent page faults) and reusing mappings across several threads and queries. The setting value is the number of mapped regions (usually equal to the number of mapped files). The amount of data in mapped files can be monitored in the [system.metrics](../../operations/system-tables/metrics.md) and [system.metric_log](../../operations/system-tables/metric_log.md) system tables by the `MMappedFiles` and `MMappedFileBytes` metrics, in [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) and [system.asynchronous_metrics_log](../../operations/system-tables/asynchronous_metric_log.md) by the `MMapCacheCells` metric, and also in [system.events](../../operations/system-tables/events.md), [system.processes](../../operations/system-tables/processes.md), [system.query_log](../../operations/system-tables/query_log.md), [system.query_thread_log](../../operations/system-tables/query_thread_log.md), and [system.query_views_log](../../operations/system-tables/query_views_log.md) by the `CreatedReadBufferMMap`, `CreatedReadBufferMMapFailed`, `MMappedFileCacheHits`, and `MMappedFileCacheMisses` events. Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage, because this memory can be discarded similarly to the OS page cache. The cache is dropped (the files are closed) automatically on removal of old parts in tables of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family; it can also be dropped manually by the `SYSTEM DROP MMAP CACHE` query.
Possible values:
- Positive integer.
Default value: `1000`.
## compiled_expression_cache_size {#compiled-expression-cache-size}
Sets the cache size (in bytes) for [compiled expressions](../../operations/caches.md).
Possible values:
- Positive integer.
Default value: `134217728`.
## compiled_expression_cache_elements_size {#compiled_expression_cache_elements_size}
Sets the cache size (in elements) for [compiled expressions](../../operations/caches.md).
Possible values:
- Positive integer.
Default value: `10000`.


@ -3808,3 +3808,13 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
Default value: `0`.
## min_bytes_to_use_mmap_io {#min-bytes-to-use-mmap-io}
This is an experimental setting. Sets the minimum amount of memory for reading large files without copying data from the kernel to userspace. The recommended threshold is about 64 MB, because [mmap/munmap](https://en.wikipedia.org/wiki/Mmap) is slow. It makes sense only for large files and helps only if data reside in the page cache.
Possible values:
- Positive integer.
- 0 — Big files are read only by copying data from the kernel to userspace (mmap is not used).
Default value: `0`.


@ -0,0 +1,44 @@
# EXISTS {#exists-operator}
The `EXISTS` operator checks how many rows the result of a subquery contains. If the result is empty, the operator returns `0`. Otherwise, it returns `1`.
`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
!!! warning "Warning"
References to main query tables and columns are not supported in a subquery.
**Syntax**
```sql
WHERE EXISTS(subquery)
```
**Example**
Query with a subquery returning several rows:
``` sql
SELECT count() FROM numbers(10) WHERE EXISTS(SELECT number FROM numbers(10) WHERE number > 8);
```
Result:
``` text
┌─count()─┐
│ 10 │
└─────────┘
```
Query with a subquery that returns an empty result:
``` sql
SELECT count() FROM numbers(10) WHERE EXISTS(SELECT number FROM numbers(10) WHERE number > 11);
```
Result:
``` text
┌─count()─┐
│ 0 │
└─────────┘
```


@ -72,7 +72,7 @@ toc_title: "Operators"
## Operators for Working with Data Sets {#operatory-dlia-raboty-s-mnozhestvami}
*See the [IN operators](../../sql-reference/operators/in.md#select-in-operators) section.*
See [IN operators](../../sql-reference/operators/in.md#select-in-operators) and the [EXISTS](../../sql-reference/operators/exists.md) operator.
`a IN ...` - the `in(a, b)` function


@ -4,26 +4,52 @@ toc_title: WHERE
# WHERE Clause {#select-where}
Allows specifying an expression that ClickHouse uses to filter the data before all other actions in the query, except for the expressions contained in the [PREWHERE](prewhere.md#prewhere-clause) clause. Usually, this is an expression with logical operators.
Allows specifying an expression that ClickHouse uses to filter the data before all other actions in the query, except for the expressions contained in the [PREWHERE](prewhere.md#prewhere-clause) clause. This is usually an expression with logical operators.
The result of the expression must have type `UInt8`.
ClickHouse uses indexes in the expression if the [table engine](../../../engines/table-engines/index.md) allows it.
If you need to check for [NULL](../../../sql-reference/syntax.md#null-literal) in the clause, use the [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators, as well as the corresponding `isNull` and `isNotNull` functions. Otherwise, the expression is always considered not satisfied.
!!! note "Note"
There is a filtering optimization called [PREWHERE](prewhere.md).
If you need to check for [NULL](../../../sql-reference/syntax.md#null-literal) in the clause, use the [IS NULL](../../operators/index.md#operator-is-null) and [IS NOT NULL](../../operators/index.md#is-not-null) operators, as well as the corresponding [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) and [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull) functions. Otherwise, the expression is always considered not satisfied.
**Example**
To find numbers that are multiples of 3 and greater than 10, run the following query against the [numbers table](../../../sql-reference/table-functions/numbers.md):
``` sql
SELECT number FROM numbers(20) WHERE (number > 10) AND (number % 3 == 0);
```
Result:
``` text
┌─number─┐
│ 12 │
│ 15 │
│ 18 │
└────────┘
```
Example of checking for `NULL`:
``` sql
SELECT * FROM t_null WHERE y IS NULL
CREATE TABLE t_null(x Int8, y Nullable(Int8)) ENGINE=MergeTree() ORDER BY x;
INSERT INTO t_null VALUES (1, NULL), (2, 3);
SELECT * FROM t_null WHERE y IS NULL;
SELECT * FROM t_null WHERE y != 0;
```
Result:
``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
└───┴──────┘
┌─x─┬─y─┐
│ 2 │ 3 │
└───┴───┘
```
!!! note "Note"
There is a filtering optimization called [prewhere](prewhere.md).


@ -623,7 +623,7 @@ void LocalServer::processConfig()
fs::create_directories(fs::path(path) / "metadata/");
loadMetadataSystem(global_context);
attachSystemTablesLocal(*createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
loadMetadata(global_context);
@ -634,7 +634,7 @@ void LocalServer::processConfig()
}
else if (!config().has("no-system-tables"))
{
attachSystemTablesLocal(*createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}


@ -888,7 +888,15 @@ if (ThreadFuzzer::instance().isEffective())
access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes"));
/// Initialize access storages.
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
try
{
access_control.addStoragesFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
}
catch (...)
{
tryLogCurrentException(log);
throw;
}
/// Reload config in SYSTEM RELOAD CONFIG query.
global_context->setConfigReloadCallback([&]()
@ -1146,7 +1154,7 @@ if (ThreadFuzzer::instance().isEffective())
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
/// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);
attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
/// Firstly remove partially dropped databases, to avoid race with MaterializedMySQLSyncThread,
@ -1256,7 +1264,7 @@ if (ThreadFuzzer::instance().isEffective())
/// This object will periodically calculate some metrics.
AsynchronousMetrics async_metrics(
global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), servers_to_start_before_tables, servers);
attachSystemTablesAsync(*DatabaseCatalog::instance().getSystemDatabase(), async_metrics);
attachSystemTablesAsync(global_context, *DatabaseCatalog::instance().getSystemDatabase(), async_metrics);
for (const auto & listen_host : listen_hosts)
{


@ -172,7 +172,8 @@ void AccessControl::addUsersConfigStorage(const String & storage_name_, const Po
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->setConfig(users_config_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
LOG_DEBUG(getLogger(), "Added {} access storage '{}', path: {}",
String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getPath());
}
void AccessControl::addUsersConfigStorage(


@ -133,7 +133,16 @@ void AuthenticationData::setPasswordHashHex(const String & hash)
{
Digest digest;
digest.resize(hash.size() / 2);
boost::algorithm::unhex(hash.begin(), hash.end(), digest.data());
try
{
boost::algorithm::unhex(hash.begin(), hash.end(), digest.data());
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read password hash in hex, check for valid characters [0-9a-fA-F] and length");
}
setPasswordHashBinary(digest);
}


@ -18,7 +18,7 @@ namespace ErrorCodes
extern const int KERBEROS_ERROR;
}
GSSAcceptorContext::GSSAcceptorContext(const GSSAcceptorContext::Params& params_)
GSSAcceptorContext::GSSAcceptorContext(const GSSAcceptorContext::Params & params_)
: params(params_)
{
}
@ -50,7 +50,6 @@ std::recursive_mutex gss_global_mutex;
struct PrincipalName
{
explicit PrincipalName(String principal);
// operator String() const;
String name;
std::vector<String> instances;
@ -75,24 +74,6 @@ PrincipalName::PrincipalName(String principal)
}
}
/*
PrincipalName::operator String() const
{
String principal = name;
for (const auto & instance : instances)
{
principal += '/';
principal += instance;
}
principal += '@';
principal += realm;
return principal;
}
*/
String bufferToString(const gss_buffer_desc & buf)
{
String str;


@ -30,7 +30,7 @@ public:
String realm;
};
explicit GSSAcceptorContext(const Params& params_);
explicit GSSAcceptorContext(const Params & params_);
virtual ~GSSAcceptorContext() override;
GSSAcceptorContext(const GSSAcceptorContext &) = delete;


@ -208,8 +208,19 @@ namespace
std::vector<AccessEntityPtr> users;
users.reserve(user_names.size());
for (const auto & user_name : user_names)
users.push_back(parseUser(config, user_name));
{
try
{
users.push_back(parseUser(config, user_name));
}
catch (Exception & e)
{
e.addMessage(fmt::format("while parsing user '{}' in users configuration file", user_name));
throw;
}
}
return users;
}
@ -275,14 +286,25 @@ namespace
Poco::Util::AbstractConfiguration::Keys quota_names;
config.keys("quotas", quota_names);
std::vector<AccessEntityPtr> quotas;
quotas.reserve(quota_names.size());
for (const auto & quota_name : quota_names)
{
auto it = quota_to_user_ids.find(quota_name);
const std::vector<UUID> & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector<UUID>{};
quotas.push_back(parseQuota(config, quota_name, quota_users));
try
{
auto it = quota_to_user_ids.find(quota_name);
const std::vector<UUID> & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector<UUID>{};
quotas.push_back(parseQuota(config, quota_name, quota_users));
}
catch (Exception & e)
{
e.addMessage(fmt::format("while parsing quota '{}' in users configuration file", quota_name));
throw;
}
}
return quotas;
}
@ -440,11 +462,24 @@ namespace
const Poco::Util::AbstractConfiguration & config,
Fn<void(std::string_view)> auto && check_setting_name_function)
{
std::vector<AccessEntityPtr> profiles;
Poco::Util::AbstractConfiguration::Keys profile_names;
config.keys("profiles", profile_names);
std::vector<AccessEntityPtr> profiles;
profiles.reserve(profile_names.size());
for (const auto & profile_name : profile_names)
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
{
try
{
profiles.push_back(parseSettingsProfile(config, profile_name, check_setting_name_function));
}
catch (Exception & e)
{
e.addMessage(fmt::format("while parsing profile '{}' in users configuration file", profile_name));
throw;
}
}
return profiles;
}
@ -499,16 +534,24 @@ void UsersConfigAccessStorage::setConfig(const Poco::Util::AbstractConfiguration
void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
try
{
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & entity : parseUsers(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseQuotas(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseRowPolicies(config))
all_entities.emplace_back(generateID(*entity), entity);
for (const auto & entity : parseSettingsProfiles(config, check_setting_name_function))
all_entities.emplace_back(generateID(*entity), entity);
memory_storage.setAll(all_entities);
}
catch (Exception & e)
{
e.addMessage(fmt::format("while loading {}", path.empty() ? "configuration" : ("configuration file " + quoteString(path))));
throw;
}
}
void UsersConfigAccessStorage::load(


@ -247,7 +247,7 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
while (filt_pos < filt_end_aligned)
{
UInt64 mask = Bytes64MaskToBits64Mask(filt_pos);
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
if (0xffffffffffffffff == mask)
{


@ -242,7 +242,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_aligned)
{
uint64_t mask = Bytes64MaskToBits64Mask(filt_pos);
uint64_t mask = bytes64MaskToBits64Mask(filt_pos);
if (0xffffffffffffffff == mask)
{


@ -321,7 +321,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_aligned)
{
UInt64 mask = Bytes64MaskToBits64Mask(filt_pos);
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
if (0xffffffffffffffff == mask)
{


@ -235,7 +235,7 @@ namespace
while (filt_pos < filt_end_aligned)
{
uint64_t mask = Bytes64MaskToBits64Mask(filt_pos);
uint64_t mask = bytes64MaskToBits64Mask(filt_pos);
if (0xffffffffffffffff == mask)
{


@ -21,7 +21,7 @@ namespace ErrorCodes
}
/// Transform 64-byte mask to 64-bit mask
inline UInt64 Bytes64MaskToBits64Mask(const UInt8 * bytes64)
inline UInt64 bytes64MaskToBits64Mask(const UInt8 * bytes64)
{
#if defined(__AVX512F__) && defined(__AVX512BW__)
static const __m512i zero64 = _mm512_setzero_epi32();
@ -46,10 +46,8 @@ inline UInt64 Bytes64MaskToBits64Mask(const UInt8 * bytes64)
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48) & 0xffff000000000000);
#else
UInt64 res = 0;
const UInt8 * pos = bytes64;
const UInt8 * end = pos + 64;
for (; pos < end; ++pos)
res |= ((*pos == 0)<<(pos-bytes64));
for (size_t i = 0; i < 64; ++i)
res |= static_cast<UInt64>(0 == bytes64[i]) << i;
#endif
return ~res;
}
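
The scalar fallback above packs a 64-byte filter into a 64-bit mask: bit i of the result is set when bytes64[i] is non-zero, which is the same value the AVX-512 and SSE branches compute via compare-and-movemask. A minimal self-contained sketch of that fallback (the helper name and the main() harness are illustrative, not part of the patch):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

/// Scalar reference: bit i of the result is set when bytes64[i] is non-zero,
/// i.e. when row i passes the filter.
static uint64_t bytes64MaskToBits64MaskScalar(const uint8_t * bytes64)
{
    uint64_t res = 0;
    for (std::size_t i = 0; i < 64; ++i)
        res |= static_cast<uint64_t>(0 == bytes64[i]) << i;
    return ~res;
}

int main()
{
    uint8_t filt[64] = {};
    filt[0] = 1;   /// keep row 0
    filt[63] = 1;  /// keep row 63
    const uint64_t mask = bytes64MaskToBits64MaskScalar(filt);
    assert(mask == ((1ULL << 63) | 1ULL));
    std::printf("%016llx\n", static_cast<unsigned long long>(mask));
    return 0;
}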

View File

@ -489,20 +489,20 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const
}
}
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return std::make_shared<ZooKeeperHeartbeatResponse>(); }
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return std::make_shared<ZooKeeperSyncResponse>(); }
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return std::make_shared<ZooKeeperAuthResponse>(); }
ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const { return std::make_shared<ZooKeeperCreateResponse>(); }
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return std::make_shared<ZooKeeperRemoveResponse>(); }
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return std::make_shared<ZooKeeperExistsResponse>(); }
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return std::make_shared<ZooKeeperGetResponse>(); }
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return std::make_shared<ZooKeeperSetResponse>(); }
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return std::make_shared<ZooKeeperListResponse>(); }
ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return std::make_shared<ZooKeeperCheckResponse>(); }
ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return std::make_shared<ZooKeeperMultiResponse>(requests); }
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperSetACLResponse>(); }
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperGetACLResponse>(); }
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperHeartbeatResponse>()); }
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSyncResponse>()); }
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperAuthResponse>()); }
ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperCreateResponse>()); }
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveResponse>()); }
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperExistsResponse>()); }
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetResponse>()); }
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetResponse>()); }
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperListResponse>()); }
ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperCheckResponse>()); }
ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperMultiResponse>(requests)); }
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperCloseResponse>()); }
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetACLResponse>()); }
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetACLResponse>()); }
void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
{
@ -690,6 +690,40 @@ std::shared_ptr<ZooKeeperRequest> ZooKeeperRequest::read(ReadBuffer & in)
return request;
}
ZooKeeperRequest::~ZooKeeperRequest()
{
if (!request_created_time_ns)
return;
UInt64 elapsed_ns = clock_gettime_ns() - request_created_time_ns;
constexpr UInt64 max_request_time_ns = 1000000000ULL; /// 1 sec
if (max_request_time_ns < elapsed_ns)
{
LOG_TEST(&Poco::Logger::get(__PRETTY_FUNCTION__), "Processing of request xid={} took {} ms", xid, elapsed_ns / 1000000UL);
}
}
ZooKeeperResponsePtr ZooKeeperRequest::setTime(ZooKeeperResponsePtr response) const
{
if (request_created_time_ns)
{
response->response_created_time_ns = clock_gettime_ns();
}
return response;
}
ZooKeeperResponse::~ZooKeeperResponse()
{
if (!response_created_time_ns)
return;
UInt64 elapsed_ns = clock_gettime_ns() - response_created_time_ns;
constexpr UInt64 max_request_time_ns = 1000000000ULL; /// 1 sec
if (max_request_time_ns < elapsed_ns)
{
LOG_TEST(&Poco::Logger::get(__PRETTY_FUNCTION__), "Processing of response xid={} took {} ms", xid, elapsed_ns / 1000000UL);
}
}
ZooKeeperRequestPtr ZooKeeperRequestFactory::get(OpNum op_num) const
{
auto it = op_num_to_request.find(op_num);
@ -708,7 +742,12 @@ ZooKeeperRequestFactory & ZooKeeperRequestFactory::instance()
template<OpNum num, typename RequestT>
void registerZooKeeperRequest(ZooKeeperRequestFactory & factory)
{
factory.registerRequest(num, [] { return std::make_shared<RequestT>(); });
factory.registerRequest(num, []
{
auto res = std::make_shared<RequestT>();
res->request_created_time_ns = clock_gettime_ns();
return res;
});
}
ZooKeeperRequestFactory::ZooKeeperRequestFactory()
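
The pattern introduced in this file stamps a request at creation (in the factory lambda), copies the timestamp onto the response in setTime(), and logs from the destructor if more than a second passed before the object was destroyed. A minimal standalone sketch of the same idea, assuming a steady-clock helper in place of clock_gettime_ns and stderr in place of LOG_TEST:

#include <chrono>
#include <cstdint>
#include <cstdio>

static uint64_t monotonicNowNs()
{
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now().time_since_epoch()).count();
}

struct TimedRequest
{
    /// Stamped at construction, like request_created_time_ns in the factory lambda.
    uint64_t created_ns = monotonicNowNs();

    ~TimedRequest()
    {
        const uint64_t elapsed_ns = monotonicNowNs() - created_ns;
        constexpr uint64_t max_ns = 1000000000ULL; /// 1 sec, same threshold as the patch
        if (elapsed_ns > max_ns)
            std::fprintf(stderr, "request lived for %llu ms\n",
                         static_cast<unsigned long long>(elapsed_ns / 1000000ULL));
    }
};

int main()
{
    TimedRequest req; /// destroyed immediately, so nothing is printed
    (void)req;
    return 0;
}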

View File

@ -30,9 +30,11 @@ struct ZooKeeperResponse : virtual Response
XID xid = 0;
int64_t zxid = 0;
UInt64 response_created_time_ns = 0;
ZooKeeperResponse() = default;
ZooKeeperResponse(const ZooKeeperResponse &) = default;
virtual ~ZooKeeperResponse() override = default;
~ZooKeeperResponse() override;
virtual void readImpl(ReadBuffer &) = 0;
virtual void writeImpl(WriteBuffer &) const = 0;
virtual void write(WriteBuffer & out) const;
@ -54,9 +56,11 @@ struct ZooKeeperRequest : virtual Request
bool restored_from_zookeeper_log = false;
UInt64 request_created_time_ns = 0;
ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default;
virtual ~ZooKeeperRequest() override = default;
~ZooKeeperRequest() override;
virtual OpNum getOpNum() const = 0;
@ -69,6 +73,7 @@ struct ZooKeeperRequest : virtual Request
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
virtual ZooKeeperResponsePtr makeResponse() const = 0;
ZooKeeperResponsePtr setTime(ZooKeeperResponsePtr response) const;
virtual bool isReadRequest() const = 0;
virtual void createLogElements(LogElements & elems) const;

View File

@ -206,7 +206,10 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe
/// Session was disconnected, just skip this response
if (session_response_callback == session_to_response_callback.end())
{
LOG_TEST(log, "Cannot write response xid={}, op={}, session {} disconnected", response->xid, response->getOpNum(), session_id);
return;
}
session_response_callback->second(response);

View File

@ -423,6 +423,7 @@ class IColumn;
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \
@ -430,6 +431,9 @@ class IColumn;
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
@ -618,7 +622,7 @@ class IColumn;
M(String, format_template_row, "", "Path to file which contains format string for rows (for Template format)", 0) \
M(String, format_template_rows_between_delimiter, "\n", "Delimiter between rows (for Template format)", 0) \
\
M(String, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \
M(EscapingRule, format_custom_escaping_rule, "Escaped", "Field escaping rule (for CustomSeparated format)", 0) \
M(String, format_custom_field_delimiter, "\t", "Delimiter between fields (for CustomSeparated format)", 0) \
M(String, format_custom_row_before_delimiter, "", "Delimiter before field of the first column (for CustomSeparated format)", 0) \
M(String, format_custom_row_after_delimiter, "\n", "Delimiter after field of the last column (for CustomSeparated format)", 0) \
@ -627,7 +631,7 @@ class IColumn;
M(String, format_custom_result_after_delimiter, "", "Suffix after result set (for CustomSeparated format)", 0) \
\
M(String, format_regexp, "", "Regular expression (for Regexp format)", 0) \
M(String, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \
M(EscapingRule, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \
M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \
\
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \

View File

@ -121,4 +121,13 @@ IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS,
{{"by_names", FormatSettings::EnumComparingMode::BY_NAMES},
{"by_values", FormatSettings::EnumComparingMode::BY_VALUES},
{"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}})
IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
{{"None", FormatSettings::EscapingRule::None},
{"Escaped", FormatSettings::EscapingRule::Escaped},
{"Quoted", FormatSettings::EscapingRule::Quoted},
{"CSV", FormatSettings::EscapingRule::CSV},
{"JSON", FormatSettings::EscapingRule::JSON},
{"XML", FormatSettings::EscapingRule::XML},
{"Raw", FormatSettings::EscapingRule::Raw}})
}

View File

@ -170,4 +170,6 @@ DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation)
DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode)
DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
}

View File

@ -84,7 +84,7 @@ void DatabaseAtomic::drop(ContextPtr)
fs::remove_all(getMetadataPath());
}
void DatabaseAtomic::attachTable(const String & name, const StoragePtr & table, const String & relative_table_path)
void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, const StoragePtr & table, const String & relative_table_path)
{
assert(relative_table_path != data_path && !relative_table_path.empty());
DetachedTables not_in_use;
@ -96,7 +96,7 @@ void DatabaseAtomic::attachTable(const String & name, const StoragePtr & table,
table_name_to_path.emplace(std::make_pair(name, relative_table_path));
}
StoragePtr DatabaseAtomic::detachTable(const String & name)
StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name)
{
DetachedTables not_in_use;
std::unique_lock lock(mutex);

View File

@ -37,8 +37,8 @@ public:
void dropTable(ContextPtr context, const String & table_name, bool no_delay) override;
void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(const String & name) override;
void attachTable(ContextPtr context, const String & name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(ContextPtr context, const String & name) override;
String getTableDataPath(const String & table_name) const override;
String getTableDataPath(const ASTCreateQuery & query) const override;

View File

@ -1,5 +1,6 @@
#include <Databases/DatabaseFactory.h>
#include <filesystem>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseDictionary.h>
#include <Databases/DatabaseLazy.h>
@ -12,9 +13,9 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/formatAST.h>
#include <Common/Macros.h>
#include <Parsers/queryToString.h>
#include <Storages/ExternalDataSourceConfiguration.h>
#include <filesystem>
#include <Common/Macros.h>
#include "config_core.h"
@ -55,6 +56,7 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int CANNOT_CREATE_DATABASE;
extern const int NOT_IMPLEMENTED;
}
DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context)
@ -211,14 +213,22 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
if (engine_define->settings)
materialize_mode_settings->loadFromQuery(*engine_define);
if (create.uuid == UUIDHelpers::Nil)
return std::make_shared<DatabaseMaterializedMySQL<DatabaseOrdinary>>(
context, database_name, metadata_path, uuid, configuration.database, std::move(mysql_pool),
std::move(client), std::move(materialize_mode_settings));
else
return std::make_shared<DatabaseMaterializedMySQL<DatabaseAtomic>>(
context, database_name, metadata_path, uuid, configuration.database, std::move(mysql_pool),
std::move(client), std::move(materialize_mode_settings));
if (uuid == UUIDHelpers::Nil)
{
auto print_create_ast = create.clone();
print_create_ast->as<ASTCreateQuery>()->attach = false;
throw Exception(
fmt::format(
"The MaterializedMySQL database engine no longer supports Ordinary databases. To re-create the database, delete "
"the old one by executing \"rm -rf {}{{,.sql}}\", then re-create the database with the following query: {}",
metadata_path,
queryToString(print_create_ast)),
ErrorCodes::NOT_IMPLEMENTED);
}
return std::make_shared<DatabaseMaterializedMySQL>(
context, database_name, metadata_path, uuid, configuration.database, std::move(mysql_pool),
std::move(client), std::move(materialize_mode_settings));
}
catch (...)
{

View File

@ -39,7 +39,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_,
void DatabaseLazy::loadStoredObjects(
ContextMutablePtr local_context, bool /* force_restore */, bool /*force_attach*/, bool /* skip_startup_tables */)
{
iterateMetadataFiles(local_context, [this](const String & file_name)
iterateMetadataFiles(local_context, [this, &local_context](const String & file_name)
{
const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4));
@ -50,7 +50,7 @@ void DatabaseLazy::loadStoredObjects(
return;
}
attachTable(table_name, nullptr, {});
attachTable(local_context, table_name, nullptr, {});
});
}
@ -160,7 +160,7 @@ bool DatabaseLazy::empty() const
return tables_cache.empty();
}
void DatabaseLazy::attachTable(const String & table_name, const StoragePtr & table, const String &)
void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
{
LOG_DEBUG(log, "Attach table {}.", backQuote(table_name));
std::lock_guard lock(mutex);
@ -175,7 +175,7 @@ void DatabaseLazy::attachTable(const String & table_name, const StoragePtr & tab
it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name);
}
StoragePtr DatabaseLazy::detachTable(const String & table_name)
StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name)
{
StoragePtr res;
{

View File

@ -64,9 +64,9 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(const String & table_name) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
void shutdown() override;

View File

@ -185,7 +185,7 @@ void DatabaseOnDisk::createTable(
{
/// Metadata already exists, table was detached
removeDetachedPermanentlyFlag(local_context, table_name, table_metadata_path, true);
attachTable(table_name, table, getTableDataPath(create));
attachTable(local_context, table_name, table, getTableDataPath(create));
return;
}
@ -246,12 +246,12 @@ void DatabaseOnDisk::removeDetachedPermanentlyFlag(ContextPtr, const String & ta
void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const StoragePtr & table,
const String & table_metadata_tmp_path, const String & table_metadata_path,
ContextPtr /*query_context*/)
ContextPtr query_context)
{
try
{
/// Add a table to the map of known tables.
attachTable(query.getTable(), table, getTableDataPath(query));
attachTable(query_context, query.getTable(), table, getTableDataPath(query));
/// If it was ATTACH query and file with table metadata already exist
/// (so, ATTACH is done after DETACH), then rename atomically replaces old file with new one.
@ -264,9 +264,9 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
}
}
void DatabaseOnDisk::detachTablePermanently(ContextPtr, const String & table_name)
void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const String & table_name)
{
auto table = detachTable(table_name);
auto table = detachTable(query_context, table_name);
fs::path detached_permanently_flag(getObjectMetadataPath(table_name) + detached_suffix);
try
@ -288,7 +288,7 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na
if (table_data_path_relative.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path is empty");
StoragePtr table = detachTable(table_name);
StoragePtr table = detachTable(local_context, table_name);
/// This is possible for Lazy database.
if (!table)
@ -309,7 +309,7 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na
catch (...)
{
LOG_WARNING(log, getCurrentExceptionMessage(__PRETTY_FUNCTION__));
attachTable(table_name, table, table_data_path_relative);
attachTable(local_context, table_name, table, table_data_path_relative);
if (renamed)
fs::rename(table_metadata_path_drop, table_metadata_path);
throw;
@ -373,11 +373,12 @@ void DatabaseOnDisk::renameTable(
String table_metadata_path;
ASTPtr attach_query;
/// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case.
StoragePtr table = tryGetTable(table_name, getContext());
StoragePtr table = tryGetTable(table_name, local_context);
if (table->isDictionary() && !allow_rename_dictionary)
throw Exception("Dictionaries can be renamed only in Atomic databases", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("Dictionaries can be renamed only in Atomic databases", ErrorCodes::NOT_IMPLEMENTED);
detachTable(local_context, table_name);
detachTable(table_name);
UUID prev_uuid = UUIDHelpers::Nil;
try
{
@ -402,12 +403,12 @@ void DatabaseOnDisk::renameTable(
}
catch (const Exception &)
{
attachTable(table_name, table, table_data_relative_path);
attachTable(local_context, table_name, table, table_data_relative_path);
throw;
}
catch (const Poco::Exception & e)
{
attachTable(table_name, table, table_data_relative_path);
attachTable(local_context, table_name, table, table_data_relative_path);
/// Better diagnostics.
throw Exception{Exception::CreateFromPocoTag{}, e};
}

View File

@ -50,7 +50,7 @@ namespace
context,
force_restore);
database.attachTable(table_name, table, database.getTableDataPath(query));
database.attachTable(context, table_name, table, database.getTableDataPath(query));
}
catch (Exception & e)
{

View File

@ -46,7 +46,9 @@ public:
/// then it will be executed on all replicas.
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context);
void stopReplication();
bool hasReplicationThread() const override { return true; }
void stopReplication() override;
String getFullReplicaName() const;
static std::pair<String, String> parseFullReplicaName(const String & name);

View File

@ -187,7 +187,7 @@ bool DatabaseWithOwnTablesBase::empty() const
return tables.empty();
}
StoragePtr DatabaseWithOwnTablesBase::detachTable(const String & table_name)
StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, const String & table_name)
{
std::unique_lock lock(mutex);
return detachTableUnlocked(table_name, lock);
@ -214,7 +214,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n
return res;
}
void DatabaseWithOwnTablesBase::attachTable(const String & table_name, const StoragePtr & table, const String &)
void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &)
{
std::unique_lock lock(mutex);
attachTableUnlocked(table_name, table, lock);

View File

@ -28,9 +28,9 @@ public:
bool empty() const override;
void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(const String & table_name) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

View File

@ -197,13 +197,13 @@ public:
/// Add a table to the database, but do not add it to the metadata. The database may not support this method.
///
/// Note: ATTACH TABLE statement actually uses createTable method.
virtual void attachTable(const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {})
virtual void attachTable(ContextPtr /* context */, const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {})
{
throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Forget about the table without deleting it, and return it. The database may not support this method.
virtual StoragePtr detachTable(const String & /*name*/)
virtual StoragePtr detachTable(ContextPtr /* context */, const String & /*name*/)
{
throw Exception("There is no DETACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED);
}
@ -323,6 +323,13 @@ public:
getEngineName());
}
virtual bool hasReplicationThread() const { return false; }
virtual void stopReplication()
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName());
}
virtual ~IDatabase() = default;
protected:
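
The base-class change above gives IDatabase a default hasReplicationThread() returning false and a stopReplication() that throws, so only engines that actually run a replication thread (MaterializedMySQL, MaterializedPostgreSQL, Replicated) override both. A minimal self-contained analogue of that interface shape (class names below are illustrative, not the real IDatabase):

#include <stdexcept>
#include <string>

/// Base class throws from stopReplication() unless an engine that really runs
/// a replication thread overrides both members.
struct IDatabaseLike
{
    virtual ~IDatabaseLike() = default;
    virtual std::string getEngineName() const = 0;
    virtual bool hasReplicationThread() const { return false; }
    virtual void stopReplication()
    {
        throw std::logic_error("Database engine " + getEngineName() + " does not run a replication thread!");
    }
};

struct ReplicatedLikeDatabase : IDatabaseLike
{
    std::string getEngineName() const override { return "MaterializedMySQL"; }
    bool hasReplicationThread() const override { return true; }
    void stopReplication() override { /* stop the sync thread here */ }
};

int main()
{
    ReplicatedLikeDatabase db;
    if (db.hasReplicationThread())
        db.stopReplication();
    return 0;
}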

View File

@ -5,8 +5,6 @@
# include <Databases/MySQL/DatabaseMaterializedMySQL.h>
# include <Interpreters/Context.h>
# include <Databases/DatabaseOrdinary.h>
# include <Databases/DatabaseAtomic.h>
# include <Databases/MySQL/DatabaseMaterializedTablesIterator.h>
# include <Databases/MySQL/MaterializedMySQLSyncThread.h>
# include <Parsers/ASTCreateQuery.h>
@ -23,32 +21,9 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
template <>
DatabaseMaterializedMySQL<DatabaseOrdinary>::DatabaseMaterializedMySQL(
ContextPtr context_,
const String & database_name_,
const String & metadata_path_,
UUID /*uuid*/,
const String & mysql_database_name_,
mysqlxx::Pool && pool_,
MySQLClient && client_,
std::unique_ptr<MaterializedMySQLSettings> settings_)
: DatabaseOrdinary(
database_name_,
metadata_path_,
"data/" + escapeForFileName(database_name_) + "/",
"DatabaseMaterializedMySQL<Ordinary> (" + database_name_ + ")",
context_)
, settings(std::move(settings_))
, materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get())
{
}
template <>
DatabaseMaterializedMySQL<DatabaseAtomic>::DatabaseMaterializedMySQL(
DatabaseMaterializedMySQL::DatabaseMaterializedMySQL(
ContextPtr context_,
const String & database_name_,
const String & metadata_path_,
@ -57,16 +32,15 @@ DatabaseMaterializedMySQL<DatabaseAtomic>::DatabaseMaterializedMySQL(
mysqlxx::Pool && pool_,
MySQLClient && client_,
std::unique_ptr<MaterializedMySQLSettings> settings_)
: DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL<Atomic> (" + database_name_ + ")", context_)
: DatabaseAtomic(database_name_, metadata_path_, uuid, "DatabaseMaterializedMySQL(" + database_name_ + ")", context_)
, settings(std::move(settings_))
, materialize_thread(context_, database_name_, mysql_database_name_, std::move(pool_), std::move(client_), settings.get())
{
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::rethrowExceptionIfNeed() const
void DatabaseMaterializedMySQL::rethrowExceptionIfNeeded() const
{
std::unique_lock<std::mutex> lock(Base::mutex);
std::unique_lock<std::mutex> lock(mutex);
if (!settings->allows_query_when_mysql_lost && exception)
{
@ -84,17 +58,15 @@ void DatabaseMaterializedMySQL<Base>::rethrowExceptionIfNeed() const
}
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::setException(const std::exception_ptr & exception_)
void DatabaseMaterializedMySQL::setException(const std::exception_ptr & exception_)
{
std::unique_lock<std::mutex> lock(Base::mutex);
std::unique_lock<std::mutex> lock(mutex);
exception = exception_;
}
template <typename Base>
void DatabaseMaterializedMySQL<Base>::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach)
void DatabaseMaterializedMySQL::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach)
{
Base::startupTables(thread_pool, force_restore, force_attach);
DatabaseAtomic::startupTables(thread_pool, force_restore, force_attach);
if (!force_attach)
materialize_thread.assertMySQLAvailable();
@ -103,149 +75,92 @@ void DatabaseMaterializedMySQL<Base>::startupTables(ThreadPool & thread_pool, bo
started_up = true;
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::createTable(ContextPtr context_, const String & name, const StoragePtr & table, const ASTPtr & query)
void DatabaseMaterializedMySQL::createTable(ContextPtr context_, const String & name, const StoragePtr & table, const ASTPtr & query)
{
assertCalledFromSyncThreadOrDrop("create table");
Base::createTable(context_, name, table, query);
checkIsInternalQuery(context_, "CREATE TABLE");
DatabaseAtomic::createTable(context_, name, table, query);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::dropTable(ContextPtr context_, const String & name, bool no_delay)
void DatabaseMaterializedMySQL::dropTable(ContextPtr context_, const String & name, bool no_delay)
{
assertCalledFromSyncThreadOrDrop("drop table");
Base::dropTable(context_, name, no_delay);
checkIsInternalQuery(context_, "DROP TABLE");
DatabaseAtomic::dropTable(context_, name, no_delay);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::attachTable(const String & name, const StoragePtr & table, const String & relative_table_path)
void DatabaseMaterializedMySQL::attachTable(ContextPtr context_, const String & name, const StoragePtr & table, const String & relative_table_path)
{
assertCalledFromSyncThreadOrDrop("attach table");
Base::attachTable(name, table, relative_table_path);
checkIsInternalQuery(context_, "ATTACH TABLE");
DatabaseAtomic::attachTable(context_, name, table, relative_table_path);
}
template<typename Base>
StoragePtr DatabaseMaterializedMySQL<Base>::detachTable(const String & name)
StoragePtr DatabaseMaterializedMySQL::detachTable(ContextPtr context_, const String & name)
{
assertCalledFromSyncThreadOrDrop("detach table");
return Base::detachTable(name);
checkIsInternalQuery(context_, "DETACH TABLE");
return DatabaseAtomic::detachTable(context_, name);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::renameTable(ContextPtr context_, const String & name, IDatabase & to_database, const String & to_name, bool exchange, bool dictionary)
void DatabaseMaterializedMySQL::renameTable(ContextPtr context_, const String & name, IDatabase & to_database, const String & to_name, bool exchange, bool dictionary)
{
assertCalledFromSyncThreadOrDrop("rename table");
checkIsInternalQuery(context_, "RENAME TABLE");
if (exchange)
throw Exception("MaterializedMySQL database not support exchange table.", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("MaterializedMySQL database does not support EXCHANGE TABLE.", ErrorCodes::NOT_IMPLEMENTED);
if (dictionary)
throw Exception("MaterializedMySQL database not support rename dictionary.", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("MaterializedMySQL database does not support RENAME DICTIONARY.", ErrorCodes::NOT_IMPLEMENTED);
if (to_database.getDatabaseName() != Base::getDatabaseName())
if (to_database.getDatabaseName() != DatabaseAtomic::getDatabaseName())
throw Exception("Cannot rename with other database for MaterializedMySQL database.", ErrorCodes::NOT_IMPLEMENTED);
Base::renameTable(context_, name, *this, to_name, exchange, dictionary);
DatabaseAtomic::renameTable(context_, name, *this, to_name, exchange, dictionary);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::alterTable(ContextPtr context_, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
void DatabaseMaterializedMySQL::alterTable(ContextPtr context_, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
{
assertCalledFromSyncThreadOrDrop("alter table");
Base::alterTable(context_, table_id, metadata);
checkIsInternalQuery(context_, "ALTER TABLE");
DatabaseAtomic::alterTable(context_, table_id, metadata);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::drop(ContextPtr context_)
void DatabaseMaterializedMySQL::drop(ContextPtr context_)
{
/// Remove metadata info
fs::path metadata(Base::getMetadataPath() + "/.metadata");
fs::path metadata(getMetadataPath() + "/.metadata");
if (fs::exists(metadata))
fs::remove(metadata);
Base::drop(context_);
DatabaseAtomic::drop(context_);
}
template<typename Base>
StoragePtr DatabaseMaterializedMySQL<Base>::tryGetTable(const String & name, ContextPtr context_) const
StoragePtr DatabaseMaterializedMySQL::tryGetTable(const String & name, ContextPtr context_) const
{
if (!MaterializedMySQLSyncThread::isMySQLSyncThread())
{
StoragePtr nested_storage = Base::tryGetTable(name, context_);
if (!nested_storage)
return {};
return std::make_shared<StorageMaterializedMySQL>(std::move(nested_storage), this);
}
return Base::tryGetTable(name, context_);
StoragePtr nested_storage = DatabaseAtomic::tryGetTable(name, context_);
if (context_->isInternalQuery())
return nested_storage;
return std::make_shared<StorageMaterializedMySQL>(std::move(nested_storage), this);
}
template <typename Base>
DatabaseTablesIteratorPtr
DatabaseMaterializedMySQL<Base>::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const
DatabaseMaterializedMySQL::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const
{
if (!MaterializedMySQLSyncThread::isMySQLSyncThread())
{
DatabaseTablesIteratorPtr iterator = Base::getTablesIterator(context_, filter_by_table_name);
return std::make_unique<DatabaseMaterializedTablesIterator>(std::move(iterator), this);
}
return Base::getTablesIterator(context_, filter_by_table_name);
DatabaseTablesIteratorPtr iterator = DatabaseAtomic::getTablesIterator(context_, filter_by_table_name);
if (context_->isInternalQuery())
return iterator;
return std::make_unique<DatabaseMaterializedTablesIterator>(std::move(iterator), this);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::assertCalledFromSyncThreadOrDrop(const char * method) const
void DatabaseMaterializedMySQL::checkIsInternalQuery(ContextPtr context_, const char * method) const
{
if (!MaterializedMySQLSyncThread::isMySQLSyncThread() && started_up)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MaterializedMySQL database not support {}", method);
if (started_up && context_ && !context_->isInternalQuery())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MaterializedMySQL database does not support {}", method);
}
template<typename Base>
void DatabaseMaterializedMySQL<Base>::shutdownSynchronizationThread()
void DatabaseMaterializedMySQL::stopReplication()
{
materialize_thread.stopSynchronization();
started_up = false;
}
template<typename Database, template<class> class Helper, typename... Args>
auto castToMaterializedMySQLAndCallHelper(Database * database, Args && ... args)
{
using Ordinary = DatabaseMaterializedMySQL<DatabaseOrdinary>;
using Atomic = DatabaseMaterializedMySQL<DatabaseAtomic>;
using ToOrdinary = typename std::conditional_t<std::is_const_v<Database>, const Ordinary *, Ordinary *>;
using ToAtomic = typename std::conditional_t<std::is_const_v<Database>, const Atomic *, Atomic *>;
if (auto * database_materialize = typeid_cast<ToOrdinary>(database))
return (database_materialize->*Helper<Ordinary>::v)(std::forward<Args>(args)...);
if (auto * database_materialize = typeid_cast<ToAtomic>(database))
return (database_materialize->*Helper<Atomic>::v)(std::forward<Args>(args)...);
throw Exception("LOGICAL_ERROR: cannot cast to DatabaseMaterializedMySQL, it is a bug.", ErrorCodes::LOGICAL_ERROR);
}
template<typename T> struct HelperSetException { static constexpr auto v = &T::setException; };
void setSynchronizationThreadException(const DatabasePtr & materialized_mysql_db, const std::exception_ptr & exception)
{
castToMaterializedMySQLAndCallHelper<IDatabase, HelperSetException>(materialized_mysql_db.get(), exception);
}
template<typename T> struct HelperStopSync { static constexpr auto v = &T::shutdownSynchronizationThread; };
void stopDatabaseSynchronization(const DatabasePtr & materialized_mysql_db)
{
castToMaterializedMySQLAndCallHelper<IDatabase, HelperStopSync>(materialized_mysql_db.get());
}
template<typename T> struct HelperRethrow { static constexpr auto v = &T::rethrowExceptionIfNeed; };
void rethrowSyncExceptionIfNeed(const IDatabase * materialized_mysql_db)
{
castToMaterializedMySQLAndCallHelper<const IDatabase, HelperRethrow>(materialized_mysql_db);
}
template class DatabaseMaterializedMySQL<DatabaseOrdinary>;
template class DatabaseMaterializedMySQL<DatabaseAtomic>;
}
#endif

View File

@ -6,7 +6,9 @@
#include <mysqlxx/Pool.h>
#include <Core/MySQL/MySQLClient.h>
#include <base/UUID.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseAtomic.h>
#include <Databases/MySQL/MaterializedMySQLSettings.h>
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
@ -17,17 +19,20 @@ namespace DB
*
* All table structure and data will be written to the local file system
*/
template<typename Base>
class DatabaseMaterializedMySQL : public Base
class DatabaseMaterializedMySQL : public DatabaseAtomic
{
public:
DatabaseMaterializedMySQL(
ContextPtr context, const String & database_name_, const String & metadata_path_, UUID uuid,
const String & mysql_database_name_, mysqlxx::Pool && pool_,
MySQLClient && client_, std::unique_ptr<MaterializedMySQLSettings> settings_);
ContextPtr context,
const String & database_name_,
const String & metadata_path_,
UUID uuid,
const String & mysql_database_name_,
mysqlxx::Pool && pool_,
MySQLClient && client_,
std::unique_ptr<MaterializedMySQLSettings> settings_);
void rethrowExceptionIfNeed() const;
void rethrowExceptionIfNeeded() const;
void setException(const std::exception_ptr & exception);
protected:
@ -49,9 +54,9 @@ public:
void dropTable(ContextPtr context_, const String & name, bool no_delay) override;
void attachTable(const String & name, const StoragePtr & table, const String & relative_table_path) override;
void attachTable(ContextPtr context_, const String & name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(const String & name) override;
StoragePtr detachTable(ContextPtr context_, const String & name) override;
void renameTable(ContextPtr context_, const String & name, IDatabase & to_database, const String & to_name, bool exchange, bool dictionary) override;
@ -63,18 +68,15 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override;
void assertCalledFromSyncThreadOrDrop(const char * method) const;
void checkIsInternalQuery(ContextPtr context_, const char * method) const;
void shutdownSynchronizationThread();
bool hasReplicationThread() const override { return true; }
void stopReplication() override;
friend class DatabaseMaterializedTablesIterator;
};
void setSynchronizationThreadException(const DatabasePtr & materialized_mysql_db, const std::exception_ptr & exception);
void stopDatabaseSynchronization(const DatabasePtr & materialized_mysql_db);
void rethrowSyncExceptionIfNeed(const IDatabase * materialized_mysql_db);
}
#endif

View File

@ -335,7 +335,7 @@ void DatabaseMySQL::cleanOutdatedTables()
}
}
void DatabaseMySQL::attachTable(const String & table_name, const StoragePtr & storage, const String &)
void DatabaseMySQL::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & storage, const String &)
{
std::lock_guard<std::mutex> lock{mutex};
@ -358,7 +358,7 @@ void DatabaseMySQL::attachTable(const String & table_name, const StoragePtr & st
fs::remove(remove_flag);
}
StoragePtr DatabaseMySQL::detachTable(const String & table_name)
StoragePtr DatabaseMySQL::detachTable(ContextPtr /* context */, const String & table_name)
{
std::lock_guard<std::mutex> lock{mutex};
@ -455,7 +455,7 @@ DatabaseMySQL::~DatabaseMySQL()
}
}
void DatabaseMySQL::createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query)
void DatabaseMySQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query)
{
const auto & create = create_query->as<ASTCreateQuery>();
@ -473,7 +473,7 @@ void DatabaseMySQL::createTable(ContextPtr, const String & table_name, const Sto
throw Exception("The MySQL database engine can only execute attach statements of type attach table database_name.table_name",
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
attachTable(table_name, storage, {});
attachTable(local_context, table_name, storage, {});
}
}

View File

@ -77,13 +77,13 @@ public:
void loadStoredObjects(ContextMutablePtr, bool, bool force_attach, bool skip_startup_tables) override;
StoragePtr detachTable(const String & table_name) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
void detachTablePermanently(ContextPtr context, const String & table_name) override;
void dropTable(ContextPtr context, const String & table_name, bool no_delay) override;
void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
protected:
ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override;

View File

@ -53,6 +53,8 @@ static ContextMutablePtr createQueryContext(ContextPtr context)
auto query_context = Context::createCopy(context);
query_context->setSettings(new_query_settings);
query_context->setInternalQuery(true);
query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
query_context->setCurrentQueryId(""); // generate random query_id
return query_context;
@ -764,15 +766,9 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
}
}
bool MaterializedMySQLSyncThread::isMySQLSyncThread()
{
return getThreadName() == std::string_view(MYSQL_BACKGROUND_THREAD_NAME);
}
void MaterializedMySQLSyncThread::setSynchronizationThreadException(const std::exception_ptr & exception)
{
auto db = DatabaseCatalog::instance().getDatabase(database_name);
DB::setSynchronizationThreadException(db, exception);
assert_cast<DatabaseMaterializedMySQL *>(DatabaseCatalog::instance().getDatabase(database_name).get())->setException(exception);
}
void MaterializedMySQLSyncThread::Buffers::add(size_t block_rows, size_t block_bytes, size_t written_rows, size_t written_bytes)

View File

@ -53,8 +53,6 @@ public:
void assertMySQLAvailable();
static bool isMySQLSyncThread();
private:
Poco::Logger * log;

View File

@ -266,11 +266,11 @@ void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const
DatabaseAtomic::createTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, table, query_copy);
/// Attach MaterializedPostgreSQL table.
attachTable(table_name, table, {});
attachTable(local_context, table_name, table, {});
}
void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path)
void DatabaseMaterializedPostgreSQL::attachTable(ContextPtr context_, const String & table_name, const StoragePtr & table, const String & relative_table_path)
{
/// If there is query context then we need to attach materialized storage.
/// If there is no query context then we need to attach internal storage from atomic database.
@ -310,12 +310,12 @@ void DatabaseMaterializedPostgreSQL::attachTable(const String & table_name, cons
}
else
{
DatabaseAtomic::attachTable(table_name, table, relative_table_path);
DatabaseAtomic::attachTable(context_, table_name, table, relative_table_path);
}
}
StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name)
StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, const String & table_name)
{
/// If there is query context then we need to detach materialized storage.
/// If there is no query context then we need to detach internal storage from atomic database.
@ -369,7 +369,7 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(const String & table_name
}
else
{
return DatabaseAtomic::detachTable(table_name);
return DatabaseAtomic::detachTable(context_, table_name);
}
}

View File

@ -49,15 +49,17 @@ public:
void createTable(ContextPtr context, const String & table_name, const StoragePtr & table, const ASTPtr & query) override;
void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;
StoragePtr detachTable(const String & table_name) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
void dropTable(ContextPtr local_context, const String & name, bool no_delay) override;
void drop(ContextPtr local_context) override;
void stopReplication();
bool hasReplicationThread() const override { return true; }
void stopReplication() override;
void applySettingsChanges(const SettingsChanges & settings_changes, ContextPtr query_context) override;

View File

@ -206,7 +206,7 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr,
}
void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr & storage, const String &)
void DatabasePostgreSQL::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & storage, const String &)
{
std::lock_guard<std::mutex> lock{mutex};
@ -231,7 +231,7 @@ void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr
}
StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
StoragePtr DatabasePostgreSQL::detachTable(ContextPtr /* context_ */, const String & table_name)
{
std::lock_guard<std::mutex> lock{mutex};
@ -251,14 +251,14 @@ StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
}
void DatabasePostgreSQL::createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query)
void DatabasePostgreSQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query)
{
const auto & create = create_query->as<ASTCreateQuery>();
if (!create->attach)
throw Exception("PostgreSQL database engine does not support create table", ErrorCodes::NOT_IMPLEMENTED);
attachTable(table_name, storage, {});
attachTable(local_context, table_name, storage, {});
}

View File

@ -55,8 +55,8 @@ public:
void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override;
void dropTable(ContextPtr, const String & table_name, bool no_delay) override;
void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
StoragePtr detachTable(const String & table_name) override;
void attachTable(ContextPtr context, const String & table_name, const StoragePtr & storage, const String & relative_table_path) override;
StoragePtr detachTable(ContextPtr context, const String & table_name) override;
void drop(ContextPtr /*context*/) override;
void shutdown() override;

View File

@ -0,0 +1,225 @@
#include <Formats/EscapingRuleUtils.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule)
{
if (escaping_rule.empty())
return FormatSettings::EscapingRule::None;
else if (escaping_rule == "None")
return FormatSettings::EscapingRule::None;
else if (escaping_rule == "Escaped")
return FormatSettings::EscapingRule::Escaped;
else if (escaping_rule == "Quoted")
return FormatSettings::EscapingRule::Quoted;
else if (escaping_rule == "CSV")
return FormatSettings::EscapingRule::CSV;
else if (escaping_rule == "JSON")
return FormatSettings::EscapingRule::JSON;
else if (escaping_rule == "XML")
return FormatSettings::EscapingRule::XML;
else if (escaping_rule == "Raw")
return FormatSettings::EscapingRule::Raw;
else
throw Exception("Unknown escaping rule \"" + escaping_rule + "\"", ErrorCodes::BAD_ARGUMENTS);
}
String escapingRuleToString(FormatSettings::EscapingRule escaping_rule)
{
switch (escaping_rule)
{
case FormatSettings::EscapingRule::None:
return "None";
case FormatSettings::EscapingRule::Escaped:
return "Escaped";
case FormatSettings::EscapingRule::Quoted:
return "Quoted";
case FormatSettings::EscapingRule::CSV:
return "CSV";
case FormatSettings::EscapingRule::JSON:
return "JSON";
case FormatSettings::EscapingRule::XML:
return "XML";
case FormatSettings::EscapingRule::Raw:
return "Raw";
}
__builtin_unreachable();
}
void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
{
String tmp;
constexpr const char * field_name = "<SKIPPED COLUMN>";
constexpr size_t field_name_len = 16;
switch (escaping_rule)
{
case FormatSettings::EscapingRule::None:
/// Empty field, just skip spaces
break;
case FormatSettings::EscapingRule::Escaped:
readEscapedString(tmp, buf);
break;
case FormatSettings::EscapingRule::Quoted:
/// FIXME: it skips only strings, not numbers, arrays or tuples.
/// we should read until delimiter and skip all data between
/// single quotes.
readQuotedString(tmp, buf);
break;
case FormatSettings::EscapingRule::CSV:
readCSVString(tmp, buf, format_settings.csv);
break;
case FormatSettings::EscapingRule::JSON:
skipJSONField(buf, StringRef(field_name, field_name_len));
break;
case FormatSettings::EscapingRule::Raw:
readString(tmp, buf);
break;
default:
__builtin_unreachable();
}
}
bool deserializeFieldByEscapingRule(
const DataTypePtr & type,
const SerializationPtr & serialization,
IColumn & column,
ReadBuffer & buf,
FormatSettings::EscapingRule escaping_rule,
const FormatSettings & format_settings)
{
bool read = true;
bool parse_as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
switch (escaping_rule)
{
case FormatSettings::EscapingRule::Escaped:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextEscapedImpl(column, buf, format_settings, serialization);
else
serialization->deserializeTextEscaped(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::Quoted:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextQuotedImpl(column, buf, format_settings, serialization);
else
serialization->deserializeTextQuoted(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::CSV:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextCSVImpl(column, buf, format_settings, serialization);
else
serialization->deserializeTextCSV(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::JSON:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextJSONImpl(column, buf, format_settings, serialization);
else
serialization->deserializeTextJSON(column, buf, format_settings);
break;
case FormatSettings::EscapingRule::Raw:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextRawImpl(column, buf, format_settings, serialization);
else
serialization->deserializeTextRaw(column, buf, format_settings);
break;
default:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule));
}
return read;
}
void serializeFieldByEscapingRule(
const IColumn & column,
const ISerialization & serialization,
WriteBuffer & out,
size_t row_num,
FormatSettings::EscapingRule escaping_rule,
const FormatSettings & format_settings)
{
switch (escaping_rule)
{
case FormatSettings::EscapingRule::Escaped:
serialization.serializeTextEscaped(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::Quoted:
serialization.serializeTextQuoted(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::CSV:
serialization.serializeTextCSV(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::JSON:
serialization.serializeTextJSON(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::XML:
serialization.serializeTextXML(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::Raw:
serialization.serializeTextRaw(column, row_num, out, format_settings);
break;
case FormatSettings::EscapingRule::None:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot serialize field with None escaping rule");
}
}
void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
{
switch (escaping_rule)
{
case FormatSettings::EscapingRule::Quoted:
writeQuotedString(value, out);
break;
case FormatSettings::EscapingRule::JSON:
writeJSONString(value, out, format_settings);
break;
case FormatSettings::EscapingRule::Raw:
writeString(value, out);
break;
case FormatSettings::EscapingRule::CSV:
writeCSVString(value, out);
break;
case FormatSettings::EscapingRule::Escaped:
writeEscapedString(value, out);
break;
case FormatSettings::EscapingRule::XML:
writeXMLStringForTextElement(value, out);
break;
case FormatSettings::EscapingRule::None:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot serialize string with None escaping rule");
}
}
String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
{
String result;
switch (escaping_rule)
{
case FormatSettings::EscapingRule::Quoted:
readQuotedString(result, buf);
break;
case FormatSettings::EscapingRule::JSON:
readJSONString(result, buf);
break;
case FormatSettings::EscapingRule::Raw:
readString(result, buf);
break;
case FormatSettings::EscapingRule::CSV:
readCSVString(result, buf, format_settings.csv);
break;
case FormatSettings::EscapingRule::Escaped:
readEscapedString(result, buf);
break;
default:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read string with {} escaping rule", escapingRuleToString(escaping_rule));
}
return result;
}
}
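
These EscapingRuleUtils helpers centralize the string-to-enum mapping that settings such as format_custom_escaping_rule and the Template/CustomSeparated parsers rely on. A self-contained round-trip sketch (the enum and functions below mirror the shape of the real code instead of including the ClickHouse headers, so they are illustrative only):

#include <cassert>
#include <stdexcept>
#include <string>

/// Analogue of FormatSettings::EscapingRule and the two conversion helpers.
enum class EscapingRule { None, Escaped, Quoted, CSV, JSON, XML, Raw };

static EscapingRule stringToEscapingRule(const std::string & s)
{
    if (s.empty() || s == "None") return EscapingRule::None;
    if (s == "Escaped") return EscapingRule::Escaped;
    if (s == "Quoted") return EscapingRule::Quoted;
    if (s == "CSV") return EscapingRule::CSV;
    if (s == "JSON") return EscapingRule::JSON;
    if (s == "XML") return EscapingRule::XML;
    if (s == "Raw") return EscapingRule::Raw;
    throw std::invalid_argument("Unknown escaping rule \"" + s + "\"");
}

static std::string escapingRuleToString(EscapingRule rule)
{
    switch (rule)
    {
        case EscapingRule::None: return "None";
        case EscapingRule::Escaped: return "Escaped";
        case EscapingRule::Quoted: return "Quoted";
        case EscapingRule::CSV: return "CSV";
        case EscapingRule::JSON: return "JSON";
        case EscapingRule::XML: return "XML";
        case EscapingRule::Raw: return "Raw";
    }
    return "None"; /// not reachable
}

int main()
{
    /// Round-trip check over every rule name accepted by the settings parser.
    for (const char * name : {"None", "Escaped", "Quoted", "CSV", "JSON", "XML", "Raw"})
        assert(escapingRuleToString(stringToEscapingRule(name)) == name);
    return 0;
}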

View File

@ -0,0 +1,37 @@
#pragma once
#include <Formats/FormatSettings.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <IO/ReadBuffer.h>
namespace DB
{
FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule);
String escapingRuleToString(FormatSettings::EscapingRule escaping_rule);
void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
bool deserializeFieldByEscapingRule(
const DataTypePtr & type,
const SerializationPtr & serialization,
IColumn & column,
ReadBuffer & buf,
FormatSettings::EscapingRule escaping_rule,
const FormatSettings & format_settings);
void serializeFieldByEscapingRule(
const IColumn & column,
const ISerialization & serialization,
WriteBuffer & out,
size_t row_num,
FormatSettings::EscapingRule escaping_rule,
const FormatSettings & format_settings);
void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings);
}

View File

@ -47,6 +47,17 @@ struct FormatSettings
UnixTimestamp
};
enum class EscapingRule
{
None,
Escaped,
Quoted,
CSV,
JSON,
XML,
Raw
};
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
UInt64 input_allow_errors_num = 0;
@ -89,7 +100,7 @@ struct FormatSettings
std::string row_after_delimiter;
std::string row_between_delimiter;
std::string field_delimiter;
std::string escaping_rule;
EscapingRule escaping_rule = EscapingRule::Escaped;
} custom;
struct
@ -148,7 +159,7 @@ struct FormatSettings
struct
{
std::string regexp;
std::string escaping_rule;
EscapingRule escaping_rule = EscapingRule::Raw;
bool skip_unmatched = false;
} regexp;

View File

@ -1,9 +1,9 @@
#include <Formats/ParsedTemplateFormatString.h>
#include <Formats/verbosePrintString.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromFile.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
namespace DB
@ -11,7 +11,6 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int INVALID_TEMPLATE_FORMAT;
}
@ -83,7 +82,7 @@ void ParsedTemplateFormatString::parse(const String & format_string, const Colum
state = Format;
else if (*pos == '}')
{
formats.push_back(ColumnFormat::None);
escaping_rules.push_back(EscapingRule::None);
delimiters.emplace_back();
state = Delimiter;
}
@ -108,7 +107,7 @@ void ParsedTemplateFormatString::parse(const String & format_string, const Colum
case Format:
if (*pos == '}')
{
formats.push_back(stringToFormat(String(token_begin, pos - token_begin)));
escaping_rules.push_back(stringToEscapingRule(String(token_begin, pos - token_begin)));
token_begin = pos + 1;
delimiters.emplace_back();
state = Delimiter;
@ -120,56 +119,11 @@ void ParsedTemplateFormatString::parse(const String & format_string, const Colum
delimiters.back().append(token_begin, pos - token_begin);
}
ParsedTemplateFormatString::ColumnFormat ParsedTemplateFormatString::stringToFormat(const String & col_format)
{
if (col_format.empty())
return ColumnFormat::None;
else if (col_format == "None")
return ColumnFormat::None;
else if (col_format == "Escaped")
return ColumnFormat::Escaped;
else if (col_format == "Quoted")
return ColumnFormat::Quoted;
else if (col_format == "CSV")
return ColumnFormat::Csv;
else if (col_format == "JSON")
return ColumnFormat::Json;
else if (col_format == "XML")
return ColumnFormat::Xml;
else if (col_format == "Raw")
return ColumnFormat::Raw;
else
throw Exception("Unknown field format \"" + col_format + "\"", ErrorCodes::BAD_ARGUMENTS);
}
size_t ParsedTemplateFormatString::columnsCount() const
{
return format_idx_to_column_idx.size();
}
String ParsedTemplateFormatString::formatToString(ParsedTemplateFormatString::ColumnFormat format)
{
switch (format)
{
case ColumnFormat::None:
return "None";
case ColumnFormat::Escaped:
return "Escaped";
case ColumnFormat::Quoted:
return "Quoted";
case ColumnFormat::Csv:
return "CSV";
case ColumnFormat::Json:
return "Json";
case ColumnFormat::Xml:
return "Xml";
case ColumnFormat::Raw:
return "Raw";
}
__builtin_unreachable();
}
const char * ParsedTemplateFormatString::readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s)
{
s.clear();
@ -197,7 +151,7 @@ String ParsedTemplateFormatString::dump() const
res << "\nDelimiter " << 0 << ": ";
verbosePrintString(delimiters.front().c_str(), delimiters.front().c_str() + delimiters.front().size(), res);
size_t num_columns = std::max(formats.size(), format_idx_to_column_idx.size());
size_t num_columns = std::max(escaping_rules.size(), format_idx_to_column_idx.size());
for (size_t i = 0; i < num_columns; ++i)
{
res << "\nColumn " << i << ": \"";
@ -216,7 +170,7 @@ String ParsedTemplateFormatString::dump() const
else
res << *format_idx_to_column_idx[i];
res << "), Format " << (i < formats.size() ? formatToString(formats[i]) : "<ERROR>");
res << "), Format " << (i < escaping_rules.size() ? escapingRuleToString(escaping_rules[i]) : "<ERROR>");
res << "\nDelimiter " << i + 1 << ": ";
if (delimiters.size() <= i + 1)
@ -235,34 +189,4 @@ void ParsedTemplateFormatString::throwInvalidFormat(const String & message, size
ErrorCodes::INVALID_TEMPLATE_FORMAT);
}
ParsedTemplateFormatString ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(const FormatSettings::Custom & settings)
{
/// Set resultset format to "result_before_delimiter ${data} result_after_delimiter"
ParsedTemplateFormatString resultset_format;
resultset_format.delimiters.emplace_back(settings.result_before_delimiter);
resultset_format.delimiters.emplace_back(settings.result_after_delimiter);
resultset_format.formats.emplace_back(ParsedTemplateFormatString::ColumnFormat::None);
resultset_format.format_idx_to_column_idx.emplace_back(0);
resultset_format.column_names.emplace_back("data");
return resultset_format;
}
ParsedTemplateFormatString ParsedTemplateFormatString::setupCustomSeparatedRowFormat(const FormatSettings::Custom & settings, const Block & sample)
{
/// Set row format to
/// "row_before_delimiter ${Col0:escaping} field_delimiter ${Col1:escaping} field_delimiter ... ${ColN:escaping} row_after_delimiter"
ParsedTemplateFormatString::ColumnFormat escaping = ParsedTemplateFormatString::stringToFormat(settings.escaping_rule);
ParsedTemplateFormatString row_format;
row_format.delimiters.emplace_back(settings.row_before_delimiter);
for (size_t i = 0; i < sample.columns(); ++i)
{
row_format.formats.emplace_back(escaping);
row_format.format_idx_to_column_idx.emplace_back(i);
row_format.column_names.emplace_back(sample.getByPosition(i).name);
bool last_column = i == sample.columns() - 1;
row_format.delimiters.emplace_back(last_column ? settings.row_after_delimiter : settings.field_delimiter);
}
return row_format;
}
}

View File

@ -15,23 +15,14 @@ using Strings = std::vector<String>;
struct ParsedTemplateFormatString
{
enum class ColumnFormat
{
None,
Escaped,
Quoted,
Csv,
Json,
Xml,
Raw
};
using EscapingRule = FormatSettings::EscapingRule;
/// Format string has syntax: "Delimiter0 ${ColumnName0:Format0} Delimiter1 ${ColumnName1:Format1} Delimiter2"
/// The following vectors are filled with corresponding values; delimiters.size() - 1 = escaping_rules.size() = format_idx_to_column_idx.size()
/// If format_idx_to_column_idx[i] has no value, then TemplateRowInputFormat will skip i-th column.
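/// For example (hypothetical format string): "Before ${col1:CSV};${col2:Quoted} After"
/// yields delimiters = {"Before ", ";", " After"}, escaping_rules = {CSV, Quoted}
/// and column_names = {"col1", "col2"}.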
std::vector<String> delimiters;
std::vector<ColumnFormat> formats;
std::vector<EscapingRule> escaping_rules;
std::vector<std::optional<size_t>> format_idx_to_column_idx;
/// For diagnostic info
@ -44,16 +35,11 @@ struct ParsedTemplateFormatString
void parse(const String & format_string, const ColumnIdxGetter & idx_by_name);
static ColumnFormat stringToFormat(const String & format);
static String formatToString(ColumnFormat format);
static const char * readMayBeQuotedColumnNameInto(const char * pos, size_t size, String & s);
size_t columnsCount() const;
String dump() const;
[[noreturn]] void throwInvalidFormat(const String & message, size_t column) const;
static ParsedTemplateFormatString setupCustomSeparatedResultsetFormat(const FormatSettings::Custom & settings);
static ParsedTemplateFormatString setupCustomSeparatedRowFormat(const FormatSettings::Custom & settings, const Block & sample);
};
}

View File

@ -50,6 +50,8 @@ void registerInputFormatAvro(FormatFactory & factory);
void registerOutputFormatAvro(FormatFactory & factory);
void registerInputFormatRawBLOB(FormatFactory & factory);
void registerOutputFormatRawBLOB(FormatFactory & factory);
void registerInputFormatCustomSeparated(FormatFactory & factory);
void registerOutputFormatCustomSeparated(FormatFactory & factory);
/// Output only (presentational) formats.
@ -115,6 +117,8 @@ void registerFormats()
registerOutputFormatMsgPack(factory);
registerInputFormatRawBLOB(factory);
registerOutputFormatRawBLOB(factory);
registerInputFormatCustomSeparated(factory);
registerOutputFormatCustomSeparated(factory);
registerInputFormatORC(factory);
registerOutputFormatORC(factory);

View File

@ -1166,4 +1166,50 @@ bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current)
return loaded_more;
}
/// Searches for delimiter in input stream and sets buffer position after delimiter (if found) or EOF (if not)
static void findAndSkipNextDelimiter(PeekableReadBuffer & buf, const String & delimiter)
{
if (delimiter.empty())
return;
while (!buf.eof())
{
void * pos = memchr(buf.position(), delimiter[0], buf.available());
if (!pos)
{
buf.position() += buf.available();
continue;
}
buf.position() = static_cast<ReadBuffer::Position>(pos);
PeekableReadBufferCheckpoint checkpoint{buf};
if (checkString(delimiter, buf))
return;
buf.rollbackToCheckpoint();
++buf.position();
}
}
void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delimiter, const String & row_between_delimiter, bool skip_spaces)
{
if (row_after_delimiter.empty())
{
findAndSkipNextDelimiter(buf, row_between_delimiter);
return;
}
while (true)
{
findAndSkipNextDelimiter(buf, row_after_delimiter);
if (skip_spaces)
skipWhitespaceIfAny(buf);
if (checkString(row_between_delimiter, buf))
break;
}
}
}

View File

@ -30,6 +30,7 @@
#include <IO/CompressionMethod.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromMemory.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/VarInt.h>
#include <DataTypes/DataTypeDateTime.h>
@ -1324,6 +1325,9 @@ void saveUpToPosition(ReadBuffer & in, Memory<Allocator<false>> & memory, char *
*/
bool loadAtPosition(ReadBuffer & in, Memory<Allocator<false>> & memory, char * & current);
/// Skip data until the start of the next row or EOF (the end of a row is determined by two delimiters:
/// row_after_delimiter and row_between_delimiter).
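/// For example (hypothetical delimiters): with row_after_delimiter = ";" and row_between_delimiter = "\n",
/// the buffer ends up just past the next ";" that is followed by "\n"
/// (possibly with whitespace in between when skip_spaces is set).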
void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delimiter, const String & row_between_delimiter, bool skip_spaces);
struct PcgDeserializer
{

View File

@ -0,0 +1,181 @@
#include <Interpreters/AddIndexConstraintsOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTConstraintDeclaration.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
AddIndexConstraintsOptimizer::AddIndexConstraintsOptimizer(
const StorageMetadataPtr & metadata_snapshot_)
: metadata_snapshot(metadata_snapshot_)
{
}
namespace
{
bool onlyIndexColumns(const ASTPtr & ast, const std::unordered_set<std::string_view> & primary_key_set)
{
const auto * identifier = ast->as<ASTIdentifier>();
if (identifier && !primary_key_set.contains(identifier->name()))
return false;
for (auto & child : ast->children)
if (!onlyIndexColumns(child, primary_key_set))
return false;
return true;
}
bool onlyConstants(const ASTPtr & ast)
{
const auto * identifier = ast->as<ASTIdentifier>();
if (identifier)
return false;
for (auto & child : ast->children)
if (!onlyConstants(child))
return false;
return true;
}
const std::unordered_map<std::string, ComparisonGraph::CompareResult> & getRelationMap()
{
const static std::unordered_map<std::string, ComparisonGraph::CompareResult> relations =
{
{"equals", ComparisonGraph::CompareResult::EQUAL},
{"less", ComparisonGraph::CompareResult::LESS},
{"lessOrEquals", ComparisonGraph::CompareResult::LESS_OR_EQUAL},
{"greaterOrEquals", ComparisonGraph::CompareResult::GREATER_OR_EQUAL},
{"greater", ComparisonGraph::CompareResult::GREATER},
};
return relations;
}
const std::unordered_map<ComparisonGraph::CompareResult, std::string> & getReverseRelationMap()
{
const static std::unordered_map<ComparisonGraph::CompareResult, std::string> relations =
{
{ComparisonGraph::CompareResult::EQUAL, "equals"},
{ComparisonGraph::CompareResult::LESS, "less"},
{ComparisonGraph::CompareResult::LESS_OR_EQUAL, "lessOrEquals"},
{ComparisonGraph::CompareResult::GREATER_OR_EQUAL, "greaterOrEquals"},
{ComparisonGraph::CompareResult::GREATER, "greater"},
};
return relations;
}
bool canBeSequence(const ComparisonGraph::CompareResult left, const ComparisonGraph::CompareResult right)
{
using CR = ComparisonGraph::CompareResult;
if (left == CR::UNKNOWN || right == CR::UNKNOWN || left == CR::NOT_EQUAL || right == CR::NOT_EQUAL)
return false;
if ((left == CR::GREATER || left == CR::GREATER_OR_EQUAL) && (right == CR::LESS || right == CR::LESS_OR_EQUAL))
return false;
if ((right == CR::GREATER || right == CR::GREATER_OR_EQUAL) && (left == CR::LESS || left == CR::LESS_OR_EQUAL))
return false;
return true;
}
ComparisonGraph::CompareResult mostStrict(const ComparisonGraph::CompareResult left, const ComparisonGraph::CompareResult right)
{
using CR = ComparisonGraph::CompareResult;
if (left == CR::LESS || left == CR::GREATER)
return left;
if (right == CR::LESS || right == CR::GREATER)
return right;
if (left == CR::LESS_OR_EQUAL || left == CR::GREATER_OR_EQUAL)
return left;
if (right == CR::LESS_OR_EQUAL || right == CR::GREATER_OR_EQUAL)
return right;
if (left == CR::EQUAL)
return left;
if (right == CR::EQUAL)
return right;
return CR::UNKNOWN;
}
/// Create OR-group for 'indexHint'.
/// Consider we have an expression A <op1> C, where C is a constant,
/// and a constraint I <op2> A, where I depends only on columns from the primary key.
/// Then, if op1 and op2 form a sequence of comparisons (e.g. A < C and I < A),
/// we can add the condition 'indexHint(I < C)' to the expression.
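/// For example (hypothetical expressions): with the constraint I <= A and the condition A < 10,
/// the group gets the atom I < 10, which is later wrapped into 'indexHint'.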
CNFQuery::OrGroup createIndexHintGroup(
const CNFQuery::OrGroup & group,
const ComparisonGraph & graph,
const ASTs & primary_key_only_asts)
{
CNFQuery::OrGroup result;
for (const auto & atom : group)
{
const auto * func = atom.ast->as<ASTFunction>();
if (func && func->arguments->children.size() == 2 && getRelationMap().contains(func->name))
{
auto check_and_insert = [&](const size_t index, const ComparisonGraph::CompareResult need_result)
{
if (!onlyConstants(func->arguments->children[1 - index]))
return false;
for (const auto & primary_key_ast : primary_key_only_asts)
{
ComparisonGraph::CompareResult actual_result;
if (index == 0)
actual_result = graph.compare(primary_key_ast, func->arguments->children[index]);
else
actual_result = graph.compare(func->arguments->children[index], primary_key_ast);
if (canBeSequence(need_result, actual_result))
{
ASTPtr helper_ast = func->clone();
auto * helper_func = helper_ast->as<ASTFunction>();
helper_func->name = getReverseRelationMap().at(mostStrict(need_result, actual_result));
helper_func->arguments->children[index] = primary_key_ast->clone();
result.insert(CNFQuery::AtomicFormula{atom.negative, helper_ast});
return true;
}
}
return false;
};
auto expected = getRelationMap().at(func->name);
if (!check_and_insert(0, expected) && !check_and_insert(1, expected))
return {};
}
}
return result;
}
}
void AddIndexConstraintsOptimizer::perform(CNFQuery & cnf_query)
{
const auto primary_key = metadata_snapshot->getColumnsRequiredForPrimaryKey();
const auto & graph = metadata_snapshot->getConstraints().getGraph();
const std::unordered_set<std::string_view> primary_key_set(std::begin(primary_key), std::end(primary_key));
ASTs primary_key_only_asts;
for (const auto & vertex : graph.getVertices())
for (const auto & ast : vertex)
if (onlyIndexColumns(ast, primary_key_set))
primary_key_only_asts.push_back(ast);
CNFQuery::AndGroup and_group;
cnf_query.iterateGroups([&](const auto & or_group)
{
auto add_group = createIndexHintGroup(or_group, graph, primary_key_only_asts);
if (!add_group.empty())
and_group.emplace(std::move(add_group));
});
if (!and_group.empty())
{
CNFQuery::OrGroup new_or_group;
new_or_group.insert(CNFQuery::AtomicFormula{false, makeASTFunction("indexHint", TreeCNFConverter::fromCNF(CNFQuery(std::move(and_group))))});
cnf_query.appendGroup(CNFQuery::AndGroup{new_or_group});
}
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Storages/IStorage_fwd.h>
#include <Interpreters/TreeCNFConverter.h>
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
/// Optimizer that extracts constraints that
/// depend only on columns of the primary key
/// and tries to add the function 'indexHint' to the
/// WHERE clause, which reduces the amount of data read.
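/// A possible effect (hypothetical table): with CONSTRAINT c CHECK pk_col <= other_col,
/// where pk_col belongs to the primary key, the condition other_col < 42
/// may receive the extra conjunct indexHint(pk_col < 42).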
class AddIndexConstraintsOptimizer final
{
public:
AddIndexConstraintsOptimizer(
const StorageMetadataPtr & metadata_snapshot);
void perform(CNFQuery & cnf_query);
private:
const StorageMetadataPtr & metadata_snapshot;
};
}

View File

@ -0,0 +1,640 @@
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
#include <Common/FieldVisitorsAccurateComparison.h>
namespace DB
{
namespace ErrorCodes
{
extern const int VIOLATED_CONSTRAINT;
}
namespace
{
/// Make function a > b or a >= b
ASTPtr normalizeAtom(const ASTPtr & atom)
{
static const std::map<std::string, std::string> inverse_relations =
{
{"lessOrEquals", "greaterOrEquals"},
{"less", "greater"},
};
ASTPtr res = atom->clone();
if (const auto * func = res->as<ASTFunction>())
{
if (const auto it = inverse_relations.find(func->name); it != std::end(inverse_relations))
{
res = makeASTFunction(it->second, func->arguments->children[1]->clone(), func->arguments->children[0]->clone());
}
}
return res;
}
bool less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess{}, lhs, rhs); }
bool greater(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess{}, rhs, lhs); }
bool equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals{}, lhs, rhs); }
}
ComparisonGraph::ComparisonGraph(const ASTs & atomic_formulas)
{
if (atomic_formulas.empty())
return;
static const std::unordered_map<std::string, Edge::Type> relation_to_enum =
{
{"equals", Edge::EQUAL},
{"greater", Edge::GREATER},
{"greaterOrEquals", Edge::GREATER_OR_EQUAL},
};
/// Firstly build an intermediate graph,
/// in which each vertex corresponds to one expression.
/// That means that if we have edge (A, B) with type GREATER, then always A > B.
/// If we have EQUAL relation, then we add both edges (A, B) and (B, A).
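/// For example (hypothetical constraints): a = b produces the edges (a, b) and (b, a) with type EQUAL,
/// while b > c produces the single edge (b, c) with type GREATER.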
Graph g;
for (const auto & atom_raw : atomic_formulas)
{
const auto atom = normalizeAtom(atom_raw);
auto get_index = [](const ASTPtr & ast, Graph & asts_graph) -> std::optional<size_t>
{
const auto it = asts_graph.ast_hash_to_component.find(ast->getTreeHash());
if (it != std::end(asts_graph.ast_hash_to_component))
{
if (!std::any_of(
std::cbegin(asts_graph.vertices[it->second].asts),
std::cend(asts_graph.vertices[it->second].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash()
&& constraint_ast->getColumnName() == ast->getColumnName();
}))
{
return {};
}
return it->second;
}
else
{
asts_graph.ast_hash_to_component[ast->getTreeHash()] = asts_graph.vertices.size();
asts_graph.vertices.push_back(EqualComponent{{ast}, std::nullopt});
asts_graph.edges.emplace_back();
return asts_graph.vertices.size() - 1;
}
};
const auto * func = atom->as<ASTFunction>();
if (func && func->arguments->children.size() == 2)
{
auto index_left = get_index(func->arguments->children[0], g);
auto index_right = get_index(func->arguments->children[1], g);
if (index_left && index_right)
{
if (const auto it = relation_to_enum.find(func->name); it != std::end(relation_to_enum))
{
g.edges[*index_left].push_back(Edge{it->second, *index_right});
if (it->second == Edge::EQUAL)
g.edges[*index_right].push_back(Edge{it->second, *index_left});
}
}
}
}
/// Now expressions A and B are equal if and only if
/// we have both paths from A to B and from B to A in graph.
/// That means that equivalence classes of expressions
/// are the same as strongly connected components in graph.
/// So, we find such components and build graph on them.
/// All expressions from one equivalence class will be stored
/// in the corresponding vertex of new graph.
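/// For example (hypothetical constraints): a = b and b = c make a, b and c mutually reachable,
/// so all three expressions end up in one strongly connected component
/// and therefore in one vertex of the condensed graph.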
graph = buildGraphFromAstsGraph(g);
dists = buildDistsFromGraph(graph);
std::tie(ast_const_lower_bound, ast_const_upper_bound) = buildConstBounds();
/// Find expressions that are known to be unequal.
static const std::unordered_set<String> not_equals_functions = {"notEquals", "greater"};
/// Explicitly save unequal components.
/// TODO: Build a graph for unequal components.
for (const auto & atom_raw : atomic_formulas)
{
const auto atom = normalizeAtom(atom_raw);
const auto * func = atom->as<ASTFunction>();
if (func && not_equals_functions.contains(func->name))
{
auto index_left = graph.ast_hash_to_component.at(func->arguments->children[0]->getTreeHash());
auto index_right = graph.ast_hash_to_component.at(func->arguments->children[1]->getTreeHash());
if (index_left == index_right)
throw Exception(ErrorCodes::VIOLATED_CONSTRAINT,
"Found expression '{}', but its arguments considered equal according to constraints",
queryToString(atom));
not_equal.emplace(index_left, index_right);
not_equal.emplace(index_right, index_left);
}
}
}
ComparisonGraph::CompareResult ComparisonGraph::pathToCompareResult(Path path, bool inverse)
{
switch (path)
{
case Path::GREATER: return inverse ? CompareResult::LESS : CompareResult::GREATER;
case Path::GREATER_OR_EQUAL: return inverse ? CompareResult::LESS_OR_EQUAL : CompareResult::GREATER_OR_EQUAL;
}
__builtin_unreachable();
}
std::optional<ComparisonGraph::Path> ComparisonGraph::findPath(const size_t start, const size_t finish) const
{
const auto it = dists.find(std::make_pair(start, finish));
if (it == std::end(dists))
return {};
/// Since path can be only GREATER or GREATER_OR_EQUAL,
/// we can strengthen the condition.
return not_equal.contains({start, finish}) ? Path::GREATER : it->second;
}
ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, const ASTPtr & right) const
{
size_t start = 0;
size_t finish = 0;
/// TODO: check full ast
const auto it_left = graph.ast_hash_to_component.find(left->getTreeHash());
const auto it_right = graph.ast_hash_to_component.find(right->getTreeHash());
if (it_left == std::end(graph.ast_hash_to_component) || it_right == std::end(graph.ast_hash_to_component))
{
CompareResult result = CompareResult::UNKNOWN;
{
const auto left_bound = getConstLowerBound(left);
const auto right_bound = getConstUpperBound(right);
if (left_bound && right_bound)
{
if (greater(left_bound->first, right_bound->first))
result = CompareResult::GREATER;
else if (equals(left_bound->first, right_bound->first))
result = left_bound->second || right_bound->second
? CompareResult::GREATER : CompareResult::GREATER_OR_EQUAL;
}
}
{
const auto left_bound = getConstUpperBound(left);
const auto right_bound = getConstLowerBound(right);
if (left_bound && right_bound)
{
if (less(left_bound->first, right_bound->first))
result = CompareResult::LESS;
else if (equals(left_bound->first, right_bound->first))
result = left_bound->second || right_bound->second
? CompareResult::LESS : CompareResult::LESS_OR_EQUAL;
}
}
return result;
}
else
{
start = it_left->second;
finish = it_right->second;
}
if (start == finish)
return CompareResult::EQUAL;
if (auto path = findPath(start, finish))
return pathToCompareResult(*path, /*inverse=*/ false);
if (auto path = findPath(finish, start))
return pathToCompareResult(*path, /*inverse=*/ true);
if (not_equal.contains({start, finish}))
return CompareResult::NOT_EQUAL;
return CompareResult::UNKNOWN;
}
bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
{
const auto result = compare(left, right);
if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN)
return true;
if (expected == result)
return true;
static const std::set<std::pair<CompareResult, CompareResult>> possible_pairs =
{
{CompareResult::EQUAL, CompareResult::LESS_OR_EQUAL},
{CompareResult::EQUAL, CompareResult::GREATER_OR_EQUAL},
{CompareResult::LESS_OR_EQUAL, CompareResult::LESS},
{CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL},
{CompareResult::LESS_OR_EQUAL, CompareResult::NOT_EQUAL},
{CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER},
{CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL},
{CompareResult::GREATER_OR_EQUAL, CompareResult::NOT_EQUAL},
{CompareResult::LESS, CompareResult::LESS},
{CompareResult::LESS, CompareResult::LESS_OR_EQUAL},
{CompareResult::LESS, CompareResult::NOT_EQUAL},
{CompareResult::GREATER, CompareResult::GREATER},
{CompareResult::GREATER, CompareResult::GREATER_OR_EQUAL},
{CompareResult::GREATER, CompareResult::NOT_EQUAL},
{CompareResult::NOT_EQUAL, CompareResult::LESS},
{CompareResult::NOT_EQUAL, CompareResult::GREATER},
{CompareResult::NOT_EQUAL, CompareResult::LESS_OR_EQUAL},
{CompareResult::NOT_EQUAL, CompareResult::GREATER_OR_EQUAL},
};
return possible_pairs.contains({expected, result});
}
bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const
{
const auto result = compare(left, right);
if (expected == CompareResult::UNKNOWN || result == CompareResult::UNKNOWN)
return false;
if (expected == result)
return true;
static const std::set<std::pair<CompareResult, CompareResult>> possible_pairs =
{
{CompareResult::LESS_OR_EQUAL, CompareResult::LESS},
{CompareResult::LESS_OR_EQUAL, CompareResult::EQUAL},
{CompareResult::GREATER_OR_EQUAL, CompareResult::GREATER},
{CompareResult::GREATER_OR_EQUAL, CompareResult::EQUAL},
{CompareResult::NOT_EQUAL, CompareResult::GREATER},
{CompareResult::NOT_EQUAL, CompareResult::LESS},
};
return possible_pairs.contains({expected, result});
}
ASTs ComparisonGraph::getEqual(const ASTPtr & ast) const
{
const auto res = getComponentId(ast);
if (!res)
return {};
else
return getComponent(res.value());
}
std::optional<size_t> ComparisonGraph::getComponentId(const ASTPtr & ast) const
{
const auto hash_it = graph.ast_hash_to_component.find(ast->getTreeHash());
if (hash_it == std::end(graph.ast_hash_to_component))
return {};
const size_t index = hash_it->second;
if (std::any_of(
std::cbegin(graph.vertices[index].asts),
std::cend(graph.vertices[index].asts),
[ast](const ASTPtr & constraint_ast)
{
return constraint_ast->getTreeHash() == ast->getTreeHash() &&
constraint_ast->getColumnName() == ast->getColumnName();
}))
{
return index;
}
else
{
return {};
}
}
bool ComparisonGraph::hasPath(const size_t left, const size_t right) const
{
return findPath(left, right) || findPath(right, left);
}
ASTs ComparisonGraph::getComponent(const size_t id) const
{
return graph.vertices[id].asts;
}
bool ComparisonGraph::EqualComponent::hasConstant() const
{
return constant_index.has_value();
}
ASTPtr ComparisonGraph::EqualComponent::getConstant() const
{
assert(constant_index);
return asts[*constant_index];
}
void ComparisonGraph::EqualComponent::buildConstants()
{
constant_index.reset();
for (size_t i = 0; i < asts.size(); ++i)
{
if (asts[i]->as<ASTLiteral>())
{
constant_index = i;
return;
}
}
}
ComparisonGraph::CompareResult ComparisonGraph::atomToCompareResult(const CNFQuery::AtomicFormula & atom)
{
if (const auto * func = atom.ast->as<ASTFunction>())
{
auto expected = functionNameToCompareResult(func->name);
if (atom.negative)
expected = inverseCompareResult(expected);
return expected;
}
return ComparisonGraph::CompareResult::UNKNOWN;
}
ComparisonGraph::CompareResult ComparisonGraph::functionNameToCompareResult(const std::string & name)
{
static const std::unordered_map<std::string, CompareResult> relation_to_compare =
{
{"equals", CompareResult::EQUAL},
{"notEquals", CompareResult::NOT_EQUAL},
{"less", CompareResult::LESS},
{"lessOrEquals", CompareResult::LESS_OR_EQUAL},
{"greaterOrEquals", CompareResult::GREATER_OR_EQUAL},
{"greater", CompareResult::GREATER},
};
const auto it = relation_to_compare.find(name);
return it == std::end(relation_to_compare) ? CompareResult::UNKNOWN : it->second;
}
ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(const CompareResult result)
{
static const std::unordered_map<CompareResult, CompareResult> inverse_relations =
{
{CompareResult::NOT_EQUAL, CompareResult::EQUAL},
{CompareResult::EQUAL, CompareResult::NOT_EQUAL},
{CompareResult::GREATER_OR_EQUAL, CompareResult::LESS},
{CompareResult::GREATER, CompareResult::LESS_OR_EQUAL},
{CompareResult::LESS, CompareResult::GREATER_OR_EQUAL},
{CompareResult::LESS_OR_EQUAL, CompareResult::GREATER},
{CompareResult::UNKNOWN, CompareResult::UNKNOWN},
};
return inverse_relations.at(result);
}
std::optional<ASTPtr> ComparisonGraph::getEqualConst(const ASTPtr & ast) const
{
const auto hash_it = graph.ast_hash_to_component.find(ast->getTreeHash());
if (hash_it == std::end(graph.ast_hash_to_component))
return std::nullopt;
const size_t index = hash_it->second;
return graph.vertices[index].hasConstant()
? std::optional<ASTPtr>{graph.vertices[index].getConstant()}
: std::nullopt;
}
std::optional<std::pair<Field, bool>> ComparisonGraph::getConstUpperBound(const ASTPtr & ast) const
{
if (const auto * literal = ast->as<ASTLiteral>())
return std::make_pair(literal->value, false);
const auto it = graph.ast_hash_to_component.find(ast->getTreeHash());
if (it == std::end(graph.ast_hash_to_component))
return std::nullopt;
const size_t to = it->second;
const ssize_t from = ast_const_upper_bound[to];
if (from == -1)
return std::nullopt;
return std::make_pair(graph.vertices[from].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::GREATER);
}
std::optional<std::pair<Field, bool>> ComparisonGraph::getConstLowerBound(const ASTPtr & ast) const
{
if (const auto * literal = ast->as<ASTLiteral>())
return std::make_pair(literal->value, false);
const auto it = graph.ast_hash_to_component.find(ast->getTreeHash());
if (it == std::end(graph.ast_hash_to_component))
return std::nullopt;
const size_t from = it->second;
const ssize_t to = ast_const_lower_bound[from];
if (to == -1)
return std::nullopt;
return std::make_pair(graph.vertices[to].getConstant()->as<ASTLiteral>()->value, dists.at({from, to}) == Path::GREATER);
}
void ComparisonGraph::dfsOrder(const Graph & asts_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order)
{
visited[v] = true;
for (const auto & edge : asts_graph.edges[v])
if (!visited[edge.to])
dfsOrder(asts_graph, edge.to, visited, order);
order.push_back(v);
}
ComparisonGraph::Graph ComparisonGraph::reverseGraph(const Graph & asts_graph)
{
Graph g;
g.ast_hash_to_component = asts_graph.ast_hash_to_component;
g.vertices = asts_graph.vertices;
g.edges.resize(g.vertices.size());
for (size_t v = 0; v < asts_graph.vertices.size(); ++v)
for (const auto & edge : asts_graph.edges[v])
g.edges[edge.to].push_back(Edge{edge.type, v});
return g;
}
std::vector<ASTs> ComparisonGraph::getVertices() const
{
std::vector<ASTs> result;
for (const auto & vertex : graph.vertices)
{
result.emplace_back();
for (const auto & ast : vertex.asts)
result.back().push_back(ast);
}
return result;
}
void ComparisonGraph::dfsComponents(
const Graph & reversed_graph, size_t v,
OptionalIndices & components, const size_t component)
{
components[v] = component;
for (const auto & edge : reversed_graph.edges[v])
if (!components[edge.to])
dfsComponents(reversed_graph, edge.to, components, component);
}
ComparisonGraph::Graph ComparisonGraph::buildGraphFromAstsGraph(const Graph & asts_graph)
{
/// Find strongly connected components using two DFS traversals.
/// https://en.wikipedia.org/wiki/Kosaraju%27s_algorithm
const auto n = asts_graph.vertices.size();
std::vector<size_t> order;
{
std::vector<bool> visited(n, false);
for (size_t v = 0; v < n; ++v)
{
if (!visited[v])
dfsOrder(asts_graph, v, visited, order);
}
}
OptionalIndices components(n);
size_t component = 0;
{
const Graph reversed_graph = reverseGraph(asts_graph);
for (auto it = order.rbegin(); it != order.rend(); ++it)
{
if (!components[*it])
{
dfsComponents(reversed_graph, *it, components, component);
++component;
}
}
}
Graph result;
result.vertices.resize(component);
result.edges.resize(component);
for (const auto & [hash, index] : asts_graph.ast_hash_to_component)
{
assert(components[index]);
result.ast_hash_to_component[hash] = *components[index];
result.vertices[*components[index]].asts.insert(
std::end(result.vertices[*components[index]].asts),
std::begin(asts_graph.vertices[index].asts),
std::end(asts_graph.vertices[index].asts)); // asts_graph has only one ast per vertex
}
/// Calculate constants
for (auto & vertex : result.vertices)
vertex.buildConstants();
/// For each edge in initial graph, we add an edge between components in condensation graph.
for (size_t v = 0; v < n; ++v)
{
for (const auto & edge : asts_graph.edges[v])
result.edges[*components[v]].push_back(Edge{edge.type, *components[edge.to]});
/// TODO: make edges unique (left most strict)
}
/// If we have constants in two components, we can compare them and add an extra edge.
for (size_t v = 0; v < result.vertices.size(); ++v)
{
for (size_t u = 0; u < result.vertices.size(); ++u)
{
if (v != u && result.vertices[v].hasConstant() && result.vertices[u].hasConstant())
{
const auto * left = result.vertices[v].getConstant()->as<ASTLiteral>();
const auto * right = result.vertices[u].getConstant()->as<ASTLiteral>();
/// Only GREATER. Equal constant fields = equal literals so it was already considered above.
if (greater(left->value, right->value))
result.edges[v].push_back(Edge{Edge::GREATER, u});
}
}
}
return result;
}
std::map<std::pair<size_t, size_t>, ComparisonGraph::Path> ComparisonGraph::buildDistsFromGraph(const Graph & g)
{
/// Min path: -1 means GREATER, 0 means GREATER_OR_EQUAL.
/// We use the Floyd–Warshall algorithm to find distances between all pairs of vertices.
/// https://en.wikipedia.org/wiki/Floyd%E2%80%93Warshall_algorithm
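/// For example (hypothetical vertices): the edges A >= B (0) and B > C (-1)
/// combine to min(0, -1) = -1 for the pair (A, C), i.e. A > C.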
constexpr auto inf = std::numeric_limits<Int8>::max();
const size_t n = g.vertices.size();
std::vector<std::vector<Int8>> results(n, std::vector<Int8>(n, inf));
for (size_t v = 0; v < n; ++v)
{
results[v][v] = 0;
for (const auto & edge : g.edges[v])
results[v][edge.to] = std::min(results[v][edge.to], static_cast<Int8>(edge.type == Edge::GREATER ? -1 : 0));
}
for (size_t k = 0; k < n; ++k)
for (size_t v = 0; v < n; ++v)
for (size_t u = 0; u < n; ++u)
if (results[v][k] != inf && results[k][u] != inf)
results[v][u] = std::min(results[v][u], std::min(results[v][k], results[k][u]));
std::map<std::pair<size_t, size_t>, Path> path;
for (size_t v = 0; v < n; ++v)
for (size_t u = 0; u < n; ++u)
if (results[v][u] != inf)
path[std::make_pair(v, u)] = (results[v][u] == -1 ? Path::GREATER : Path::GREATER_OR_EQUAL);
return path;
}
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> ComparisonGraph::buildConstBounds() const
{
const size_t n = graph.vertices.size();
std::vector<ssize_t> lower(n, -1);
std::vector<ssize_t> upper(n, -1);
auto get_value = [this](const size_t vertex) -> Field
{
return graph.vertices[vertex].getConstant()->as<ASTLiteral>()->value;
};
for (const auto & [edge, path] : dists)
{
const auto [from, to] = edge;
if (graph.vertices[to].hasConstant())
{
if (lower[from] == -1
|| greater(get_value(to), get_value(lower[from]))
|| (equals(get_value(to), get_value(lower[from])) && path == Path::GREATER))
lower[from] = to;
}
if (graph.vertices[from].hasConstant())
{
if (upper[to] == -1
|| less(get_value(from), get_value(upper[to]))
|| (equals(get_value(from), get_value(upper[to])) && path == Path::GREATER))
upper[to] = from;
}
}
return {lower, upper};
}
}

View File

@ -0,0 +1,176 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTLiteral.h>
#include <Interpreters/TreeCNFConverter.h>
#include <unordered_map>
#include <map>
#include <vector>
namespace DB
{
/*
* Graph of relations between terms in constraints.
* Allows comparing terms and getting equal terms.
*/
class ComparisonGraph
{
public:
/// atomic_formulas are extracted from constraints.
ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas);
enum class CompareResult
{
LESS,
LESS_OR_EQUAL,
EQUAL,
GREATER_OR_EQUAL,
GREATER,
NOT_EQUAL,
UNKNOWN,
};
static CompareResult atomToCompareResult(const CNFQuery::AtomicFormula & atom);
static CompareResult functionNameToCompareResult(const std::string & name);
static CompareResult inverseCompareResult(const CompareResult result);
CompareResult compare(const ASTPtr & left, const ASTPtr & right) const;
/// It's possible that left <expected> right
bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
/// It's always true that left <expected> right
bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const;
/// Returns all expressions from component to which @ast belongs if any.
std::vector<ASTPtr> getEqual(const ASTPtr & ast) const;
/// Returns constant expression from component to which @ast belongs if any.
std::optional<ASTPtr> getEqualConst(const ASTPtr & ast) const;
/// Finds component id to which @ast belongs if any.
std::optional<std::size_t> getComponentId(const ASTPtr & ast) const;
/// Returns all expressions from component.
std::vector<ASTPtr> getComponent(const std::size_t id) const;
size_t getNumOfComponents() const { return graph.vertices.size(); }
bool hasPath(const size_t left, const size_t right) const;
/// Find constants lessOrEqual and greaterOrEqual.
/// For int and double, linear programming can be applied here.
/// Returns: {constant, is strict less/greater}
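/// E.g. (hypothetical constraints x <= 10 and x <= 7): getConstUpperBound(x) would return {7, false},
/// i.e. the tightest known non-strict upper bound.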
std::optional<std::pair<Field, bool>> getConstUpperBound(const ASTPtr & ast) const;
std::optional<std::pair<Field, bool>> getConstLowerBound(const ASTPtr & ast) const;
/// Returns all expressions in the graph.
std::vector<ASTs> getVertices() const;
private:
/// Strongly connected component
struct EqualComponent
{
/// All these expressions are considered equal.
std::vector<ASTPtr> asts;
std::optional<size_t> constant_index;
bool hasConstant() const;
ASTPtr getConstant() const;
void buildConstants();
};
/// Edge (from, to, type) means that it's always true that @from <op> @to,
/// where @op is the operation of type @type.
///
/// TODO: move to diff for int and double:
/// GREATER and GREATER_OR_EQUAL with +const or 0 --- ok
/// with -const --- not ok
/// EQUAL is ok only for 0
struct Edge
{
enum Type
{
GREATER,
GREATER_OR_EQUAL,
EQUAL,
};
Type type;
size_t to;
};
struct Graph
{
struct ASTHash
{
size_t operator() (const IAST::Hash & hash) const
{
return hash.first;
}
};
std::unordered_map<IAST::Hash, size_t, ASTHash> ast_hash_to_component;
std::vector<EqualComponent> vertices;
std::vector<std::vector<Edge>> edges;
};
/// Receives graph, in which each vertex corresponds to one expression.
/// Then finds strongly connected components and builds graph on them.
static Graph buildGraphFromAstsGraph(const Graph & asts_graph);
static Graph reverseGraph(const Graph & asts_graph);
/// The first part of finding strongly connected components.
/// Finds the exit order of the vertices in a DFS traversal of the graph.
static void dfsOrder(const Graph & asts_graph, size_t v, std::vector<bool> & visited, std::vector<size_t> & order);
using OptionalIndices = std::vector<std::optional<size_t>>;
/// The second part of finding strongly connected components.
/// Assigns index of component for each vertex.
static void dfsComponents(
const Graph & reversed_graph, size_t v,
OptionalIndices & components, const size_t component);
enum class Path
{
GREATER,
GREATER_OR_EQUAL,
};
static CompareResult pathToCompareResult(Path path, bool inverse);
std::optional<Path> findPath(const size_t start, const size_t finish) const;
/// Calculate @dists.
static std::map<std::pair<size_t, size_t>, Path> buildDistsFromGraph(const Graph & g);
/// Calculate @ast_const_lower_bound and @ast_const_upper_bound.
std::pair<std::vector<ssize_t>, std::vector<ssize_t>> buildConstBounds() const;
/// Directed acyclic graph in which each vertex corresponds
/// to one equivalence class of expressions.
/// Each edge sets the relation between classes (GREATER or GREATER_OR_EQUAL).
Graph graph;
/// Precalculated distances between each pair of vertices.
/// Distance can be either 0 or -1.
/// 0 means GREATER_OR_EQUAL.
/// -1 means GREATER.
std::map<std::pair<size_t, size_t>, Path> dists;
/// Explicitly collected components, for which it's known
/// that expressions in them are unequal.
std::set<std::pair<size_t, size_t>> not_equal;
/// Maximal constant value for each component that
/// is lower bound for all expressions in component.
std::vector<ssize_t> ast_const_lower_bound;
/// Minimal constant value for each component that
/// is upper bound for all expressions in component.
std::vector<ssize_t> ast_const_upper_bound;
};
}

View File

@ -248,10 +248,9 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
#if USE_MYSQL
/// It's definitely not the best place for this logic, but behaviour must be consistent with DatabaseMaterializedMySQL::tryGetTable(...)
if (db_and_table.first->getEngineName() == "MaterializedMySQL")
if (!context_->isInternalQuery() && db_and_table.first->getEngineName() == "MaterializedMySQL")
{
if (!MaterializedMySQLSyncThread::isMySQLSyncThread())
db_and_table.second = std::make_shared<StorageMaterializedMySQL>(std::move(db_and_table.second), db_and_table.first.get());
db_and_table.second = std::make_shared<StorageMaterializedMySQL>(std::move(db_and_table.second), db_and_table.first.get());
}
#endif
return db_and_table;

View File

@ -416,7 +416,7 @@ ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription &
{
auto res = std::make_shared<ASTExpressionList>();
for (const auto & constraint : constraints.constraints)
for (const auto & constraint : constraints.getConstraints())
res->children.push_back(constraint->clone());
return res;
@ -564,11 +564,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const ASTExpressionList * constraints)
{
ConstraintsDescription res;
ASTs constraints_data;
if (constraints)
for (const auto & constraint : constraints->children)
res.constraints.push_back(std::dynamic_pointer_cast<ASTConstraintDeclaration>(constraint->clone()));
return res;
constraints_data.push_back(constraint->clone());
return ConstraintsDescription{constraints_data};
}

View File

@ -80,22 +80,22 @@ BlockIO InterpreterDropQuery::executeToTable(ASTDropQuery & query)
{
DatabasePtr database;
UUID table_to_wait_on = UUIDHelpers::Nil;
auto res = executeToTableImpl(query, database, table_to_wait_on);
auto res = executeToTableImpl(getContext(), query, database, table_to_wait_on);
if (query.no_delay)
waitForTableToBeActuallyDroppedOrDetached(query, database, table_to_wait_on);
return res;
}
BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait)
BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait)
{
/// NOTE: it does not contain UUID, we will resolve it with locked DDLGuard
auto table_id = StorageID(query);
if (query.temporary || table_id.database_name.empty())
{
if (getContext()->tryResolveStorageID(table_id, Context::ResolveExternal))
if (context_->tryResolveStorageID(table_id, Context::ResolveExternal))
return executeToTemporaryTable(table_id.getTableName(), query.kind);
else
query.setDatabase(table_id.database_name = getContext()->getCurrentDatabase());
query.setDatabase(table_id.database_name = context_->getCurrentDatabase());
}
if (query.temporary)
@ -109,8 +109,8 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
auto ddl_guard = (!query.no_ddl_lock ? DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name) : nullptr);
/// If table was already dropped by anyone, an exception will be thrown
auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, getContext())
: DatabaseCatalog::instance().getDatabaseAndTable(table_id, getContext());
auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context_)
: DatabaseCatalog::instance().getDatabaseAndTable(table_id, context_);
if (database && table)
{
@ -132,7 +132,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
/// Prevents recursive drop from drop database query. The original query must specify a table.
bool is_drop_or_detach_database = !query_ptr->as<ASTDropQuery>()->table;
bool is_replicated_ddl_query = typeid_cast<DatabaseReplicated *>(database.get()) &&
!getContext()->getClientInfo().is_replicated_database_internal &&
!context_->getClientInfo().is_replicated_database_internal &&
!is_drop_or_detach_database;
AccessFlags drop_storage;
@ -147,20 +147,20 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
if (is_replicated_ddl_query)
{
if (query.kind == ASTDropQuery::Kind::Detach)
getContext()->checkAccess(drop_storage, table_id);
context_->checkAccess(drop_storage, table_id);
else if (query.kind == ASTDropQuery::Kind::Truncate)
getContext()->checkAccess(AccessType::TRUNCATE, table_id);
context_->checkAccess(AccessType::TRUNCATE, table_id);
else if (query.kind == ASTDropQuery::Kind::Drop)
getContext()->checkAccess(drop_storage, table_id);
context_->checkAccess(drop_storage, table_id);
ddl_guard->releaseTableLock();
table.reset();
return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query.clone(), getContext());
return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query.clone(), context_);
}
if (query.kind == ASTDropQuery::Kind::Detach)
{
getContext()->checkAccess(drop_storage, table_id);
context_->checkAccess(drop_storage, table_id);
if (table->isDictionary())
{
@ -175,7 +175,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
TableExclusiveLockHolder table_lock;
if (database->getUUID() == UUIDHelpers::Nil)
table_lock = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout);
if (query.permanently)
{
@ -183,12 +183,12 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
DatabaseCatalog::instance().tryRemoveLoadingDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies,
is_drop_or_detach_database);
/// Drop table from memory, don't touch data, metadata file renamed and will be skipped during server restart
database->detachTablePermanently(getContext(), table_id.table_name);
database->detachTablePermanently(context_, table_id.table_name);
}
else
{
/// Drop table from memory, don't touch data and metadata
database->detachTable(table_id.table_name);
database->detachTable(context_, table_id.table_name);
}
}
else if (query.kind == ASTDropQuery::Kind::Truncate)
@ -196,20 +196,20 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
if (table->isDictionary())
throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR);
getContext()->checkAccess(AccessType::TRUNCATE, table_id);
context_->checkAccess(AccessType::TRUNCATE, table_id);
if (table->isStaticStorage())
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");
table->checkTableCanBeDropped();
auto table_lock = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
auto table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
/// Drop table data, don't touch metadata
table->truncate(query_ptr, metadata_snapshot, getContext(), table_lock);
table->truncate(query_ptr, metadata_snapshot, context_, table_lock);
}
else if (query.kind == ASTDropQuery::Kind::Drop)
{
getContext()->checkAccess(drop_storage, table_id);
context_->checkAccess(drop_storage, table_id);
if (table->isDictionary())
{
@ -224,11 +224,11 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP
TableExclusiveLockHolder table_lock;
if (database->getUUID() == UUIDHelpers::Nil)
table_lock = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout);
DatabaseCatalog::instance().tryRemoveLoadingDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies,
is_drop_or_detach_database);
database->dropTable(getContext(), table_id.table_name, query.no_delay);
database->dropTable(context_, table_id.table_name, query.no_delay);
}
db = database;
@ -320,16 +320,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
throw Exception("DETACH PERMANENTLY is not implemented for databases", ErrorCodes::NOT_IMPLEMENTED);
#if USE_MYSQL
if (database->getEngineName() == "MaterializedMySQL")
stopDatabaseSynchronization(database);
#endif
if (auto * replicated = typeid_cast<DatabaseReplicated *>(database.get()))
replicated->stopReplication();
#if USE_LIBPQXX
if (auto * materialize_postgresql = typeid_cast<DatabaseMaterializedPostgreSQL *>(database.get()))
materialize_postgresql->stopReplication();
#endif
if (database->hasReplicationThread())
database->stopReplication();
if (database->shouldBeEmptyOnDetach())
{
@ -341,19 +333,21 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
/// since in this case getTablesIterator() may do some additional work,
/// see DatabaseMaterializedMySQL<>::getTablesIterator()
/// see DatabaseMaterializedMySQL::getTablesIterator()
for (auto iterator = database->getTablesIterator(getContext()); iterator->isValid(); iterator->next())
{
iterator->table()->flush();
}
for (auto iterator = database->getTablesIterator(getContext()); iterator->isValid(); iterator->next())
auto table_context = Context::createCopy(getContext());
table_context->setInternalQuery(true);
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
DatabasePtr db;
UUID table_to_wait = UUIDHelpers::Nil;
query_for_table.setTable(iterator->name());
query_for_table.is_dictionary = iterator->table()->isDictionary();
executeToTableImpl(query_for_table, db, table_to_wait);
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
uuids_to_wait.push_back(table_to_wait);
}
}

View File

@ -36,7 +36,7 @@ private:
BlockIO executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait);
BlockIO executeToTable(ASTDropQuery & query);
BlockIO executeToTableImpl(ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait);
BlockIO executeToTableImpl(ContextPtr context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait);
static void waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait);

View File

@ -523,7 +523,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica,
auto table_lock = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
create_ast = database->getCreateTableQuery(replica.table_name, getContext());
database->detachTable(replica.table_name);
database->detachTable(system_context, replica.table_name);
}
table.reset();
@ -544,7 +544,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica,
constraints,
false);
database->attachTable(replica.table_name, table, data_path);
database->attachTable(system_context, replica.table_name, table, data_path);
table->startup();
return table;

View File

@ -0,0 +1,331 @@
#include <Interpreters/SubstituteColumnOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Parsers/ASTSelectQuery.h>
#include <Poco/Logger.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/IStorage.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
constexpr auto COMPONENT_PART = "__component_";
constexpr UInt64 COLUMN_PENALTY = 10 * 1024 * 1024;
constexpr Int64 INDEX_PRICE = -1'000'000'000'000'000'000;
class ComponentMatcher
{
public:
using Visitor = InDepthNodeVisitor<ComponentMatcher, true>;
struct Data
{
const ComparisonGraph & graph;
std::set<UInt64> & components;
std::unordered_map<String, String> & old_name;
std::unordered_map<String, UInt64> & component;
UInt64 & current_id;
Data(const ComparisonGraph & graph_,
std::set<UInt64> & components_,
std::unordered_map<String, String> & old_name_,
std::unordered_map<String, UInt64> & component_,
UInt64 & current_id_)
: graph(graph_)
, components(components_)
, old_name(old_name_)
, component(component_)
, current_id(current_id_)
{
}
};
static void visit(ASTPtr & ast, Data & data)
{
if (auto id = data.graph.getComponentId(ast))
{
const String name = COMPONENT_PART + std::to_string(*id) + "_" + std::to_string(++data.current_id);
data.old_name[name] = ast->getAliasOrColumnName();
data.component[name] = *id;
data.components.insert(*id);
ast = std::make_shared<ASTIdentifier>(name);
}
}
static bool needChildVisit(const ASTPtr &, const ASTPtr &)
{
return true;
}
};
using ComponentVisitor = ComponentMatcher::Visitor;
struct ColumnPrice
{
Int64 compressed_size;
Int64 uncompressed_size;
ColumnPrice(const Int64 compressed_size_, const Int64 uncompressed_size_)
: compressed_size(compressed_size_)
, uncompressed_size(uncompressed_size_)
{
}
ColumnPrice() : ColumnPrice(0, 0) {}
bool operator<(const ColumnPrice & that) const
{
return std::tie(compressed_size, uncompressed_size) < std::tie(that.compressed_size, that.uncompressed_size);
}
ColumnPrice & operator+=(const ColumnPrice & that)
{
compressed_size += that.compressed_size;
uncompressed_size += that.uncompressed_size;
return *this;
}
ColumnPrice & operator-=(const ColumnPrice & that)
{
compressed_size -= that.compressed_size;
uncompressed_size -= that.uncompressed_size;
return *this;
}
};
using ColumnPriceByName = std::unordered_map<String, ColumnPrice>;
class SubstituteColumnMatcher
{
public:
using Visitor = InDepthNodeVisitor<SubstituteColumnMatcher, false>;
struct Data
{
std::unordered_map<UInt64, ASTPtr> id_to_expression_map;
std::unordered_map<String, UInt64> name_to_component_id;
std::unordered_map<String, String> old_name;
bool is_select;
};
static void visit(ASTPtr & ast, Data & data)
{
const auto * identifier = ast->as<ASTIdentifier>();
if (identifier && data.name_to_component_id.contains(identifier->name()))
{
const String & name = identifier->name();
const auto component_id = data.name_to_component_id.at(name);
auto new_ast = data.id_to_expression_map.at(component_id)->clone();
if (data.is_select)
new_ast->setAlias(data.old_name.at(name));
ast = new_ast;
}
}
static bool needChildVisit(const ASTPtr &, const ASTPtr &)
{
return true;
}
};
using SubstituteColumnVisitor = SubstituteColumnMatcher::Visitor;
ColumnPrice calculatePrice(
const ColumnPriceByName & column_prices,
const IdentifierNameSet & identifiers)
{
ColumnPrice result(0, 0);
for (const auto & ident : identifiers)
{
auto it = column_prices.find(ident);
if (it != column_prices.end())
result += it->second;
}
return result;
}
/// We need to choose one expression in each component
/// so that the total price of all read columns is minimal.
/// Brute-force over equal ASTs in each component and calculate
/// the price of all columns on which the AST depends.
/// TODO: branch-and-bound
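/// Illustration (hypothetical sizes): if a component contains {heavy, light} and reading
/// 'heavy' costs 100 MB while 'light' costs 1 MB, the search keeps 'light' as the
/// representative of that component, so the rewritten query reads the cheaper column.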
void bruteforce(
const ComparisonGraph & graph,
const std::vector<UInt64> & components,
size_t current_component,
const ColumnPriceByName & column_prices,
ColumnPrice current_price,
std::vector<ASTPtr> & expressions_stack,
ColumnPrice & min_price,
std::vector<ASTPtr> & min_expressions)
{
if (current_component == components.size())
{
if (current_price < min_price)
{
min_price = current_price;
min_expressions = expressions_stack;
}
}
else
{
for (const auto & ast : graph.getComponent(components[current_component]))
{
IdentifierNameSet identifiers;
ast->collectIdentifierNames(identifiers);
ColumnPrice expression_price = calculatePrice(column_prices, identifiers);
expressions_stack.push_back(ast);
current_price += expression_price;
ColumnPriceByName new_prices(column_prices);
/// Update prices of already counted columns.
for (const auto & identifier : identifiers)
new_prices[identifier] = ColumnPrice(0, 0);
bruteforce(graph,
components,
current_component + 1,
new_prices,
current_price,
expressions_stack,
min_price,
min_expressions);
current_price -= expression_price;
expressions_stack.pop_back();
}
}
}
}
SubstituteColumnOptimizer::SubstituteColumnOptimizer(
ASTSelectQuery * select_query_,
const StorageMetadataPtr & metadata_snapshot_,
const ConstStoragePtr & storage_)
: select_query(select_query_)
, metadata_snapshot(metadata_snapshot_)
, storage(storage_)
{
}
void SubstituteColumnOptimizer::perform()
{
if (!storage)
return;
const auto column_sizes = storage->getColumnSizes();
if (column_sizes.empty())
return;
const auto & compare_graph = metadata_snapshot->getConstraints().getGraph();
// Fill aliases
if (select_query->select())
{
auto * list = select_query->refSelect()->as<ASTExpressionList>();
if (!list)
throw Exception("List of selected columns must be ASTExpressionList", ErrorCodes::LOGICAL_ERROR);
for (ASTPtr & ast : list->children)
ast->setAlias(ast->getAliasOrColumnName());
}
auto run_for_all = [&](const auto func)
{
if (select_query->where())
func(select_query->refWhere(), false);
if (select_query->prewhere())
func(select_query->refPrewhere(), false);
if (select_query->select())
func(select_query->refSelect(), true);
if (select_query->having())
func(select_query->refHaving(), false);
};
std::set<UInt64> components;
std::unordered_map<String, String> old_name;
std::unordered_map<String, UInt64> name_to_component;
UInt64 counter_id = 0;
ComponentVisitor::Data component_data(
compare_graph, components, old_name, name_to_component, counter_id);
IdentifierNameSet identifiers;
auto preprocess = [&](ASTPtr & ast, bool)
{
ComponentVisitor(component_data).visit(ast);
ast->collectIdentifierNames(identifiers);
};
run_for_all(preprocess);
const auto primary_key = metadata_snapshot->getColumnsRequiredForPrimaryKey();
const std::unordered_set<std::string_view> primary_key_set(std::begin(primary_key), std::end(primary_key));
ColumnPriceByName column_prices;
for (const auto & [column_name, column_size] : column_sizes)
column_prices[column_name] = ColumnPrice(column_size.data_compressed + COLUMN_PENALTY, column_size.data_uncompressed);
for (const auto & column_name : primary_key)
column_prices[column_name] = ColumnPrice(INDEX_PRICE, INDEX_PRICE);
for (const auto & column_name : identifiers)
column_prices[column_name] = ColumnPrice(0, 0);
std::unordered_map<UInt64, ASTPtr> id_to_expression_map;
std::vector<UInt64> components_list;
for (const UInt64 component_id : components)
{
auto component = compare_graph.getComponent(component_id);
if (component.size() == 1)
id_to_expression_map[component_id] = component.front();
else
components_list.push_back(component_id);
}
std::vector<ASTPtr> expressions_stack;
ColumnPrice min_price(std::numeric_limits<Int64>::max(), std::numeric_limits<Int64>::max());
std::vector<ASTPtr> min_expressions;
bruteforce(compare_graph,
components_list,
0,
column_prices,
ColumnPrice(0, 0),
expressions_stack,
min_price,
min_expressions);
for (size_t i = 0; i < components_list.size(); ++i)
id_to_expression_map[components_list[i]] = min_expressions[i];
auto process = [&](ASTPtr & ast, bool is_select)
{
SubstituteColumnVisitor::Data substitute_data{id_to_expression_map, name_to_component, old_name, is_select};
SubstituteColumnVisitor(substitute_data).visit(ast);
};
run_for_all(process);
}
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Storages/IStorage_fwd.h>
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
/// Optimizer that tries to replace columns with equal columns (according to constraints)
/// that have a lower size (according to compressed and uncompressed size).
class SubstituteColumnOptimizer
{
public:
SubstituteColumnOptimizer(
ASTSelectQuery * select_query,
const StorageMetadataPtr & metadata_snapshot,
const ConstStoragePtr & storage);
void perform();
private:
ASTSelectQuery * select_query;
const StorageMetadataPtr & metadata_snapshot;
ConstStoragePtr storage;
};
}

View File

@ -0,0 +1,469 @@
#include <Interpreters/TreeCNFConverter.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/queryToString.h>
#include <Common/checkStackSize.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
extern const int TOO_MANY_TEMPORARY_COLUMNS;
}
namespace
{
bool isLogicalFunction(const ASTFunction & func)
{
return func.name == "and" || func.name == "or" || func.name == "not";
}
size_t countAtoms(const ASTPtr & node)
{
checkStackSize();
if (node->as<ASTIdentifier>())
return 1;
const auto * func = node->as<ASTFunction>();
if (func && !isLogicalFunction(*func))
return 1;
size_t num_atoms = 0;
for (const auto & child : node->children)
num_atoms += countAtoms(child);
return num_atoms;
}
/// Splits AND(a, b, c) into nested binary nodes AND(AND(a, b), c); the same is done for OR.
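/// For example, AND(a, b, c, d) becomes AND(AND(AND(a, b), c), d),
/// so all later passes can assume binary AND/OR nodes.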
void splitMultiLogic(ASTPtr & node)
{
checkStackSize();
auto * func = node->as<ASTFunction>();
if (func && (func->name == "and" || func->name == "or"))
{
if (func->arguments->children.size() < 2)
throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::INCORRECT_QUERY);
if (func->arguments->children.size() > 2)
{
ASTPtr res = func->arguments->children[0]->clone();
for (size_t i = 1; i < func->arguments->children.size(); ++i)
res = makeASTFunction(func->name, res, func->arguments->children[i]->clone());
node = res;
}
auto * new_func = node->as<ASTFunction>();
for (auto & child : new_func->arguments->children)
splitMultiLogic(child);
}
else if (func && func->name == "not")
{
for (auto & child : func->arguments->children)
splitMultiLogic(child);
}
}
/// Push NOT down to the leaves, removing double negations (NOT NOT ...) on the way.
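/// For example, NOT (a AND (NOT b)) becomes (NOT a) OR b:
/// De Morgan's law turns the AND into OR while negating both children,
/// and the double negation over b is removed.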
void traversePushNot(ASTPtr & node, bool add_negation)
{
checkStackSize();
auto * func = node->as<ASTFunction>();
if (func && (func->name == "and" || func->name == "or"))
{
if (add_negation)
{
if (func->arguments->children.size() != 2)
throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::LOGICAL_ERROR);
/// apply De Morgan's Law
node = makeASTFunction(
(func->name == "and" ? "or" : "and"),
func->arguments->children[0]->clone(),
func->arguments->children[1]->clone());
}
auto * new_func = node->as<ASTFunction>();
for (auto & child : new_func->arguments->children)
traversePushNot(child, add_negation);
}
else if (func && func->name == "not")
{
if (func->arguments->children.size() != 1)
throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY);
/// delete NOT
node = func->arguments->children[0]->clone();
traversePushNot(node, !add_negation);
}
else
{
if (add_negation)
node = makeASTFunction("not", node->clone());
}
}
/// Push OR inside AND (effectively pulling AND to the top)
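/// For example, (x = 1) OR ((y = 2) AND (z = 3)) becomes
/// ((x = 1) OR (y = 2)) AND ((x = 1) OR (z = 3)).
/// The left operand is duplicated, which is why num_atoms grows by countAtoms(a)
/// below and why the max_atoms limit is needed.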
bool traversePushOr(ASTPtr & node, size_t num_atoms, size_t max_atoms)
{
if (max_atoms && num_atoms > max_atoms)
return false;
checkStackSize();
auto * func = node->as<ASTFunction>();
if (func && (func->name == "or" || func->name == "and"))
{
for (auto & child : func->arguments->children)
if (!traversePushOr(child, num_atoms, max_atoms))
return false;
}
if (func && func->name == "or")
{
assert(func->arguments->children.size() == 2);
size_t and_node_id = func->arguments->children.size();
for (size_t i = 0; i < func->arguments->children.size(); ++i)
{
auto & child = func->arguments->children[i];
auto * and_func = child->as<ASTFunction>();
if (and_func && and_func->name == "and")
and_node_id = i;
}
if (and_node_id == func->arguments->children.size())
return true;
const size_t other_node_id = 1 - and_node_id;
const auto * and_func = func->arguments->children[and_node_id]->as<ASTFunction>();
auto a = func->arguments->children[other_node_id];
auto b = and_func->arguments->children[0];
auto c = and_func->arguments->children[1];
/// apply the distributive law ( a or (b and c) -> (a or b) and (a or c) )
node = makeASTFunction(
"and",
makeASTFunction("or", a->clone(), b),
makeASTFunction("or", a, c));
/// Count all atoms from 'a', because it was cloned.
num_atoms += countAtoms(a);
return traversePushOr(node, num_atoms, max_atoms);
}
return true;
}
/// transform ast into cnf groups
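/// For example, after the previous passes an expression like (a OR b) AND (NOT c) AND d
/// is collected into the AndGroup { {a, b}, {NOT c}, {d} },
/// where each inner set is an OrGroup of atomic formulas.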
void traverseCNF(const ASTPtr & node, CNFQuery::AndGroup & and_group, CNFQuery::OrGroup & or_group)
{
checkStackSize();
auto * func = node->as<ASTFunction>();
if (func && func->name == "and")
{
for (auto & child : func->arguments->children)
{
CNFQuery::OrGroup group;
traverseCNF(child, and_group, group);
if (!group.empty())
and_group.insert(std::move(group));
}
}
else if (func && func->name == "or")
{
for (auto & child : func->arguments->children)
{
traverseCNF(child, and_group, or_group);
}
}
else if (func && func->name == "not")
{
if (func->arguments->children.size() != 1)
throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY);
or_group.insert(CNFQuery::AtomicFormula{true, func->arguments->children.front()});
}
else
{
or_group.insert(CNFQuery::AtomicFormula{false, node});
}
}
void traverseCNF(const ASTPtr & node, CNFQuery::AndGroup & result)
{
CNFQuery::OrGroup or_group;
traverseCNF(node, result, or_group);
if (!or_group.empty())
result.insert(or_group);
}
}
std::optional<CNFQuery> TreeCNFConverter::tryConvertToCNF(
const ASTPtr & query, size_t max_growth_multipler)
{
auto cnf = query->clone();
size_t num_atoms = countAtoms(cnf);
splitMultiLogic(cnf);
traversePushNot(cnf, false);
size_t max_atoms = max_growth_multipler
? std::max(MAX_ATOMS_WITHOUT_CHECK, num_atoms * max_growth_multipler)
: 0;
if (!traversePushOr(cnf, num_atoms, max_atoms))
return {};
CNFQuery::AndGroup and_group;
traverseCNF(cnf, and_group);
CNFQuery result{std::move(and_group)};
return result;
}
CNFQuery TreeCNFConverter::toCNF(
const ASTPtr & query, size_t max_growth_multipler)
{
auto cnf = tryConvertToCNF(query, max_growth_multipler);
if (!cnf)
throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS,
"Cannot expression '{}' to CNF, because it produces to many clauses."
"Size of formula inCNF can be exponential of size of source formula.");
return *cnf;
}
ASTPtr TreeCNFConverter::fromCNF(const CNFQuery & cnf)
{
const auto & groups = cnf.getStatements();
if (groups.empty())
return nullptr;
ASTs or_groups;
for (const auto & group : groups)
{
if (group.size() == 1)
{
if ((*group.begin()).negative)
or_groups.push_back(makeASTFunction("not", (*group.begin()).ast->clone()));
else
or_groups.push_back((*group.begin()).ast->clone());
}
else if (group.size() > 1)
{
or_groups.push_back(makeASTFunction("or"));
auto * func = or_groups.back()->as<ASTFunction>();
for (const auto & atom : group)
{
if (atom.negative)
func->arguments->children.push_back(makeASTFunction("not", atom.ast->clone()));
else
func->arguments->children.push_back(atom.ast->clone());
}
}
}
if (or_groups.size() == 1)
return or_groups.front();
ASTPtr res = makeASTFunction("and");
auto * func = res->as<ASTFunction>();
for (const auto & group : or_groups)
func->arguments->children.push_back(group);
return res;
}
static void pushPullNotInAtom(CNFQuery::AtomicFormula & atom, const std::unordered_map<std::string, std::string> & inverse_relations)
{
auto * func = atom.ast->as<ASTFunction>();
if (!func)
return;
if (auto it = inverse_relations.find(func->name); it != std::end(inverse_relations))
{
/// inverse func
atom.ast = atom.ast->clone();
auto * new_func = atom.ast->as<ASTFunction>();
new_func->name = it->second;
/// add not
atom.negative = !atom.negative;
}
}
static void pullNotOut(CNFQuery::AtomicFormula & atom)
{
static const std::unordered_map<std::string, std::string> inverse_relations = {
{"notEquals", "equals"},
{"greaterOrEquals", "less"},
{"greater", "lessOrEquals"},
{"notIn", "in"},
{"notLike", "like"},
{"notEmpty", "empty"},
};
pushPullNotInAtom(atom, inverse_relations);
}
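/// For example, the atom {negative = true, ast = "a > b"} becomes
/// {negative = false, ast = "a <= b"}: the comparison function is replaced by its
/// inverse and the outer negation is absorbed into it.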
void pushNotIn(CNFQuery::AtomicFormula & atom)
{
if (!atom.negative)
return;
static const std::unordered_map<std::string, std::string> inverse_relations = {
{"equals", "notEquals"},
{"less", "greaterOrEquals"},
{"lessOrEquals", "greater"},
{"in", "notIn"},
{"like", "notLike"},
{"empty", "notEmpty"},
{"notEquals", "equals"},
{"greaterOrEquals", "less"},
{"greater", "lessOrEquals"},
{"notIn", "in"},
{"notLike", "like"},
{"notEmpty", "empty"},
};
pushPullNotInAtom(atom, inverse_relations);
}
CNFQuery & CNFQuery::pullNotOutFunctions()
{
transformAtoms([](const AtomicFormula & atom) -> AtomicFormula
{
AtomicFormula result{atom.negative, atom.ast->clone()};
pullNotOut(result);
return result;
});
return *this;
}
CNFQuery & CNFQuery::pushNotInFuntions()
{
transformAtoms([](const AtomicFormula & atom) -> AtomicFormula
{
AtomicFormula result{atom.negative, atom.ast->clone()};
pushNotIn(result);
return result;
});
return *this;
}
namespace
{
CNFQuery::AndGroup reduceOnce(const CNFQuery::AndGroup & groups)
{
CNFQuery::AndGroup result;
for (const CNFQuery::OrGroup & group : groups)
{
CNFQuery::OrGroup copy(group);
bool inserted = false;
for (const CNFQuery::AtomicFormula & atom : group)
{
copy.erase(atom);
CNFQuery::AtomicFormula negative_atom(atom);
negative_atom.negative = !atom.negative;
copy.insert(negative_atom);
if (groups.contains(copy))
{
copy.erase(negative_atom);
result.insert(copy);
inserted = true;
break;
}
copy.erase(negative_atom);
copy.insert(atom);
}
if (!inserted)
result.insert(group);
}
return result;
}
bool isSubset(const CNFQuery::OrGroup & left, const CNFQuery::OrGroup & right)
{
if (left.size() > right.size())
return false;
for (const auto & elem : left)
if (!right.contains(elem))
return false;
return true;
}
CNFQuery::AndGroup filterSubsets(const CNFQuery::AndGroup & groups)
{
CNFQuery::AndGroup result;
for (const CNFQuery::OrGroup & group : groups)
{
bool insert = true;
for (const CNFQuery::OrGroup & other_group : groups)
{
if (isSubset(other_group, group) && group != other_group)
{
insert = false;
break;
}
}
if (insert)
result.insert(group);
}
return result;
}
}
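/// For example, (a OR b) AND (NOT a OR b) is reduced to (b):
/// reduceOnce() resolves the two groups against each other into {b},
/// the loop stops at the fixed point, and filterSubsets() drops any group
/// that is a superset of another remaining group.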
CNFQuery & CNFQuery::reduce()
{
while (true)
{
AndGroup new_statements = reduceOnce(statements);
if (statements == new_statements)
{
statements = filterSubsets(statements);
return *this;
}
else
statements = new_statements;
}
}
std::string CNFQuery::dump() const
{
WriteBufferFromOwnString res;
bool first = true;
for (const auto & group : statements)
{
if (!first)
res << " AND ";
first = false;
res << "(";
bool first_in_group = true;
for (const auto & atom : group)
{
if (!first_in_group)
res << " OR ";
first_in_group = false;
if (atom.negative)
res << " NOT ";
res << atom.ast->getColumnName();
}
res << ")";
}
return res.str();
}
}

View File

@ -0,0 +1,167 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTLiteral.h>
#include <vector>
#include <set>
#include <unordered_map>
namespace DB
{
class CNFQuery
{
public:
struct AtomicFormula
{
bool negative = false;
ASTPtr ast;
/// for set
bool operator<(const AtomicFormula & rhs) const
{
return ast->getTreeHash() == rhs.ast->getTreeHash()
? negative < rhs.negative
: ast->getTreeHash() < rhs.ast->getTreeHash();
}
bool operator==(const AtomicFormula & rhs) const
{
return negative == rhs.negative &&
ast->getTreeHash() == rhs.ast->getTreeHash() &&
ast->getColumnName() == rhs.ast->getColumnName();
}
};
using OrGroup = std::set<AtomicFormula>;
using AndGroup = std::set<OrGroup>;
CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { }
template <typename P>
CNFQuery & filterAlwaysTrueGroups(P predicate_is_unknown) /// delete always true groups
{
AndGroup filtered;
for (const auto & or_group : statements)
{
if (predicate_is_unknown(or_group))
filtered.insert(or_group);
}
std::swap(statements, filtered);
return *this;
}
template <typename P>
CNFQuery & filterAlwaysFalseAtoms(P predicate_is_unknown) /// delete always false atoms
{
AndGroup filtered;
for (const auto & or_group : statements)
{
OrGroup filtered_group;
for (auto ast : or_group)
{
if (predicate_is_unknown(ast))
filtered_group.insert(ast);
}
if (!filtered_group.empty())
filtered.insert(filtered_group);
else
{
/// all atoms false -> group false -> CNF false
filtered.clear();
filtered_group.clear();
filtered_group.insert(AtomicFormula{false, std::make_shared<ASTLiteral>(static_cast<UInt8>(0))});
filtered.insert(filtered_group);
std::swap(statements, filtered);
return *this;
}
}
std::swap(statements, filtered);
return *this;
}
template <typename F>
const CNFQuery & iterateGroups(F func) const
{
for (const auto & group : statements)
func(group);
return *this;
}
CNFQuery & appendGroup(AndGroup&& and_group)
{
for (auto && or_group : and_group)
statements.emplace(std::move(or_group));
return *this;
}
template <typename F>
CNFQuery & transformGroups(F func)
{
AndGroup result;
for (const auto & group : statements)
{
auto new_group = func(group);
if (!new_group.empty())
result.insert(std::move(new_group));
}
std::swap(statements, result);
return *this;
}
template <typename F>
CNFQuery & transformAtoms(F func)
{
transformGroups([func](const OrGroup & group) -> OrGroup
{
OrGroup result;
for (const auto & atom : group)
{
auto new_atom = func(atom);
if (new_atom.ast)
result.insert(std::move(new_atom));
}
return result;
});
return *this;
}
const AndGroup & getStatements() const { return statements; }
std::string dump() const;
/// Converts != -> NOT =; <,>= -> (NOT) <; >,<= -> (NOT) <= for simpler matching
CNFQuery & pullNotOutFunctions();
/// Revert pullNotOutFunctions actions
CNFQuery & pushNotInFuntions();
/// (a OR b OR ...) AND (NOT a OR b OR ...) -> (b OR ...)
CNFQuery & reduce();
private:
AndGroup statements;
};
class TreeCNFConverter
{
public:
static constexpr size_t DEFAULT_MAX_GROWTH_MULTIPLIER = 20;
static constexpr size_t MAX_ATOMS_WITHOUT_CHECK = 200;
/// @max_growth_multipler limits how many times the formula is allowed to grow in size.
/// It's needed to avoid an exponential explosion of the formula:
/// the CNF of a boolean formula with N clauses can have 2^N clauses.
/// If the limit on the number of atomic formulas is exceeded, nullopt is returned.
/// 0 means unlimited.
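/// For example, with the default multiplier of 20, a formula of 30 atoms may grow
/// to at most max(MAX_ATOMS_WITHOUT_CHECK, 30 * 20) = 600 atoms during conversion.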
static std::optional<CNFQuery> tryConvertToCNF(
const ASTPtr & query, size_t max_growth_multipler = DEFAULT_MAX_GROWTH_MULTIPLIER);
static CNFQuery toCNF(
const ASTPtr & query, size_t max_growth_multipler = DEFAULT_MAX_GROWTH_MULTIPLIER);
static ASTPtr fromCNF(const CNFQuery & cnf);
};
void pushNotIn(CNFQuery::AtomicFormula & atom);
}

View File

@ -4,6 +4,9 @@
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/OptimizeIfChains.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Interpreters/WhereConstraintsOptimizer.h>
#include <Interpreters/SubstituteColumnOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
@ -539,6 +542,44 @@ void optimizeLimitBy(const ASTSelectQuery * select_query)
elems = std::move(unique_elems);
}
/// Use constraints to get rid of useless parts of query
void optimizeWithConstraints(ASTSelectQuery * select_query,
Aliases & /*aliases*/,
const NameSet & /*source_columns_set*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
const StorageMetadataPtr & metadata_snapshot,
const bool optimize_append_index)
{
WhereConstraintsOptimizer(select_query, metadata_snapshot, optimize_append_index).perform();
}
void optimizeSubstituteColumn(ASTSelectQuery * select_query,
Aliases & /*aliases*/,
const NameSet & /*source_columns_set*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
const StorageMetadataPtr & metadata_snapshot,
const ConstStoragePtr & storage)
{
SubstituteColumnOptimizer(select_query, metadata_snapshot, storage).perform();
}
/// Transform WHERE to CNF for more convenient optimization.
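/// For example, WHERE NOT (a = 1 AND b = 2) becomes WHERE a != 1 OR b != 2
/// after the conversion and pushNotInFuntions().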
bool convertQueryToCNF(ASTSelectQuery * select_query)
{
if (select_query->where())
{
auto cnf_form = TreeCNFConverter::tryConvertToCNF(select_query->where());
if (!cnf_form)
return false;
cnf_form->pushNotInFuntions();
select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf_form);
return true;
}
return false;
}
/// Remove duplicated columns from USING(...).
void optimizeUsing(const ASTSelectQuery * select_query)
{
@ -700,6 +741,20 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
optimizeAggregationFunctions(query);
bool converted_to_cnf = false;
if (settings.convert_query_to_cnf)
converted_to_cnf = convertQueryToCNF(select_query);
if (converted_to_cnf && settings.optimize_using_constraints)
{
optimizeWithConstraints(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.metadata_snapshot, settings.optimize_append_index);
if (settings.optimize_substitute_columns)
optimizeSubstituteColumn(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.metadata_snapshot, result.storage);
}
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, context);

View File

@ -3,6 +3,7 @@
#include <Interpreters/Aliases.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Storages/IStorage_fwd.h>
#include <Parsers/IAST_fwd.h>
namespace DB
@ -15,6 +16,7 @@ struct TreeRewriterResult;
class TreeOptimizer
{
public:
static void apply(
ASTPtr & query,
TreeRewriterResult & result,

View File

@ -0,0 +1,182 @@
#include <Interpreters/WhereConstraintsOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTFunction.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Interpreters/AddIndexConstraintsOptimizer.h>
#include <Parsers/ASTSelectQuery.h>
#include <Poco/Logger.h>
#include <Parsers/queryToString.h>
namespace DB
{
WhereConstraintsOptimizer::WhereConstraintsOptimizer(
ASTSelectQuery * select_query_,
const StorageMetadataPtr & metadata_snapshot_,
bool optimize_append_index_)
: select_query(select_query_)
, metadata_snapshot(metadata_snapshot_)
, optimize_append_index(optimize_append_index_)
{
}
namespace
{
enum class MatchState
{
FULL_MATCH, /// a = b
NOT_MATCH, /// a = not b
NONE, /// other
};
MatchState match(CNFQuery::AtomicFormula a, CNFQuery::AtomicFormula b)
{
bool match_means_ok = (a.negative == b.negative);
if (a.ast->getTreeHash() == b.ast->getTreeHash())
return match_means_ok ? MatchState::FULL_MATCH : MatchState::NOT_MATCH;
return MatchState::NONE;
}
bool checkIfGroupAlwaysTrueFullMatch(const CNFQuery::OrGroup & group, const ConstraintsDescription & constraints_description)
{
/// We have the constraints in CNF.
/// The CNF is always true => each OR group in the CNF is always true.
/// So, we check whether at least one OR group from the constraints' CNF is a subset of our group.
/// If we find one, then our group is always true too.
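/// For example, if the table has a constraint whose CNF contains the single-atom
/// OR group {x > 0}, then the WHERE group (x > 0 OR y = 1) contains that whole
/// group, so it is always true and can be dropped from the query's CNF.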
const auto & constraints_data = constraints_description.getConstraintData();
std::vector<size_t> found(constraints_data.size());
for (size_t i = 0; i < constraints_data.size(); ++i)
found[i] = constraints_data[i].size();
for (const auto & atom : group)
{
const auto constraint_atom_ids = constraints_description.getAtomIds(atom.ast);
if (constraint_atom_ids)
{
const auto constraint_atoms = constraints_description.getAtomsById(*constraint_atom_ids);
for (size_t i = 0; i < constraint_atoms.size(); ++i)
{
if (match(constraint_atoms[i], atom) == MatchState::FULL_MATCH)
{
if ((--found[(*constraint_atom_ids)[i].group_id]) == 0)
return true;
}
}
}
}
return false;
}
bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const ComparisonGraph & graph)
{
/// We try to find at least one atom that is always true by using comparison graph.
for (const auto & atom : group)
{
const auto * func = atom.ast->as<ASTFunction>();
if (func && func->arguments->children.size() == 2)
{
const auto expected = ComparisonGraph::atomToCompareResult(atom);
if (graph.isAlwaysCompare(expected, func->arguments->children[0], func->arguments->children[1]))
return true;
}
}
return false;
}
bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description)
{
const auto constraint_atom_ids = constraints_description.getAtomIds(atom.ast);
if (constraint_atom_ids)
{
for (const auto & constraint_atom : constraints_description.getAtomsById(*constraint_atom_ids))
{
const auto match_result = match(constraint_atom, atom);
if (match_result == MatchState::NOT_MATCH)
return true;
}
}
return false;
}
bool checkIfAtomAlwaysFalseGraph(const CNFQuery::AtomicFormula & atom, const ComparisonGraph & graph)
{
const auto * func = atom.ast->as<ASTFunction>();
if (func && func->arguments->children.size() == 2)
{
/// TODO: special support for !=
const auto expected = ComparisonGraph::atomToCompareResult(atom);
return !graph.isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]);
}
return false;
}
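/// For example, if the comparison graph implies that column b is always equal to 10,
/// the atom a < b is rewritten into a < 10, which later steps may simplify further.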
void replaceToConstants(ASTPtr & term, const ComparisonGraph & graph)
{
const auto equal_constant = graph.getEqualConst(term);
if (equal_constant)
{
term = (*equal_constant)->clone();
}
else
{
for (auto & child : term->children)
replaceToConstants(child, graph);
}
}
CNFQuery::AtomicFormula replaceTermsToConstants(const CNFQuery::AtomicFormula & atom, const ComparisonGraph & graph)
{
CNFQuery::AtomicFormula result;
result.negative = atom.negative;
result.ast = atom.ast->clone();
replaceToConstants(result.ast, graph);
return result;
}
}
void WhereConstraintsOptimizer::perform()
{
if (select_query->where() && metadata_snapshot)
{
const auto & compare_graph = metadata_snapshot->getConstraints().getGraph();
auto cnf = TreeCNFConverter::toCNF(select_query->where());
cnf.pullNotOutFunctions()
.filterAlwaysTrueGroups([&compare_graph, this](const auto & group)
{
/// remove always true groups from CNF
return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) && !checkIfGroupAlwaysTrueGraph(group, compare_graph);
})
.filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom)
{
/// remove always false atoms from CNF
return !checkIfAtomAlwaysFalseFullMatch(atom, metadata_snapshot->getConstraints()) && !checkIfAtomAlwaysFalseGraph(atom, compare_graph);
})
.transformAtoms([&compare_graph](const auto & atom)
{
return replaceTermsToConstants(atom, compare_graph);
})
.reduce()
.pushNotInFuntions();
if (optimize_append_index)
AddIndexConstraintsOptimizer(metadata_snapshot).perform(cnf);
select_query->setExpression(ASTSelectQuery::Expression::WHERE, TreeCNFConverter::fromCNF(cnf));
}
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
/// Optimizer that can remove useless parts of conditions
/// in WHERE clause according to table constraints.
class WhereConstraintsOptimizer final
{
public:
WhereConstraintsOptimizer(
ASTSelectQuery * select_query,
const StorageMetadataPtr & metadata_snapshot,
bool optimize_append_index_);
void perform();
private:
ASTSelectQuery * select_query;
const StorageMetadataPtr & metadata_snapshot;
bool optimize_append_index;
};
}

View File

@ -0,0 +1,183 @@
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/FieldVisitorToString.h>
#include <gtest/gtest.h>
using namespace DB;
static ComparisonGraph getGraph(const String & query)
{
ParserExpressionList parser(false);
ASTPtr ast = parseQuery(parser, query, 0, 0);
return ComparisonGraph(ast->children);
}
TEST(ComparisonGraph, Bounds)
{
String query = "x <= 1, 1 < c, 3 < c, c < d, d < e, e < 7, e < 10, 10 <= y";
auto graph = getGraph(query);
auto d = std::make_shared<ASTIdentifier>("d");
{
auto res = graph.getConstLowerBound(d);
ASSERT_TRUE(res.has_value());
const auto & [lower, strict] = *res;
ASSERT_EQ(get<UInt64>(lower), 3);
ASSERT_TRUE(strict);
}
{
auto res = graph.getConstUpperBound(d);
ASSERT_TRUE(res.has_value());
const auto & [upper, strict] = *res;
ASSERT_EQ(get<UInt64>(upper), 7);
ASSERT_TRUE(strict);
}
{
auto x = std::make_shared<ASTIdentifier>("x");
auto y = std::make_shared<ASTIdentifier>("y");
ASSERT_EQ(graph.compare(x, y), ComparisonGraph::CompareResult::LESS);
ASSERT_EQ(graph.compare(y, x), ComparisonGraph::CompareResult::GREATER);
}
}
using Components = std::set<std::set<String>>;
static std::set<String> componentToStrings(const ASTs & comp)
{
std::set<String> res;
for (const auto & ast : comp)
res.insert(ast->getColumnName());
return res;
}
static void checkComponents(const String & query, const Components & expected)
{
auto graph = getGraph(query);
size_t num_components = graph.getNumOfComponents();
ASSERT_EQ(num_components, expected.size());
Components res;
for (size_t i = 0; i < num_components; ++i)
res.insert(componentToStrings(graph.getComponent(i)));
ASSERT_EQ(res, expected);
}
TEST(ComparisonGraph, Components)
{
{
String query = "a >= b, b >= c, c >= d, d >= b, d >= e, a >= e";
Components expected = {{"a"}, {"b", "c", "d"}, {"e"}};
checkComponents(query, expected);
}
{
String query = "a >= b, b >= a, b >= c, c >= d, d >= c";
Components expected = {{"a", "b"}, {"c", "d"}};
checkComponents(query, expected);
}
}
TEST(ComparisonGraph, Compare)
{
using CompareResult = ComparisonGraph::CompareResult;
{
String query = "a >= b, c >= b";
auto graph = getGraph(query);
auto a = std::make_shared<ASTIdentifier>("a");
auto c = std::make_shared<ASTIdentifier>("c");
ASSERT_EQ(graph.compare(a, c), CompareResult::UNKNOWN);
}
{
String query = "a >= b, b > c";
auto graph = getGraph(query);
auto a = std::make_shared<ASTIdentifier>("a");
auto b = std::make_shared<ASTIdentifier>("b");
auto c = std::make_shared<ASTIdentifier>("c");
ASSERT_EQ(graph.compare(a, c), CompareResult::GREATER);
ASSERT_EQ(graph.compare(a, b), CompareResult::GREATER_OR_EQUAL);
ASSERT_EQ(graph.compare(b, c), CompareResult::GREATER);
}
{
String query = "a != b, c < a";
auto graph = getGraph(query);
auto a = std::make_shared<ASTIdentifier>("a");
auto b = std::make_shared<ASTIdentifier>("b");
auto c = std::make_shared<ASTIdentifier>("c");
ASSERT_EQ(graph.compare(a, b), CompareResult::NOT_EQUAL);
ASSERT_EQ(graph.compare(a, c), CompareResult::GREATER);
ASSERT_EQ(graph.compare(b, c), CompareResult::UNKNOWN);
}
{
/// These constraints are inconsistent.
String query = "a >= b, b >= a, a != b";
ASSERT_THROW(getGraph(query), Exception);
}
{
/// These constraints are inconsistent.
String query = "a > b, b > c, c > a";
ASSERT_THROW(getGraph(query), Exception);
}
{
String query = "a >= 3, b > a, c >= 3, d >= c";
auto graph = getGraph(query);
auto a = std::make_shared<ASTIdentifier>("a");
auto b = std::make_shared<ASTIdentifier>("b");
auto d = std::make_shared<ASTIdentifier>("d");
auto lit_2 = std::make_shared<ASTLiteral>(2u);
auto lit_3 = std::make_shared<ASTLiteral>(3u);
auto lit_4 = std::make_shared<ASTLiteral>(4u);
ASSERT_EQ(graph.compare(lit_3, a), CompareResult::LESS_OR_EQUAL);
ASSERT_FALSE(graph.isAlwaysCompare(CompareResult::LESS, lit_3, a));
ASSERT_TRUE(graph.isAlwaysCompare(CompareResult::LESS, lit_2, a));
ASSERT_EQ(graph.compare(b, lit_2), CompareResult::GREATER);
ASSERT_EQ(graph.compare(b, lit_3), CompareResult::GREATER);
ASSERT_EQ(graph.compare(b, lit_4), CompareResult::UNKNOWN);
ASSERT_EQ(graph.compare(d, lit_2), CompareResult::GREATER);
ASSERT_EQ(graph.compare(d, lit_3), CompareResult::GREATER_OR_EQUAL);
ASSERT_EQ(graph.compare(d, lit_4), CompareResult::UNKNOWN);
}
{
String query = "a >= 5, a <= 10";
auto graph = getGraph(query);
auto a = std::make_shared<ASTIdentifier>("a");
auto lit_8 = std::make_shared<ASTLiteral>(8);
auto lit_3 = std::make_shared<ASTLiteral>(3);
auto lit_15 = std::make_shared<ASTLiteral>(15);
ASSERT_EQ(graph.compare(a, lit_8), CompareResult::UNKNOWN);
ASSERT_EQ(graph.compare(a, lit_3), CompareResult::GREATER);
ASSERT_EQ(graph.compare(a, lit_15), CompareResult::LESS);
}
}

View File

@ -11,6 +11,7 @@ ASTPtr ASTConstraintDeclaration::clone() const
auto res = std::make_shared<ASTConstraintDeclaration>();
res->name = name;
res->type = type;
if (expr)
res->set(res->expr, expr->clone());
@ -21,7 +22,7 @@ ASTPtr ASTConstraintDeclaration::clone() const
void ASTConstraintDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
s.ostr << backQuoteIfNeed(name);
s.ostr << (s.hilite ? hilite_keyword : "") << " CHECK " << (s.hilite ? hilite_none : "");
s.ostr << (s.hilite ? hilite_keyword : "") << (type == Type::CHECK ? " CHECK " : " ASSUME ") << (s.hilite ? hilite_none : "");
expr->formatImpl(s, state, frame);
}

View File

@ -10,7 +10,14 @@ namespace DB
class ASTConstraintDeclaration : public IAST
{
public:
enum class Type : UInt8
{
CHECK,
ASSUME,
};
String name;
Type type;
IAST * expr;
String getID(char) const override { return "Constraint"; }

View File

@ -143,24 +143,32 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_check("CHECK");
ParserKeyword s_assume("ASSUME");
ParserIdentifier name_p;
ParserLogicalOrExpression expression_p;
ASTPtr name;
ASTPtr expr;
ASTConstraintDeclaration::Type type = ASTConstraintDeclaration::Type::CHECK;
if (!name_p.parse(pos, name, expected))
return false;
if (!s_check.ignore(pos, expected))
return false;
{
if (s_assume.ignore(pos, expected))
type = ASTConstraintDeclaration::Type::ASSUME;
else
return false;
}
if (!expression_p.parse(pos, expr, expected))
return false;
auto constraint = std::make_shared<ASTConstraintDeclaration>();
constraint->name = name->as<ASTIdentifier &>().name();
constraint->type = type;
constraint->set(constraint->expr, expr);
node = constraint;

View File

@ -0,0 +1,251 @@
#include <Processors/Formats/Impl/CustomSeparatedRowInputFormat.h>
#include <Processors/Formats/Impl/TemplateRowInputFormat.h>
#include <Formats/EscapingRuleUtils.h>
#include <Formats/registerWithNamesAndTypes.h>
#include <IO/Operators.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
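/// If the escaping rule is CSV, propagate the first character of the custom field
/// delimiter into the CSV settings. For example, with format_custom_field_delimiter = "||",
/// fields would be parsed with '|' as the CSV delimiter.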
static FormatSettings updateFormatSettings(const FormatSettings & settings)
{
if (settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV || settings.custom.field_delimiter.empty())
return settings;
auto updated = settings;
updated.csv.delimiter = settings.custom.field_delimiter.front();
return updated;
}
CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
const Block & header_,
ReadBuffer & in_,
const Params & params_,
bool with_names_,
bool with_types_,
bool ignore_spaces_,
const FormatSettings & format_settings_)
: RowInputFormatWithNamesAndTypes(header_, buf, params_, with_names_, with_types_, updateFormatSettings(format_settings_))
, buf(in_)
, ignore_spaces(ignore_spaces_)
, escaping_rule(format_settings_.custom.escaping_rule)
{
/// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know
/// the exact number of columns in data (because it can contain unknown columns). So, if field_delimiter and row_after_delimiter are
/// the same and row_between_delimiter is empty, we won't be able to determine the end of row while reading column names or types.
if ((with_types_ || with_names_) && format_settings_.with_names_use_header
&& format_settings_.custom.field_delimiter == format_settings_.custom.row_after_delimiter
&& format_settings_.custom.row_between_delimiter.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Input format CustomSeparatedWithNames(AndTypes) cannot work properly with enabled setting input_format_with_names_use_header, "
"when format_custom_field_delimiter and format_custom_row_after_delimiter are the same and format_custom_row_between_delimiter is empty.");
}
}
void CustomSeparatedRowInputFormat::skipPrefixBeforeHeader()
{
skipSpaces();
assertString(format_settings.custom.result_before_delimiter, buf);
}
void CustomSeparatedRowInputFormat::skipRowStartDelimiter()
{
skipSpaces();
assertString(format_settings.custom.row_before_delimiter, buf);
}
void CustomSeparatedRowInputFormat::skipFieldDelimiter()
{
skipSpaces();
assertString(format_settings.custom.field_delimiter, buf);
}
void CustomSeparatedRowInputFormat::skipRowEndDelimiter()
{
skipSpaces();
assertString(format_settings.custom.row_after_delimiter, buf);
}
void CustomSeparatedRowInputFormat::skipRowBetweenDelimiter()
{
skipSpaces();
assertString(format_settings.custom.row_between_delimiter, buf);
}
void CustomSeparatedRowInputFormat::skipField()
{
skipSpaces();
skipFieldByEscapingRule(buf, escaping_rule, format_settings);
}
bool CustomSeparatedRowInputFormat::checkEndOfRow()
{
PeekableReadBufferCheckpoint checkpoint{buf, true};
skipSpaces();
if (!checkString(format_settings.custom.row_after_delimiter, buf))
return false;
skipSpaces();
/// At the end of row after row_after_delimiter we expect result_after_delimiter or row_between_delimiter.
if (checkString(format_settings.custom.row_between_delimiter, buf))
return true;
buf.rollbackToCheckpoint();
skipSpaces();
buf.ignore(format_settings.custom.row_after_delimiter.size());
return checkForSuffixImpl(true);
}
std::vector<String> CustomSeparatedRowInputFormat::readHeaderRow()
{
std::vector<String> values;
skipRowStartDelimiter();
do
{
if (!values.empty())
skipFieldDelimiter();
skipSpaces();
values.push_back(readStringByEscapingRule(buf, escaping_rule, format_settings));
}
while (!checkEndOfRow());
skipRowEndDelimiter();
return values;
}
void CustomSeparatedRowInputFormat::skipHeaderRow()
{
size_t columns = getPort().getHeader().columns();
skipRowStartDelimiter();
for (size_t i = 0; i != columns; ++i)
{
skipField();
if (i + 1 != columns)
skipFieldDelimiter();
}
skipRowEndDelimiter();
}
bool CustomSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &)
{
skipSpaces();
return deserializeFieldByEscapingRule(type, serialization, column, buf, escaping_rule, format_settings);
}
bool CustomSeparatedRowInputFormat::checkForSuffixImpl(bool check_eof)
{
skipSpaces();
if (format_settings.custom.result_after_delimiter.empty())
{
if (!check_eof)
return false;
return buf.eof();
}
if (unlikely(checkString(format_settings.custom.result_after_delimiter, buf)))
{
skipSpaces();
if (!check_eof)
return true;
if (buf.eof())
return true;
}
return false;
}
bool CustomSeparatedRowInputFormat::tryParseSuffixWithDiagnosticInfo(WriteBuffer & out)
{
PeekableReadBufferCheckpoint checkpoint{buf};
if (checkForSuffixImpl(false))
{
if (buf.eof())
out << "<End of stream>\n";
else
out << " There is some data after suffix\n";
return false;
}
buf.rollbackToCheckpoint();
return true;
}
bool CustomSeparatedRowInputFormat::checkForSuffix()
{
PeekableReadBufferCheckpoint checkpoint{buf};
if (checkForSuffixImpl(true))
return true;
buf.rollbackToCheckpoint();
return false;
}
bool CustomSeparatedRowInputFormat::allowSyncAfterError() const
{
return !format_settings.custom.row_after_delimiter.empty() || !format_settings.custom.row_between_delimiter.empty();
}
void CustomSeparatedRowInputFormat::syncAfterError()
{
skipToNextRowOrEof(buf, format_settings.custom.row_after_delimiter, format_settings.custom.row_between_delimiter, ignore_spaces);
end_of_stream = buf.eof();
/// It can happen that buf.position() is not at the beginning of the row
/// if some delimiter is similar to row_format.delimiters.back() and row_between_delimiter.
/// It will cause another parsing error.
}
bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
{
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces);
}
bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
{
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.field_delimiter, "delimiter between fields", ignore_spaces);
}
bool CustomSeparatedRowInputFormat::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
{
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_after_delimiter, "delimiter after last field", ignore_spaces);
}
bool CustomSeparatedRowInputFormat::parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer & out)
{
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_between_delimiter, "delimiter between rows", ignore_spaces);
}
void CustomSeparatedRowInputFormat::resetParser()
{
RowInputFormatWithNamesAndTypes::resetParser();
buf.reset();
}
void registerInputFormatCustomSeparated(FormatFactory & factory)
{
for (bool ignore_spaces : {false, true})
{
auto register_func = [&](const String & format_name, bool with_names, bool with_types)
{
factory.registerInputFormat(format_name, [=](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<CustomSeparatedRowInputFormat>(sample, buf, params, with_names, with_types, ignore_spaces, settings);
});
};
registerWithNamesAndTypes(ignore_spaces ? "CustomSeparatedIgnoreSpaces" : "CustomSeparated", register_func);
}
}
}

View File

@ -0,0 +1,69 @@
#pragma once
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
#include <Formats/ParsedTemplateFormatString.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadHelpers.h>
namespace DB
{
class CustomSeparatedRowInputFormat : public RowInputFormatWithNamesAndTypes
{
public:
CustomSeparatedRowInputFormat(
const Block & header_,
ReadBuffer & in_,
const Params & params_,
bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
void resetParser() override;
String getName() const override { return "CustomSeparatedRowInputFormat"; }
private:
CustomSeparatedRowInputFormat(
const Block & header_,
std::unique_ptr<PeekableReadBuffer> in_,
const Params & params_,
bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
using EscapingRule = FormatSettings::EscapingRule;
bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
void skipField(size_t /*file_column*/) override { skipField(); }
void skipField();
void skipNames() override { skipHeaderRow(); }
void skipTypes() override { skipHeaderRow(); }
void skipHeaderRow();
void skipPrefixBeforeHeader() override;
void skipRowStartDelimiter() override;
void skipFieldDelimiter() override;
void skipRowEndDelimiter() override;
void skipRowBetweenDelimiter() override;
bool checkForSuffix() override;
bool allowSyncAfterError() const override;
void syncAfterError() override;
bool parseRowStartWithDiagnosticInfo(WriteBuffer & out) override;
bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override;
bool parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
bool tryParseSuffixWithDiagnosticInfo(WriteBuffer & out) override;
std::vector<String> readNames() override { return readHeaderRow(); }
std::vector<String> readTypes() override { return readHeaderRow(); }
std::vector<String> readHeaderRow();
bool checkEndOfRow();
bool checkForSuffixImpl(bool check_eof);
inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(buf); }
PeekableReadBuffer buf;
bool ignore_spaces;
EscapingRule escaping_rule;
};
}

View File

@ -0,0 +1,97 @@
#include <Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h>
#include <Formats/registerWithNamesAndTypes.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/WriteHelpers.h>
namespace DB
{
CustomSeparatedRowOutputFormat::CustomSeparatedRowOutputFormat(
const Block & header_, WriteBuffer & out_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_, bool with_names_, bool with_types_)
: IRowOutputFormat(header_, out_, params_)
, with_names(with_names_)
, with_types(with_types_)
, format_settings(format_settings_)
, escaping_rule(format_settings.custom.escaping_rule)
{
}
void CustomSeparatedRowOutputFormat::writeLine(const std::vector<String> & values)
{
writeRowStartDelimiter();
for (size_t i = 0; i != values.size(); ++i)
{
writeStringByEscapingRule(values[i], out, escaping_rule, format_settings);
if (i + 1 != values.size())
writeFieldDelimiter();
}
writeRowEndDelimiter();
}
void CustomSeparatedRowOutputFormat::writePrefix()
{
writeString(format_settings.custom.result_before_delimiter, out);
const auto & header = getPort(PortKind::Main).getHeader();
if (with_names)
{
writeLine(header.getNames());
writeRowBetweenDelimiter();
}
if (with_types)
{
writeLine(header.getDataTypeNames());
writeRowBetweenDelimiter();
}
}
void CustomSeparatedRowOutputFormat::writeSuffix()
{
writeString(format_settings.custom.result_after_delimiter, out);
}
void CustomSeparatedRowOutputFormat::writeRowStartDelimiter()
{
writeString(format_settings.custom.row_before_delimiter, out);
}
void CustomSeparatedRowOutputFormat::writeFieldDelimiter()
{
writeString(format_settings.custom.field_delimiter, out);
}
void CustomSeparatedRowOutputFormat::writeRowEndDelimiter()
{
writeString(format_settings.custom.row_after_delimiter, out);
}
void CustomSeparatedRowOutputFormat::writeRowBetweenDelimiter()
{
writeString(format_settings.custom.row_between_delimiter, out);
}
void CustomSeparatedRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num)
{
serializeFieldByEscapingRule(column, serialization, out, row_num, escaping_rule, format_settings);
}
void registerOutputFormatCustomSeparated(FormatFactory & factory)
{
auto register_func = [&](const String & format_name, bool with_names, bool with_types)
{
factory.registerOutputFormat(format_name, [with_names, with_types](
WriteBuffer & buf,
const Block & sample,
const RowOutputFormatParams & params,
const FormatSettings & settings)
{
return std::make_shared<CustomSeparatedRowOutputFormat>(sample, buf, params, settings, with_names, with_types);
});
};
registerWithNamesAndTypes("CustomSeparated", register_func);
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <Processors/Formats/IRowOutputFormat.h>
#include <Formats/ParsedTemplateFormatString.h>
namespace DB
{
class WriteBuffer;
class CustomSeparatedRowOutputFormat : public IRowOutputFormat
{
public:
CustomSeparatedRowOutputFormat(const Block & header_, WriteBuffer & out_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_, bool with_names_, bool with_types_);
String getName() const override { return "CustomSeparatedRowOutputFormat"; }
private:
using EscapingRule = FormatSettings::EscapingRule;
void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override;
void writeFieldDelimiter() override;
void writeRowStartDelimiter() override;
void writeRowEndDelimiter() override;
void writeRowBetweenDelimiter() override;
void writePrefix() override;
void writeSuffix() override;
void writeLine(const std::vector<String> & values);
bool with_names;
bool with_types;
const FormatSettings format_settings;
EscapingRule escaping_rule;
};
}

View File

@ -15,7 +15,7 @@ namespace ErrorCodes
}
JSONAsStringRowInputFormat::JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
IRowInputFormat(header_, in_, std::move(params_)), buf(*in)
IRowInputFormat(header_, buf, std::move(params_)), buf(in_)
{
if (header_.columns() > 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS,

View File

@ -26,8 +26,10 @@ JSONRowOutputFormat::JSONRowOutputFormat(
need_validate_utf8 = true;
WriteBufferFromOwnString buf;
writeJSONString(fields[i].name, buf, settings);
{
WriteBufferValidUTF8 validating_buf(buf);
writeJSONString(fields[i].name, validating_buf, settings);
}
fields[i].name = buf.str();
}

View File

@ -29,7 +29,7 @@ namespace ErrorCodes
}
MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_)
: IRowInputFormat(header_, in_, std::move(params_)), buf(*in), parser(visitor), data_types(header_.getDataTypes()) {}
: IRowInputFormat(header_, buf, std::move(params_)), buf(in_), parser(visitor), data_types(header_.getDataTypes()) {}
void MsgPackRowInputFormat::resetParser()
{

View File

@ -182,7 +182,6 @@ void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num)
void registerOutputFormatMsgPack(FormatFactory & factory)
{
factory.registerOutputFormat("MsgPack", [](
WriteBuffer & buf,
const Block & sample,
@ -191,6 +190,7 @@ void registerOutputFormatMsgPack(FormatFactory & factory)
{
return std::make_shared<MsgPackRowOutputFormat>(buf, sample, params);
});
factory.markOutputFormatSupportsParallelFormatting("MsgPack");
}
}

View File

@ -24,6 +24,15 @@ MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_,
/// But it's also possible to specify MySQLWire as output format for clickhouse-client or clickhouse-local.
/// There is no `sequence_id` stored in `settings_.mysql_wire` in this case, so we create a dummy one.
sequence_id = settings_.mysql_wire.sequence_id ? settings_.mysql_wire.sequence_id : &dummy_sequence_id;
const auto & header = getPort(PortKind::Main).getHeader();
data_types = header.getDataTypes();
serializations.reserve(data_types.size());
for (const auto & type : data_types)
serializations.emplace_back(type->getDefaultSerialization());
packet_endpoint = MySQLProtocol::PacketEndpoint::create(out, *sequence_id);
}
void MySQLOutputFormat::setContext(ContextPtr context_)
@ -34,13 +43,6 @@ void MySQLOutputFormat::setContext(ContextPtr context_)
void MySQLOutputFormat::writePrefix()
{
const auto & header = getPort(PortKind::Main).getHeader();
data_types = header.getDataTypes();
serializations.reserve(data_types.size());
for (const auto & type : data_types)
serializations.emplace_back(type->getDefaultSerialization());
packet_endpoint = MySQLProtocol::PacketEndpoint::create(out, *sequence_id);
if (header.columns())
{

View File

@ -2,6 +2,7 @@
#include <base/find_symbols.h>
#include <Processors/Formats/Impl/RegexpRowInputFormat.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/ReadHelpers.h>
namespace DB
@ -10,16 +11,15 @@ namespace DB
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
RegexpRowInputFormat::RegexpRowInputFormat(
ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
: IRowInputFormat(header_, in_, std::move(params_))
: IRowInputFormat(header_, buf, std::move(params_))
, buf(in_)
, format_settings(format_settings_)
, field_format(stringToFormat(format_settings_.regexp.escaping_rule))
, escaping_rule(format_settings_.regexp.escaping_rule)
, regexp(format_settings_.regexp.regexp)
{
size_t fields_count = regexp.NumberOfCapturingGroups();
@ -42,72 +42,19 @@ void RegexpRowInputFormat::resetParser()
buf.reset();
}
RegexpRowInputFormat::ColumnFormat RegexpRowInputFormat::stringToFormat(const String & format)
{
if (format == "Escaped")
return ColumnFormat::Escaped;
if (format == "Quoted")
return ColumnFormat::Quoted;
if (format == "CSV")
return ColumnFormat::Csv;
if (format == "JSON")
return ColumnFormat::Json;
if (format == "Raw")
return ColumnFormat::Raw;
throw Exception("Unsupported column format \"" + format + "\".", ErrorCodes::BAD_ARGUMENTS);
}
bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns)
{
const auto & type = getPort().getHeader().getByPosition(index).type;
bool parse_as_nullable = format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
bool read = true;
ReadBuffer field_buf(const_cast<char *>(matched_fields[index].data()), matched_fields[index].size(), 0);
try
{
const auto & serialization = serializations[index];
switch (field_format)
{
case ColumnFormat::Escaped:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextEscapedImpl(*columns[index], field_buf, format_settings, serialization);
else
serialization->deserializeTextEscaped(*columns[index], field_buf, format_settings);
break;
case ColumnFormat::Quoted:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextQuotedImpl(*columns[index], field_buf, format_settings, serialization);
else
serialization->deserializeTextQuoted(*columns[index], field_buf, format_settings);
break;
case ColumnFormat::Csv:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextCSVImpl(*columns[index], field_buf, format_settings, serialization);
else
serialization->deserializeTextCSV(*columns[index], field_buf, format_settings);
break;
case ColumnFormat::Json:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextJSONImpl(*columns[index], field_buf, format_settings, serialization);
else
serialization->deserializeTextJSON(*columns[index], field_buf, format_settings);
break;
case ColumnFormat::Raw:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextRawImpl(*columns[index], field_buf, format_settings, serialization);
else
serialization->deserializeTextRaw(*columns[index], field_buf, format_settings);
break;
default:
break;
}
return deserializeFieldByEscapingRule(type, serializations[index], *columns[index], field_buf, escaping_rule, format_settings);
}
catch (Exception & e)
{
e.addMessage("(while reading the value of column " + getPort().getHeader().getByPosition(index).name + ")");
throw;
}
return read;
}
void RegexpRowInputFormat::readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext)

View File

@ -25,7 +25,7 @@ class ReadBuffer;
class RegexpRowInputFormat : public IRowInputFormat
{
using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
using EscapingRule = FormatSettings::EscapingRule;
public:
RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_);
@ -37,11 +37,10 @@ private:
bool readField(size_t index, MutableColumns & columns);
void readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext);
static ColumnFormat stringToFormat(const String & format);
PeekableReadBuffer buf;
const FormatSettings format_settings;
const ColumnFormat field_format;
const EscapingRule escaping_rule;
const RE2 regexp;
// The vector of fields extracted from line using regexp.

View File

@ -1,5 +1,6 @@
#include <Processors/Formats/Impl/TemplateBlockOutputFormat.h>
#include <Formats/FormatFactory.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/Context.h>
@ -39,7 +40,7 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, Writ
case static_cast<size_t>(ResultsetPart::Totals):
case static_cast<size_t>(ResultsetPart::ExtremesMin):
case static_cast<size_t>(ResultsetPart::ExtremesMax):
if (format.formats[i] != ColumnFormat::None)
if (format.escaping_rules[i] != EscapingRule::None)
format.throwInvalidFormat("Serialization type for data, totals, min and max must be empty or None", i);
break;
case static_cast<size_t>(ResultsetPart::Rows):
@ -47,7 +48,7 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, Writ
case static_cast<size_t>(ResultsetPart::TimeElapsed):
case static_cast<size_t>(ResultsetPart::RowsRead):
case static_cast<size_t>(ResultsetPart::BytesRead):
if (format.formats[i] == ColumnFormat::None)
if (format.escaping_rules[i] == EscapingRule::None)
format.throwInvalidFormat("Serialization type for output part rows, rows_before_limit, time, "
"rows_read or bytes_read is not specified", i);
break;
@ -68,7 +69,7 @@ TemplateBlockOutputFormat::TemplateBlockOutputFormat(const Block & header_, Writ
if (header_.columns() <= *row_format.format_idx_to_column_idx[i])
row_format.throwInvalidFormat("Column index " + std::to_string(*row_format.format_idx_to_column_idx[i]) +
" must be less then number of columns (" + std::to_string(header_.columns()) + ")", i);
if (row_format.formats[i] == ColumnFormat::None)
if (row_format.escaping_rules[i] == EscapingRule::None)
row_format.throwInvalidFormat("Serialization type for file column is not specified", i);
}
}
@ -105,44 +106,17 @@ void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num)
writeString(row_format.delimiters[j], out);
size_t col_idx = *row_format.format_idx_to_column_idx[j];
serializeField(*chunk.getColumns()[col_idx], *serializations[col_idx], row_num, row_format.formats[j]);
serializeFieldByEscapingRule(*chunk.getColumns()[col_idx], *serializations[col_idx], out, row_num, row_format.escaping_rules[j], settings);
}
writeString(row_format.delimiters[columns], out);
}
void TemplateBlockOutputFormat::serializeField(const IColumn & column, const ISerialization & serialization, size_t row_num, ColumnFormat col_format)
{
switch (col_format)
{
case ColumnFormat::Escaped:
serialization.serializeTextEscaped(column, row_num, out, settings);
break;
case ColumnFormat::Quoted:
serialization.serializeTextQuoted(column, row_num, out, settings);
break;
case ColumnFormat::Csv:
serialization.serializeTextCSV(column, row_num, out, settings);
break;
case ColumnFormat::Json:
serialization.serializeTextJSON(column, row_num, out, settings);
break;
case ColumnFormat::Xml:
serialization.serializeTextXML(column, row_num, out, settings);
break;
case ColumnFormat::Raw:
serialization.serializeTextRaw(column, row_num, out, settings);
break;
default:
__builtin_unreachable();
}
}
template <typename U, typename V> void TemplateBlockOutputFormat::writeValue(U value, ColumnFormat col_format)
template <typename U, typename V> void TemplateBlockOutputFormat::writeValue(U value, EscapingRule escaping_rule)
{
auto type = std::make_unique<V>();
auto col = type->createColumn();
col->insert(value);
serializeField(*col, *type->getDefaultSerialization(), 0, col_format);
serializeFieldByEscapingRule(*col, *type->getDefaultSerialization(), out, 0, escaping_rule, settings);
}
void TemplateBlockOutputFormat::consume(Chunk chunk)
@ -193,21 +167,21 @@ void TemplateBlockOutputFormat::finalizeImpl()
writeRow(extremes, 1);
break;
case ResultsetPart::Rows:
writeValue<size_t, DataTypeUInt64>(row_count, format.formats[i]);
writeValue<size_t, DataTypeUInt64>(row_count, format.escaping_rules[i]);
break;
case ResultsetPart::RowsBeforeLimit:
if (!rows_before_limit_set)
format.throwInvalidFormat("Cannot print rows_before_limit for this request", i);
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.formats[i]);
writeValue<size_t, DataTypeUInt64>(rows_before_limit, format.escaping_rules[i]);
break;
case ResultsetPart::TimeElapsed:
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.formats[i]);
writeValue<double, DataTypeFloat64>(watch.elapsedSeconds(), format.escaping_rules[i]);
break;
case ResultsetPart::RowsRead:
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.formats[i]);
writeValue<size_t, DataTypeUInt64>(progress.read_rows.load(), format.escaping_rules[i]);
break;
case ResultsetPart::BytesRead:
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.formats[i]);
writeValue<size_t, DataTypeUInt64>(progress.read_bytes.load(), format.escaping_rules[i]);
break;
default:
break;
@ -232,7 +206,7 @@ void registerOutputFormatTemplate(FormatFactory & factory)
{
/// Default format string: "${data}"
resultset_format.delimiters.resize(2);
resultset_format.formats.emplace_back(ParsedTemplateFormatString::ColumnFormat::None);
resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None);
resultset_format.format_idx_to_column_idx.emplace_back(0);
resultset_format.column_names.emplace_back("data");
}
@ -258,17 +232,5 @@ void registerOutputFormatTemplate(FormatFactory & factory)
return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings, resultset_format, row_format, settings.template_settings.row_between_delimiter);
});
factory.registerOutputFormat("CustomSeparated", [](
WriteBuffer & buf,
const Block & sample,
const RowOutputFormatParams &,
const FormatSettings & settings)
{
ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(settings.custom);
ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(settings.custom, sample);
return std::make_shared<TemplateBlockOutputFormat>(sample, buf, settings, resultset_format, row_format, settings.custom.row_between_delimiter);
});
}
}
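Note: the per-format switch deleted above is now routed through serializeFieldByEscapingRule from Formats/EscapingRuleUtils.h. A minimal sketch of such a dispatcher follows, reconstructed from the removed switch; the signature is inferred from the call sites in this diff, and enum values other than None, XML and CSV are assumptions rather than taken from this commit.
/// Sketch only: mirrors the switch removed above, not the exact helper in EscapingRuleUtils.h.
void serializeFieldByEscapingRule(const IColumn & column, const ISerialization & serialization,
                                  WriteBuffer & out, size_t row_num,
                                  FormatSettings::EscapingRule escaping_rule, const FormatSettings & settings)
{
    switch (escaping_rule)
    {
        case FormatSettings::EscapingRule::None:
            break;  /// Empty serialization type: nothing to write.
        case FormatSettings::EscapingRule::Escaped:
            serialization.serializeTextEscaped(column, row_num, out, settings);
            break;
        case FormatSettings::EscapingRule::Quoted:
            serialization.serializeTextQuoted(column, row_num, out, settings);
            break;
        case FormatSettings::EscapingRule::CSV:
            serialization.serializeTextCSV(column, row_num, out, settings);
            break;
        case FormatSettings::EscapingRule::JSON:
            serialization.serializeTextJSON(column, row_num, out, settings);
            break;
        case FormatSettings::EscapingRule::XML:
            serialization.serializeTextXML(column, row_num, out, settings);
            break;
        case FormatSettings::EscapingRule::Raw:
            serialization.serializeTextRaw(column, row_num, out, settings);
            break;
    }
}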

View File

@ -12,7 +12,7 @@ namespace DB
class TemplateBlockOutputFormat : public IOutputFormat
{
using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
using EscapingRule = FormatSettings::EscapingRule;
public:
TemplateBlockOutputFormat(const Block & header_, WriteBuffer & out_, const FormatSettings & settings_,
ParsedTemplateFormatString format_, ParsedTemplateFormatString row_format_,
@ -46,8 +46,7 @@ private:
void finalizeImpl() override;
void writeRow(const Chunk & chunk, size_t row_num);
void serializeField(const IColumn & column, const ISerialization & serialization, size_t row_num, ColumnFormat format);
template <typename U, typename V> void writeValue(U value, ColumnFormat col_format);
template <typename U, typename V> void writeValue(U value, EscapingRule escaping_rule);
const FormatSettings settings;
Serializations serializations;

View File

@ -1,6 +1,7 @@
#include <Processors/Formats/Impl/TemplateRowInputFormat.h>
#include <Formats/FormatFactory.h>
#include <Formats/verbosePrintString.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/Operators.h>
#include <DataTypes/DataTypeNothing.h>
#include <Interpreters/Context.h>
@ -38,14 +39,14 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer
format.throwInvalidFormat("Invalid input part", i);
if (has_data)
format.throwInvalidFormat("${data} can occur only once", i);
if (format.formats[i] != ColumnFormat::None)
if (format.escaping_rules[i] != EscapingRule::None)
format.throwInvalidFormat("${data} must have empty or None deserialization type", i);
has_data = true;
format_data_idx = i;
}
else
{
if (format.formats[i] == ColumnFormat::Xml)
if (format.escaping_rules[i] == EscapingRule::XML)
format.throwInvalidFormat("XML deserialization is not supported", i);
}
}
@ -54,7 +55,7 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer
std::vector<UInt8> column_in_format(header_.columns(), false);
for (size_t i = 0; i < row_format.columnsCount(); ++i)
{
if (row_format.formats[i] == ColumnFormat::Xml)
if (row_format.escaping_rules[i] == EscapingRule::XML)
row_format.throwInvalidFormat("XML deserialization is not supported", i);
if (row_format.format_idx_to_column_idx[i])
@ -62,7 +63,7 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer
if (header_.columns() <= *row_format.format_idx_to_column_idx[i])
row_format.throwInvalidFormat("Column index " + std::to_string(*row_format.format_idx_to_column_idx[i]) +
" must be less then number of columns (" + std::to_string(header_.columns()) + ")", i);
if (row_format.formats[i] == ColumnFormat::None)
if (row_format.escaping_rules[i] == EscapingRule::None)
row_format.throwInvalidFormat("Column is not skipped, but deserialization type is None", i);
size_t col_idx = *row_format.format_idx_to_column_idx[i];
@ -111,12 +112,12 @@ ReturnType TemplateRowInputFormat::tryReadPrefixOrSuffix(size_t & input_part_beg
{
skipSpaces();
if constexpr (throw_exception)
skipField(format.formats[input_part_beg]);
skipField(format.escaping_rules[input_part_beg]);
else
{
try
{
skipField(format.formats[input_part_beg]);
skipField(format.escaping_rules[input_part_beg]);
}
catch (const Exception & e)
{
@ -176,7 +177,7 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
extra.read_columns[col_idx] = deserializeField(data_types[col_idx], serializations[col_idx], *columns[col_idx], i);
}
else
skipField(row_format.formats[i]);
skipField(row_format.escaping_rules[i]);
}
@ -192,49 +193,14 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type,
const SerializationPtr & serialization, IColumn & column, size_t file_column)
{
ColumnFormat col_format = row_format.formats[file_column];
bool read = true;
bool parse_as_nullable = settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable();
EscapingRule escaping_rule = row_format.escaping_rules[file_column];
if (escaping_rule == EscapingRule::CSV)
/// Will read unquoted string until settings.csv.delimiter
settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? default_csv_delimiter :
row_format.delimiters[file_column + 1].front();
try
{
switch (col_format)
{
case ColumnFormat::Escaped:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextEscapedImpl(column, buf, settings, serialization);
else
serialization->deserializeTextEscaped(column, buf, settings);
break;
case ColumnFormat::Quoted:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextQuotedImpl(column, buf, settings, serialization);
else
serialization->deserializeTextQuoted(column, buf, settings);
break;
case ColumnFormat::Csv:
/// Will read unquoted string until settings.csv.delimiter
settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? default_csv_delimiter :
row_format.delimiters[file_column + 1].front();
if (parse_as_nullable)
read = SerializationNullable::deserializeTextCSVImpl(column, buf, settings, serialization);
else
serialization->deserializeTextCSV(column, buf, settings);
break;
case ColumnFormat::Json:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextJSONImpl(column, buf, settings, serialization);
else
serialization->deserializeTextJSON(column, buf, settings);
break;
case ColumnFormat::Raw:
if (parse_as_nullable)
read = SerializationNullable::deserializeTextRawImpl(column, buf, settings, serialization);
else
serialization->deserializeTextRaw(column, buf, settings);
break;
default:
__builtin_unreachable();
}
return deserializeFieldByEscapingRule(type, serialization, column, buf, escaping_rule, settings);
}
catch (Exception & e)
{
@ -242,36 +208,13 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type,
throwUnexpectedEof();
throw;
}
return read;
}
void TemplateRowInputFormat::skipField(TemplateRowInputFormat::ColumnFormat col_format)
void TemplateRowInputFormat::skipField(TemplateRowInputFormat::EscapingRule escaping_rule)
{
String tmp;
constexpr const char * field_name = "<SKIPPED COLUMN>";
constexpr size_t field_name_len = 16;
try
{
switch (col_format)
{
case ColumnFormat::None:
/// Empty field, just skip spaces
break;
case ColumnFormat::Escaped:
readEscapedString(tmp, buf);
break;
case ColumnFormat::Quoted:
readQuotedString(tmp, buf);
break;
case ColumnFormat::Csv:
readCSVString(tmp, buf, settings.csv);
break;
case ColumnFormat::Json:
skipJSONField(buf, StringRef(field_name, field_name_len));
break;
default:
__builtin_unreachable();
}
skipFieldByEscapingRule(buf, escaping_rule, settings);
}
catch (Exception & e)
{
@ -344,29 +287,13 @@ bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & col
out << "\nUsing format string (from format_schema_rows): " << row_format.dump() << "\n";
out << "\nTrying to parse next row, because suffix does not match:\n";
try
{
if (likely(row_num != 1))
assertString(row_between_delimiter, buf);
}
catch (const DB::Exception &)
{
writeErrorStringForWrongDelimiter(out, "delimiter between rows", row_between_delimiter);
if (likely(row_num != 1) && !parseDelimiterWithDiagnosticInfo(out, buf, row_between_delimiter, "delimiter between rows", ignore_spaces))
return false;
}
for (size_t i = 0; i < row_format.columnsCount(); ++i)
{
skipSpaces();
try
{
assertString(row_format.delimiters[i], buf);
}
catch (const DB::Exception &)
{
writeErrorStringForWrongDelimiter(out, "delimiter before field " + std::to_string(i), row_format.delimiters[i]);
if (!parseDelimiterWithDiagnosticInfo(out, buf, row_format.delimiters[i], "delimiter before field " + std::to_string(i), ignore_spaces))
return false;
}
skipSpaces();
if (row_format.format_idx_to_column_idx[i])
@ -377,7 +304,7 @@ bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & col
*columns[col_idx], out, i))
{
out << "Maybe it's not possible to deserialize field " + std::to_string(i) +
" as " + ParsedTemplateFormatString::formatToString(row_format.formats[i]);
" as " + escapingRuleToString(row_format.escaping_rules[i]);
return false;
}
}
@ -391,39 +318,39 @@ bool TemplateRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & col
}
}
skipSpaces();
return parseDelimiterWithDiagnosticInfo(out, buf, row_format.delimiters.back(), "delimiter after last field", ignore_spaces);
}
bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces)
{
if (skip_spaces)
skipWhitespaceIfAny(buf);
try
{
assertString(row_format.delimiters.back(), buf);
assertString(delimiter, buf);
}
catch (const DB::Exception &)
{
writeErrorStringForWrongDelimiter(out, "delimiter after last field", row_format.delimiters.back());
out << "ERROR: There is no " << description << ": expected ";
verbosePrintString(delimiter.data(), delimiter.data() + delimiter.size(), out);
out << ", got ";
if (buf.eof())
out << "<End of stream>";
else
verbosePrintString(buf.position(), std::min(buf.position() + delimiter.size() + 10, buf.buffer().end()), out);
out << '\n';
return false;
}
return true;
}
void TemplateRowInputFormat::writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim)
{
out << "ERROR: There is no " << description << ": expected ";
verbosePrintString(delim.data(), delim.data() + delim.size(), out);
out << ", got ";
if (buf.eof())
out << "<End of stream>";
else
verbosePrintString(buf.position(), std::min(buf.position() + delim.size() + 10, buf.buffer().end()), out);
out << '\n';
}
void TemplateRowInputFormat::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
{
const auto & index = row_format.format_idx_to_column_idx[file_column];
if (index)
deserializeField(type, serializations[*index], column, file_column);
else
skipField(row_format.formats[file_column]);
skipField(row_format.escaping_rules[file_column]);
}
bool TemplateRowInputFormat::isGarbageAfterField(size_t, ReadBuffer::Position)
@ -439,62 +366,13 @@ bool TemplateRowInputFormat::allowSyncAfterError() const
void TemplateRowInputFormat::syncAfterError()
{
bool at_beginning_of_row_or_eof = false;
while (!at_beginning_of_row_or_eof)
{
skipToNextDelimiterOrEof(row_format.delimiters.back());
if (buf.eof())
{
end_of_stream = true;
return;
}
buf.ignore(row_format.delimiters.back().size());
skipSpaces();
if (checkForSuffix())
return;
bool last_delimiter_in_row_found = !row_format.delimiters.back().empty();
if (last_delimiter_in_row_found && checkString(row_between_delimiter, buf))
at_beginning_of_row_or_eof = true;
else
skipToNextDelimiterOrEof(row_between_delimiter);
if (buf.eof())
at_beginning_of_row_or_eof = end_of_stream = true;
}
skipToNextRowOrEof(buf, row_format.delimiters.back(), row_between_delimiter, ignore_spaces);
end_of_stream = buf.eof();
/// It can happen that buf.position() is not at the beginning of a row
/// if some delimiter is similar to row_format.delimiters.back() or row_between_delimiter.
/// It will cause another parsing error.
}
/// Searches for delimiter in input stream and sets buffer position to the beginning of delimiter (if found) or EOF (if not)
void TemplateRowInputFormat::skipToNextDelimiterOrEof(const String & delimiter)
{
if (delimiter.empty())
return;
while (!buf.eof())
{
void * pos = memchr(buf.position(), delimiter[0], buf.available());
if (!pos)
{
buf.position() += buf.available();
continue;
}
buf.position() = static_cast<ReadBuffer::Position>(pos);
PeekableReadBufferCheckpoint checkpoint{buf};
if (checkString(delimiter, buf))
return;
buf.rollbackToCheckpoint();
++buf.position();
}
}
void TemplateRowInputFormat::throwUnexpectedEof()
{
throw ParsingException("Unexpected EOF while parsing row " + std::to_string(row_num) + ". "
@ -524,7 +402,7 @@ void registerInputFormatTemplate(FormatFactory & factory)
{
/// Default format string: "${data}"
resultset_format.delimiters.resize(2);
resultset_format.formats.emplace_back(ParsedTemplateFormatString::ColumnFormat::None);
resultset_format.escaping_rules.emplace_back(ParsedTemplateFormatString::EscapingRule::None);
resultset_format.format_idx_to_column_idx.emplace_back(0);
resultset_format.column_names.emplace_back("data");
}
@ -554,21 +432,6 @@ void registerInputFormatTemplate(FormatFactory & factory)
return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format, settings.template_settings.row_between_delimiter);
});
}
for (bool ignore_spaces : {false, true})
{
factory.registerInputFormat(ignore_spaces ? "CustomSeparatedIgnoreSpaces" : "CustomSeparated", [=](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
ParsedTemplateFormatString resultset_format = ParsedTemplateFormatString::setupCustomSeparatedResultsetFormat(settings.custom);
ParsedTemplateFormatString row_format = ParsedTemplateFormatString::setupCustomSeparatedRowFormat(settings.custom, sample);
return std::make_shared<TemplateRowInputFormat>(sample, buf, params, settings, ignore_spaces, resultset_format, row_format, settings.custom.row_between_delimiter);
});
}
}
}
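The removed skipField() switch is likewise replaced by a call to the shared skipFieldByEscapingRule helper. A sketch of what that helper does, reconstructed from the deleted switch; the signature is inferred from the call site skipFieldByEscapingRule(buf, escaping_rule, settings) and may differ from the actual declaration in Formats/EscapingRuleUtils.h.
/// Sketch only: mirrors the removed TemplateRowInputFormat::skipField() body.
void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & settings)
{
    String tmp;
    constexpr const char * field_name = "<SKIPPED COLUMN>";
    constexpr size_t field_name_len = 16;
    switch (escaping_rule)
    {
        case FormatSettings::EscapingRule::None:
            /// Empty field, just skip spaces.
            break;
        case FormatSettings::EscapingRule::Escaped:
            readEscapedString(tmp, buf);
            break;
        case FormatSettings::EscapingRule::Quoted:
            readQuotedString(tmp, buf);
            break;
        case FormatSettings::EscapingRule::CSV:
            readCSVString(tmp, buf, settings.csv);
            break;
        case FormatSettings::EscapingRule::JSON:
            skipJSONField(buf, StringRef(field_name, field_name_len));
            break;
        default:
            break;
    }
}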

View File

@ -13,7 +13,7 @@ namespace DB
class TemplateRowInputFormat : public RowInputFormatWithDiagnosticInfo
{
using ColumnFormat = ParsedTemplateFormatString::ColumnFormat;
using EscapingRule = FormatSettings::EscapingRule;
public:
TemplateRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_,
FormatSettings settings_, bool ignore_spaces_,
@ -35,7 +35,7 @@ private:
bool deserializeField(const DataTypePtr & type,
const SerializationPtr & serialization, IColumn & column, size_t file_column);
void skipField(ColumnFormat col_format);
void skipField(EscapingRule escaping_rule);
inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(buf); }
template <typename ReturnType = void>
@ -47,9 +47,6 @@ private:
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
bool isGarbageAfterField(size_t after_col_idx, ReadBuffer::Position pos) override;
void writeErrorStringForWrongDelimiter(WriteBuffer & out, const String & description, const String & delim);
void skipToNextDelimiterOrEof(const String & delimiter);
PeekableReadBuffer buf;
const DataTypes data_types;
@ -67,4 +64,6 @@ private:
const std::string row_between_delimiter;
};
bool parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces);
}

View File

@ -3,7 +3,6 @@
#include <Interpreters/Context.h>
#include <Interpreters/convertFieldToType.h>
#include <Parsers/TokenIterator.h>
#include <Parsers/ExpressionListParsers.h>
#include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
#include <Formats/FormatFactory.h>
#include <Core/Block.h>
@ -12,7 +11,6 @@
#include <Common/checkStackSize.h>
#include <Parsers/ASTLiteral.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>

View File

@ -75,6 +75,11 @@ void RowInputFormatWithNamesAndTypes::addInputColumn(const String & column_name,
void RowInputFormatWithNamesAndTypes::readPrefix()
{
/// This is a bit of abstraction leakage, but we need it in parallel parsing:
/// we check if this InputFormat is working with the "real" beginning of the data.
if (getCurrentUnitNumber() != 0)
return;
if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8())
{
/// We assume that column name or type cannot contain BOM, so, if format has header,
@ -82,9 +87,12 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
skipBOMIfExists(*in);
}
/// Skip prefix before names and types.
skipPrefixBeforeHeader();
/// This is a bit of abstraction leakage, but we need it in parallel parsing:
/// we check if this InputFormat is working with the "real" beginning of the data.
if (with_names && getCurrentUnitNumber() == 0)
if (with_names)
{
if (format_settings.with_names_use_header)
{
@ -108,8 +116,10 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
else if (!column_mapping->is_set)
setupAllColumnsByTableSchema();
if (with_types && getCurrentUnitNumber() == 0)
if (with_types)
{
/// Skip delimiter between names and types.
skipRowBetweenDelimiter();
if (format_settings.with_types_use_header)
{
auto types = readTypes();
@ -148,10 +158,20 @@ void RowInputFormatWithNamesAndTypes::insertDefaultsForNotSeenColumns(MutableCol
bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext)
{
if (in->eof())
if (unlikely(end_of_stream))
return false;
if (unlikely(checkForSuffix()))
{
end_of_stream = true;
return false;
}
updateDiagnosticInfo();
if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types))))
skipRowBetweenDelimiter();
skipRowStartDelimiter();
ext.read_columns.resize(data_types.size());
@ -190,6 +210,7 @@ void RowInputFormatWithNamesAndTypes::resetParser()
column_mapping->column_indexes_for_input_fields.clear();
column_mapping->not_presented_columns.clear();
column_mapping->names_of_columns.clear();
end_of_stream = false;
}
void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
@ -215,6 +236,12 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu
return false;
}
if (!tryParseSuffixWithDiagnosticInfo(out))
return false;
if (likely(row_num != 1) && !parseRowBetweenDelimiterWithDiagnosticInfo(out))
return false;
if (!parseRowStartWithDiagnosticInfo(out))
return false;

Some files were not shown because too many files have changed in this diff.