Merge branch 'fix-trash-s3-metrics' of github.com:ClickHouse/ClickHouse into fix-trash-s3-metrics

Alexey Milovidov 2022-05-02 03:54:50 +02:00
commit 10e354b323
167 changed files with 3174 additions and 1068 deletions

.github/workflows/codeql.yml (vendored, new file, 79 lines)

@ -0,0 +1,79 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

"on":
  # push:
  #   branches: [ master ]
  # pull_request:
  #   # The branches below must be a subset of the branches above
  #   branches: [ master ]
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:

env:
  CC: clang-14
  CXX: clang++-14

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ['cpp']
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://git.io/codeql-language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'true'

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main

      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below).
      # - name: Autobuild
      #   uses: github/codeql-action/autobuild@v2

      # Command-line programs to run using the OS shell.
      # 📚 https://git.io/JvXDl
      - name: Build
        run: |
          sudo apt-get install -yq git cmake python ninja-build
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
          mkdir build
          cd build
          cmake -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ..
          ninja
          rm -rf ../contrib

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2


@ -92,6 +92,38 @@ jobs:
        with:
          name: changed_images
          path: ${{ runner.temp }}/changed_images.json
  StyleCheck:
    needs: DockerHubPush
    runs-on: [self-hosted, style-checker]
    if: ${{ success() || failure() }}
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{ runner.temp }}/style_check
          EOF
      - name: Download changed images
        # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
        continue-on-error: true
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ env.TEMP_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Style Check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 style_check.py
      - name: Cleanup
        if: always()
        run: |
          docker kill "$(docker ps -q)" ||:
          docker rm -f "$(docker ps -a -q)" ||:
          sudo rm -fr "$TEMP_PATH"
  DocsCheck:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester-aarch64]

.gitignore (vendored, 4 lines changed)

@ -33,10 +33,6 @@
/docs/zh/single.md
/docs/ja/single.md
/docs/fa/single.md
/docs/en/development/cmake-in-clickhouse.md
/docs/ja/development/cmake-in-clickhouse.md
/docs/zh/development/cmake-in-clickhouse.md
/docs/ru/development/cmake-in-clickhouse.md
# callgrind files
callgrind.out.*


@ -211,13 +211,8 @@ function run_tests
if [ -v CHPC_TEST_RUN_BY_HASH_TOTAL ]; then
# filter tests array in bash https://stackoverflow.com/a/40375567
for index in "${!test_files[@]}"; do
# sorry for this, just calculating hash(test_name) % total_tests_group == my_test_group_num
test_hash_result=$(echo test_files[$index] | perl -ne 'use Digest::MD5 qw(md5); print unpack('Q', md5($_)) % $ENV{CHPC_TEST_RUN_BY_HASH_TOTAL} == $ENV{CHPC_TEST_RUN_BY_HASH_NUM};')
# BTW, for some reason when hash(test_name) % total_tests_group != my_test_group_num perl outputs nothing, not zero
if [ "$test_hash_result" != "1" ]; then
# deleting element from array
[ $(( index % CHPC_TEST_RUN_BY_HASH_TOTAL )) != "$CHPC_TEST_RUN_BY_HASH_NUM" ] && \
unset -v 'test_files[$index]'
fi
done
# to have sequential indexes...
test_files=("${test_files[@]}")


@ -0,0 +1,550 @@
---
sidebar_position: 69
sidebar_label: CMake in ClickHouse
description: How to make ClickHouse compile and link faster
---
# CMake in ClickHouse
How to make ClickHouse compile and link faster. Minimal ClickHouse build example:
```bash
cmake .. \
-DCMAKE_C_COMPILER=$(which clang-13) \
-DCMAKE_CXX_COMPILER=$(which clang++-13) \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_CLICKHOUSE_ALL=OFF \
-DENABLE_CLICKHOUSE_SERVER=ON \
-DENABLE_CLICKHOUSE_CLIENT=ON \
-DENABLE_LIBRARIES=OFF \
-DUSE_UNWIND=ON \
-DENABLE_UTILS=OFF \
-DENABLE_TESTS=OFF
```
## CMake file types
1. ClickHouse's source CMake files (located in the root directory and in /src).
2. Arch-dependent CMake files (located in /cmake/*os_name*).
3. Library finders (search for contrib libraries, located in /contrib/*/CMakeLists.txt).
4. Contrib build CMake files (used instead of the libraries' own CMake files, located in /cmake/modules).
## List of CMake flags
- The flag name is a link to its position in the code.
- If an option's default value is itself an option, it's also a link to its position in this list.
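Any flag from the tables below can be set explicitly when configuring the build. As a minimal sketch (the flags shown are merely examples picked from the tables that follow):
```bash
# Re-run configuration in an existing build directory with explicit flag values.
cmake .. -DENABLE_CLICKHOUSE_ALL=OFF -DENABLE_CLICKHOUSE_SERVER=ON -DENABLE_TESTS=OFF

# Check which value actually ended up in the CMake cache for a given flag.
grep ENABLE_CLICKHOUSE_SERVER CMakeCache.txt
```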
## ClickHouse modes
<table>
<thead>
<tr>
<th>Name</th>
<th>Default value</th>
<th>Description</th>
<th>Comment</th>
</tr>
</thead>
<tbody>
<tr>
<td><a name="enable-clickhouse-all"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Enable all ClickHouse modes by default</td>
<td>The <code class="syntax">clickhouse</code> binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only.</td>
</tr>
<tr>
<td><a name="enable-clickhouse-benchmark"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_BENCHMARK</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Queries benchmarking mode</td>
<td><a href="https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark/" target="_blank">https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-client"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L13" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_CLIENT</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Client mode (interactive tui/shell that connects to the server)</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-compressor"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L25" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_COMPRESSOR</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Data compressor and decompressor</td>
<td><a href="https://clickhouse.com/docs/en/operations/utilities/clickhouse-compressor/" target="_blank">https://clickhouse.com/docs/en/operations/utilities/clickhouse-compressor/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-copier"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_COPIER</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Inter-cluster data copying mode</td>
<td><a href="https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/" target="_blank">https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-extract-from-config"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L22" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Configs processor (extract values etc.)</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-format"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L30" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_FORMAT</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Queries pretty-printer and formatter with syntax highlighting</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-git-import"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L50" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_GIT_IMPORT</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>A tool to analyze Git repositories</td>
<td><a href="https://presentations.clickhouse.com/matemarketing_2020/" target="_blank">https://presentations.clickhouse.com/matemarketing_2020/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-install"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L67" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_INSTALL</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-keeper"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L54" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_KEEPER</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>ClickHouse alternative to ZooKeeper</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-keeper-converter"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L56" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_KEEPER_CONVERTER</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Utility that converts ZooKeeper logs and snapshots into a clickhouse-keeper snapshot</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-library-bridge"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L46" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_LIBRARY_BRIDGE</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>HTTP server that works as a proxy to the Library dictionary source</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-local"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L17" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_LOCAL</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Local files fast processing mode</td>
<td><a href="https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/" target="_blank">https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-obfuscator"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L34" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_OBFUSCATOR</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Table data obfuscator (converts real data into benchmark-ready data)</td>
<td><a href="https://clickhouse.com/docs/en/operations/utilities/clickhouse-obfuscator/" target="_blank">https://clickhouse.com/docs/en/operations/utilities/clickhouse-obfuscator/</a></td>
</tr>
<tr>
<td><a name="enable-clickhouse-odbc-bridge"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L40" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_ODBC_BRIDGE</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>HTTP server that works as a proxy to the ODBC driver</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-server"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L12" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_SERVER</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>Server mode (main mode)</td>
<td></td>
</tr>
<tr>
<td><a name="enable-clickhouse-static-files-disk-uploader"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L52" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER</code></a></td>
<td><code class="syntax">ENABLE_CLICKHOUSE_ALL</code></td>
<td>A tool to export table data files so they can later be served by a static files web server</td>
<td></td>
</tr>
</tbody>
</table>
## External libraries
Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
<table>
<thead>
<tr>
<th>Name</th>
<th>Default value</th>
<th>Description</th>
<th>Comment</th>
</tr>
</thead>
<tbody>
<tr>
<td><a name="enable-avx"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L19" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_AVX</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use AVX instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-avx"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L20" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_AVX2</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use AVX2 instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-avx-for-spec-op"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L23" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_AVX2_FOR_SPEC_OP</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use avx2 instructions for specific operations on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-avx"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L21" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_AVX512</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use AVX512 instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-avx-for-spec-op"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L24" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_AVX512_FOR_SPEC_OP</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use avx512 instructions for specific operations on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-bmi"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L22" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_BMI</code></a></td>
<td><code class="syntax">0</code></td>
<td>Use BMI instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-ccache"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/ccache.cmake#L22" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CCACHE</code></a></td>
<td><code class="syntax">ENABLE_CCACHE_BY_DEFAULT</code></td>
<td>Speedup re-compilations using ccache (external tool)</td>
<td><a href="https://ccache.dev/" target="_blank">https://ccache.dev/</a></td>
</tr>
<tr>
<td><a name="enable-clang-tidy"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CLANG_TIDY</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Use clang-tidy static analyzer</td>
<td><a href="https://clang.llvm.org/extra/clang-tidy/" target="_blank">https://clang.llvm.org/extra/clang-tidy/</a></td>
</tr>
<tr>
<td><a name="enable-pclmulqdq"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L17" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_PCLMULQDQ</code></a></td>
<td><code class="syntax">1</code></td>
<td>Use pclmulqdq instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-popcnt"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L18" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_POPCNT</code></a></td>
<td><code class="syntax">1</code></td>
<td>Use popcnt instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-sse"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L15" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_SSE41</code></a></td>
<td><code class="syntax">1</code></td>
<td>Use SSE4.1 instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-sse"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L16" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_SSE42</code></a></td>
<td><code class="syntax">1</code></td>
<td>Use SSE4.2 instructions on x86_64</td>
<td></td>
</tr>
<tr>
<td><a name="enable-ssse"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L14" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_SSSE3</code></a></td>
<td><code class="syntax">1</code></td>
<td>Use SSSE3 instructions on x86_64</td>
<td></td>
</tr>
</tbody>
</table>
## Other flags
<table>
<thead>
<tr>
<th>Name</th>
<th>Default value</th>
<th>Description</th>
<th>Comment</th>
</tr>
</thead>
<tbody>
<tr>
<td><a name="add-gdb-index-for-gold"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L226" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ADD_GDB_INDEX_FOR_GOLD</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Add .gdb-index to resulting binaries for gold linker.</td>
<td>Ignored if <code class="syntax">lld</code> is used</td>
</tr>
<tr>
<td><a name="arch-native"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/cpu_features.cmake#L26" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ARCH_NATIVE</code></a></td>
<td><code class="syntax">0</code></td>
<td>Add the -march=native compiler flag. This makes your binaries non-portable, but more performant code may be generated. This option overrides the ENABLE_* options for specific instruction sets. Using it is strongly discouraged.</td>
<td></td>
</tr>
<tr>
<td><a name="build-standalone-keeper"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L253" rel="external nofollow noreferrer" target="_blank"><code class="syntax">BUILD_STANDALONE_KEEPER</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Build keeper as small standalone binary</td>
<td></td>
</tr>
<tr>
<td><a name="clickhouse-split-binary"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L113" rel="external nofollow noreferrer" target="_blank"><code class="syntax">CLICKHOUSE_SPLIT_BINARY</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled</td>
<td></td>
</tr>
<tr>
<td><a name="compiler-pipe"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L281" rel="external nofollow noreferrer" target="_blank"><code class="syntax">COMPILER_PIPE</code></a></td>
<td><code class="syntax">ON</code></td>
<td>-pipe compiler option</td>
<td>Less <code class="syntax">/tmp</code> usage, more RAM usage.</td>
</tr>
<tr>
<td><a name="enable-build-path-mapping"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L299" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_BUILD_PATH_MAPPING</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE() in order to generate reproducible builds. See <a href="https://reproducible-builds.org/docs/build-path" target="_blank">https://reproducible-builds.org/docs/build-path</a></td>
<td>Reproducible builds. If turned <code class="syntax">ON</code>, remaps file source paths in debug info, predefined preprocessor macros and __builtin_FILE().</td>
</tr>
<tr>
<td><a name="enable-check-heavy-builds"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L81" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_CHECK_HEAVY_BUILDS</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Don't allow C++ translation units to compile too long or to take too much memory while compiling.</td>
<td>Take care to add prlimit in the command line before ccache, or else ccache thinks that prlimit is the compiler and clang++ is its input file, and refuses to work with multiple inputs, e.g. in the ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp. Another way would be to use the --ccache-skip option before clang++-11 to make ccache ignore it.</td>
</tr>
<tr>
<td><a name="enable-examples"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L201" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_EXAMPLES</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Build all example programs in 'examples' subdirectories</td>
<td></td>
</tr>
<tr>
<td><a name="enable-fuzzing"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L129" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_FUZZING</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Fuzzy testing using libfuzzer</td>
<td></td>
</tr>
<tr>
<td><a name="enable-libraries"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L413" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_LIBRARIES</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Enable all external libraries by default</td>
<td>Turns on all external libs like s3, kafka, ODBC, ...</td>
</tr>
<tr>
<td><a name="enable-multitarget-code"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L102" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_MULTITARGET_CODE</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Enable platform-dependent code</td>
<td>ClickHouse developers may use platform-dependent code under some macro (e.g. <code class="syntax">ifdef ENABLE_MULTITARGET</code>). If turned ON, this option defines such macro. See <code class="syntax">src/Functions/TargetSpecific.h</code></td>
</tr>
<tr>
<td><a name="enable-tests"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L200" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_TESTS</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Provide unit_test_dbms target with Google.Test unit tests</td>
<td>If turned <code class="syntax">ON</code>, assumes the user has either the system GTest library or the bundled one.</td>
</tr>
<tr>
<td><a name="enable-thinlto"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L386" rel="external nofollow noreferrer" target="_blank"><code class="syntax">ENABLE_THINLTO</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Clang-specific link time optimization</td>
<td><a href="https://clang.llvm.org/docs/ThinLTO.html" target="_blank">https://clang.llvm.org/docs/ThinLTO.html</a> Applies to clang only. Disabled when building with tests or sanitizers.</td>
</tr>
<tr>
<td><a name="fail-on-unsupported-options-combination"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32" rel="external nofollow noreferrer" target="_blank"><code class="syntax">FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Stop/Fail the CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but cannot be satisfied</td>
<td>If turned off, CMake will continue even when, for example, ENABLE_FOO is ON but the FOO tool was not found.</td>
</tr>
<tr>
<td><a name="glibc-compatibility"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L205" rel="external nofollow noreferrer" target="_blank"><code class="syntax">GLIBC_COMPATIBILITY</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Enable compatibility with older glibc libraries.</td>
<td>Only for Linux, x86_64 or aarch64.</td>
</tr>
<tr>
<td><a name="install-stripped-binaries"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L270" rel="external nofollow noreferrer" target="_blank"><code class="syntax">INSTALL_STRIPPED_BINARIES</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Build stripped binaries with debug info in separate directory</td>
<td></td>
</tr>
<tr>
<td><a name="linker-name"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L58" rel="external nofollow noreferrer" target="_blank"><code class="syntax">LINKER_NAME</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Linker name or full path</td>
<td>Example values: <code class="syntax">lld-10</code>, <code class="syntax">gold</code>.</td>
</tr>
<tr>
<td><a name="parallel-compile-jobs"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10" rel="external nofollow noreferrer" target="_blank"><code class="syntax">PARALLEL_COMPILE_JOBS</code></a></td>
<td><code class="syntax">""</code></td>
<td>Maximum number of concurrent compilation jobs</td>
<td>1 if not set</td>
</tr>
<tr>
<td><a name="parallel-link-jobs"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13" rel="external nofollow noreferrer" target="_blank"><code class="syntax">PARALLEL_LINK_JOBS</code></a></td>
<td><code class="syntax">""</code></td>
<td>Maximum number of concurrent link jobs</td>
<td>1 if not set</td>
</tr>
<tr>
<td><a name="sanitize"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7" rel="external nofollow noreferrer" target="_blank"><code class="syntax">SANITIZE</code></a></td>
<td><code class="syntax">""</code></td>
<td>Enable one of the code sanitizers</td>
<td>Possible values: <code class="syntax">address</code> (ASan), <code class="syntax">memory</code> (MSan), <code class="syntax">thread</code> (TSan), <code class="syntax">undefined</code> (UBSan), "" (no sanitizing)</td>
</tr>
<tr>
<td><a name="split-shared-libraries"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L111" rel="external nofollow noreferrer" target="_blank"><code class="syntax">SPLIT_SHARED_LIBRARIES</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Keep all internal libraries as separate .so files</td>
<td>DEVELOPER ONLY. Faster linking if turned on.</td>
</tr>
<tr>
<td><a name="strip-debug-symbols-functions"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L48" rel="external nofollow noreferrer" target="_blank"><code class="syntax">STRIP_DEBUG_SYMBOLS_FUNCTIONS</code></a></td>
<td><code class="syntax">STRIP_DSF_DEFAULT</code></td>
<td>Do not generate debugger info for ClickHouse functions</td>
<td>Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables).</td>
</tr>
<tr>
<td><a name="use-debug-helpers"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L252" rel="external nofollow noreferrer" target="_blank"><code class="syntax">USE_DEBUG_HELPERS</code></a></td>
<td><code class="syntax">USE_DEBUG_HELPERS</code></td>
<td>Enable debug helpers</td>
<td></td>
</tr>
<tr>
<td><a name="use-static-libraries"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L106" rel="external nofollow noreferrer" target="_blank"><code class="syntax">USE_STATIC_LIBRARIES</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Set to OFF to use shared libraries</td>
<td></td>
</tr>
<tr>
<td><a name="use-unwind"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/unwind.cmake#L1" rel="external nofollow noreferrer" target="_blank"><code class="syntax">USE_UNWIND</code></a></td>
<td><code class="syntax">ENABLE_LIBRARIES</code></td>
<td>Enable libunwind (better stacktraces)</td>
<td></td>
</tr>
<tr>
<td><a name="werror"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L417" rel="external nofollow noreferrer" target="_blank"><code class="syntax">WERROR</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Enable -Werror compiler option</td>
<td>Using system libs can cause a lot of warnings in includes (on macro expansion).</td>
</tr>
<tr>
<td><a name="weverything"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L15" rel="external nofollow noreferrer" target="_blank"><code class="syntax">WEVERYTHING</code></a></td>
<td><code class="syntax">ON</code></td>
<td>Enable -Weverything option with some exceptions.</td>
<td>Add some warnings that are not available even with -Wall -Wextra -Wpedantic. Intended for exploration of new compiler warnings that may be found useful. Applies to clang only</td>
</tr>
<tr>
<td><a name="with-coverage"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L344" rel="external nofollow noreferrer" target="_blank"><code class="syntax">WITH_COVERAGE</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Profile the resulting binary/binaries</td>
<td>Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc</td>
</tr>
</tbody>
</table>
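To illustrate how several of these flags are typically combined, here is a sketch of a sanitizer build that also caps link parallelism; the particular values are examples, not recommendations:
```bash
# Configure an ASan build and limit concurrent link jobs to keep memory usage down.
cmake .. \
    -DCMAKE_C_COMPILER=$(which clang-13) \
    -DCMAKE_CXX_COMPILER=$(which clang++-13) \
    -DSANITIZE=address \
    -DPARALLEL_LINK_JOBS=2
ninja
```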
## Developer's guide for adding new CMake options
#### Don't be obvious. Be informative.
Bad:
```
option (ENABLE_TESTS "Enables testing" OFF)
```
This description is quite useless as it neither gives the viewer any additional information nor explains the option's purpose.
Better:
```
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
If the option's purpose can't be guessed from its name, or the guess may be misleading, or the option has some
pre-conditions, leave a comment above the option() line and explain what it does.
The best way is to link to the docs page (if one exists).
The comment is parsed into a separate column (see below).
Even better:
```
# implies ${TESTS_ARE_ENABLED}
# see tests/CMakeLists.txt for implementation detail.
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF)
```
#### If the option's state could produce an unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from part of ClickHouse.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that they may be doing something wrong.
Also, such options should be disabled by default where applicable.
Bad:
```
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
    "Do not generate debugger info for ClickHouse functions."
    ${STRIP_DSF_DEFAULT})

if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
Better:
```
# Provides faster linking and lower binary size.
# Tradeoff is the inability to debug some source files with e.g. gdb
# (empty stack frames and no local variables).
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
    "Do not generate debugger info for ClickHouse functions."
    ${STRIP_DSF_DEFAULT})

if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    message(WARNING "Not generating debugger info for ClickHouse functions")
    target_compile_options(clickhouse_functions PRIVATE "-g0")
endif()
```
#### In the option's description, explain WHAT the option does rather than WHY it does something.
The WHY explanation should be placed in the comment. You may find that the option's name is self-descriptive.
Bad:
```
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better:
```
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
#### Don't assume other developers know as much as you do.
In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to
the tool's docs. It won't take much of your time.
Bad:
```
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
```
Better (combined with the above hint):
```
# https://clang.llvm.org/docs/ThinLTO.html
# Only applicable for clang.
# Turned off when building with tests or sanitizers.
option(ENABLE_THINLTO "Clang-specific link time optimisation" ON)
```
Another example, bad:
```
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF)
```
Better:
```
# https://github.com/include-what-you-use/include-what-you-use
option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF)
```
#### Prefer consistent default values.
CMake allows you to pass a plethora of values representing boolean true/false, e.g. 1, ON, YES, ....
Prefer the ON/OFF values, if possible.
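For example (a sketch using a made-up option name):
```
# Good: a boolean option with a consistent ON/OFF default.
option (ENABLE_FOO "Enable the hypothetical FOO subsystem (external tool)" OFF)

# Works, but mixes value styles (1/YES/ON) and is better avoided.
option (ENABLE_FOO "Enable the hypothetical FOO subsystem (external tool)" 1)
```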


@ -0,0 +1,62 @@
# Integrating Rust libraries into ClickHouse.
Rust library integration is described here using the BLAKE3 hash function integration as an example.
The first step is forking the library and making the necessary changes for Rust and C/C++ compatibility.
After forking the library repository, you need to change the target settings in the Cargo.toml file. First, switch the build to a static library. Second, add the cbindgen crate to the crate list; we will use it later to generate the C header automatically.
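A sketch of what these Cargo.toml changes might look like (the cbindgen version below is only an example):
```
# Build the crate as a static library so it can be linked into ClickHouse.
[lib]
crate-type = ["staticlib"]

# cbindgen is used from build.rs, so it goes into build-dependencies.
[build-dependencies]
cbindgen = "0.24"
```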
The next step is to create or edit the build.rs script for your library and enable cbindgen to generate the header during the library build. These lines were added to the BLAKE3 build script for that purpose:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
let package_name = env::var("CARGO_PKG_NAME").unwrap();
let output_file = ("include/".to_owned() + &format!("{}.h", package_name)).to_string();
match cbindgen::generate(&crate_dir) {
Ok(header) => {
header.write_to_file(&output_file);
}
Err(err) => {
panic!("{}", err)
}
}
```
As you can see, the script sets the output directory and launches header generation.
The next step is to add CMake files to the library directory so that it can be built together with the other submodules. The BLAKE3 main directory contains two CMake files: CMakeLists.txt and build_rust_lib.cmake. The latter defines a function that calls cargo build and sets all the paths needed for the library build. Copy it into your library and then adjust the cargo flags and other settings to your library's needs.
When you have finished the CMake configuration, move on to creating a C/C++ compatible API for your library. Let us look at BLAKE3's method blake3_apply_shim:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
This method takes a C-compatible string, its size, and an output pointer as input. It converts the C-compatible inputs into the types used by the actual library methods and calls them. After that, it should convert the library methods' outputs back into a C-compatible type. In this particular case the library supports writing directly through the pointer via the fill() method, so no conversion was needed. The main advice here is to create fewer methods, so that fewer conversions are needed on each call and less overhead is created.
Also, you should use the #[no_mangle] attribute and extern "C" for every C-compatible function. Without them the library can compile incorrectly and cbindgen won't launch header autogeneration.
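For context, here is a sketch of how such a shim could be called from the C++ side through the cbindgen-generated header; the header name, the 32-byte output length, and the error handling are assumptions made for illustration only:
```
#include <blake3.h> // cbindgen-generated header (name assumed)

#include <cstdint>
#include <iostream>
#include <string>

int main()
{
    std::string input = "hello";
    std::string digest(32, '\0'); // BLAKE3 produces a 32-byte digest by default

    // The shim returns nullptr on success or an allocated error string on failure.
    char * error = blake3_apply_shim(
        input.c_str(),
        static_cast<uint32_t>(input.size()),
        reinterpret_cast<uint8_t *>(digest.data()));

    if (error)
    {
        std::cerr << error << '\n'; // freeing the error string is omitted in this sketch
        return 1;
    }

    // `digest` now holds the raw hash bytes.
    return 0;
}
```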
After all these steps, you can test your library in a small project to find any problems with compatibility or header generation. If problems occur during header generation, you can try to configure it with a cbindgen.toml file (you can find an example in the BLAKE3 directory or a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). If everything works correctly, you can finally integrate its methods into ClickHouse.
In addition, some integration problems are worth noting here:
1) Some architectures may require special cargo flags or build.rs configurations, so you may want to test cross-compilation for different platforms first.
2) MemorySanitizer can cause false-positive reports, as it is unable to see whether some variables in Rust are initialized or not. This was solved by writing a method with a more explicit definition for some variables, although this implementation is slower and is used only to fix MemorySanitizer builds.


@ -306,8 +306,8 @@ CREATE TABLE table_name
Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:
``` sql
SELECT count() FROM table WHERE s &lt; 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) &gt;= 1234
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
```
#### Available Types of Indices {#available-types-of-indices}


@ -544,6 +544,7 @@ Keys:
- `errorlog` Error log file.
- `size` Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
- `count` The number of archived log files that ClickHouse stores.
- `console` Send `log` and `errorlog` to the console instead of a file. To enable, set to `1` or `true`.
**Example**
@ -557,6 +558,15 @@ Keys:
</logger>
```
Writing to the console can be configured. Config example:
``` xml
<logger>
<level>information</level>
<console>1</console>
</logger>
```
Writing to the syslog is also supported. Config example:
``` xml
@ -677,8 +687,8 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
## max_concurrent_queries {#max-concurrent-queries}
The maximum number of simultaneously processed queries related to MergeTree table.
Queries may be limited by other settings: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
The maximum number of simultaneously processed queries.
Note that other limits also apply: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users).
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -694,7 +704,7 @@ Default value: `100`.
**Example**
``` xml
<max_concurrent_queries>100</max_concurrent_queries>
<max_concurrent_queries>200</max_concurrent_queries>
```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
@ -781,21 +791,6 @@ Default value: `0`.
- [max_concurrent_queries](#max-concurrent-queries)
## min_marks_to_honor_max_concurrent_queries {#min-marks-to-honor-max-concurrent-queries}
The minimal number of marks read by the query for applying the [max_concurrent_queries](#max-concurrent-queries) setting.
Possible values:
- Positive integer.
- 0 — Disabled.
**Example**
``` xml
<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
```
## max_connections {#max-connections}
The maximum number of inbound connections.


@ -381,3 +381,36 @@ Possible values:
Default value: `1` second.
## max_concurrent_queries {#max-concurrent-queries}
Max number of concurrently executed queries related to the MergeTree table. Queries will still be limited by other `max_concurrent_queries` settings.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0` (no limit).
**Example**
``` xml
<max_concurrent_queries>50</max_concurrent_queries>
```
## min_marks_to_honor_max_concurrent_queries {#min-marks-to-honor-max-concurrent-queries}
The minimal number of marks read by the query for applying the [max_concurrent_queries](#max-concurrent-queries) setting. Note that queries will still be limited by other `max_concurrent_queries` settings.
Possible values:
- Positive integer.
- 0 — Disabled (`max_concurrent_queries` limit applied to no queries).
Default value: `0` (limit never applied).
**Example**
``` xml
<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
```


@ -17,18 +17,18 @@ SELECT * FROM system.metrics LIMIT 10
```
``` text
┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries
│ Merge │ 0 │ Number of executing background merges
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE)
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency
│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping)
│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.
│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.
│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode.
└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
┌─metric───────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries │
│ Merge │ 0 │ Number of executing background merges │
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │
│ BackgroundMergesAndMutationsPoolTask │ 0 │ Number of active merges and mutations in an associated background pool
│ BackgroundFetchesPoolTask │ 0 │ Number of active fetches in an associated background pool
│ BackgroundCommonPoolTask │ 0 │ Number of active tasks in an associated background pool
│ BackgroundMovePoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool for moves
└──────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────┘
```
**See Also**


@ -408,7 +408,7 @@ SELECT toTypeName(JSON_VALUE('{"hello":2}', '$.hello'));
Result:
``` text
"world"
world
0
2
String


@ -0,0 +1,62 @@
# Интеграция библиотек на языке Rust в ClickHouse.
Интеграция библиотек будет описываться на основе работы проведенной для библиотеки BLAKE3.
Первым шагом интеграции является создание форка библиотеки для внесения дальнейших изменений по совместимости методов на Rust с C/C++.
В форке необходимо будет изменить конфигурацию Cargo.toml, сменив таргет на статическую библиотеку. Кроме того, необходимо добавить crate cbindgen для его дальнейшего использования при сборке.
Необходимо создать либо отредактировать сборочный скрипт build.rs, добавив в него запуск cbindgen - автогенератора заголовочных файлов .h. Пример такого запуска можно увидеть в build.rs для BLAKE3:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
let package_name = env::var("CARGO_PKG_NAME").unwrap();
let output_file = ("include/".to_owned() + &format!("{}.h", package_name)).to_string();
match cbindgen::generate(&crate_dir) {
Ok(header) => {
header.write_to_file(&output_file);
}
Err(err) => {
panic!("{}", err)
}
}
```
Скрипт назначает директорию для создания заголовочного файла и в конце запускает метод генерации cbindgen.
Далее необходимо подключить библиотеку к CMake. В BLAKE3 для этого были созданы два файла - CMakeLists.txt и файл, содержащий функцию для запуска cargo build как таргета, - build_rust_lib.cmake. Последний стоит скопировать в подключаемую библиотеку и отредактировать в соответствии с требуемыми параметрами для сборки - добавить флаги или какие-либо настройки для разных архитектур.
Завершив настройку CMake, можно приступить к созданию методов-прослоек, которые обеспечат совместимость библиотеки и остального кода ClickHouse. В частности, рассмотрим такой метод, написанный для BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
На вход метод принимает строку в C-совместимом формате, её размер и указатель, в который будет положен результат. Кроме того, для того, чтобы иметь возможность вывести ошибку, метод возвращает строку с ней как результат работы (и нулевой указатель в случае отсутствия ошибок). C-совместимые типы не используются в методах BLAKE3, поэтому они конвертируются посредством соответствующих структур и методов в привычные форматы для языка Rust. Далее запускаются оригинальные методы библиотеки. Их результат следует преобразовать обратно в C-совместимые структуры, однако в данном случае удается избежать обратной конвертации, поскольку библиотека поддерживает запись напрямую по указателю *mut u8.
Кроме того, стоит отметить обязательность атрибута #[no_mangle] и указания extern "C" для всех таких методов. Без них не удастся провести корректную совместимую с C/C++ компиляцию и автогенерацию заголовков.
После этих действий можно протестировать компиляцию и работу методов на небольшом проекте для выявления несовместимостей и ошибок. Если возникают проблемы с генерацией заголовков, может потребоваться поработать с конфигурацией cbindgen через файл cbindgen.toml, найти который можно либо в BLAKE3, либо взяв оригинальный темплейт разработчика cbindgen: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml).
В заключение, стоит отметить пару проблем, возникших при интеграции BLAKE3:
1) Некоторые архитектуры могут потребовать настройки компиляции в build.rs и в build_rust_lib.cmake в связи со своими особенностями.
2) MemorySanitizer плохо понимает инициализацию памяти в Rust, поэтому для избежания ложноположительных срабатываний для BLAKE3 был создан альтернативный метод, который более явно, но при этом медленнее, инициализировал память. Он компилируется только для сборки с MemorySanitizer и в релиз не попадает. Вероятно, возможны и более красивые способы решения этой проблемы, но при интеграции BLAKE3 они не были обнаружены.


@ -353,7 +353,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
| Функция (оператор) / Индекс | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |


@ -17,18 +17,18 @@ SELECT * FROM system.metrics LIMIT 10
```
``` text
┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries
│ Merge │ 0 │ Number of executing background merges
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE)
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency
│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping)
│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.
│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.
│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode.
└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
┌─metric───────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries │
│ Merge │ 0 │ Number of executing background merges │
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │
│ BackgroundMergesAndMutationsPoolTask │ 0 │ Number of active merges and mutations in an associated background pool
│ BackgroundFetchesPoolTask │ 0 │ Number of active fetches in an associated background pool
│ BackgroundCommonPoolTask │ 0 │ Number of active tasks in an associated background pool
│ BackgroundMovePoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool for moves
└──────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────┘
```
**Смотрите также**


@ -407,7 +407,7 @@ SELECT toTypeName(JSON_VALUE('{"hello":2}', '$.hello'));
Результат:
``` text
"world"
world
0
2
String


@ -8,9 +8,9 @@ sidebar_label: "\u5982\u4F55\u5728Linux\u4E0A\u6784\u5EFAClickHouse for AARCH64\
# 如何在Linux上为AARCH64ARM64架构构建ClickHouse {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}
这是当你有Linux机器并希望使用它来构建的情况下 `clickhouse` 二进制文件将运行在另一个Linux机器上与AARCH64CPU架构。 这适用于在Linux服务器上运行的持续集成检查。
有种情况当你有台Linux机器并希望用它来构建 `clickhouse` 的二进制文件将其运行在另一台AARCH64CPU架构的Linux机器上。这适用于在Linux服务器上运行的持续集成检查。
Aarch64的交叉构建基于 [构建说明](../development/build.md) 先跟着他们
Aarch64的交叉构建基于 [构建说明](../development/build.md) 让我们先睹为快。
# 安装Clang-8 {#install-clang-8}
@ -32,7 +32,7 @@ wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```
# 建ClickHouse {#build-clickhouse}
# 建ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse
@ -41,4 +41,4 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linu
ninja -C build-arm64
```
生成的二进制文件将仅在具有AARCH64CPU体系结构的Linux上运行。
生成的二进制文件将仅在AARCH64CPU体系架构的Linux上运行。


@ -341,7 +341,7 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ |
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, &lt;&gt;)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
@ -522,7 +522,7 @@ ClickHouse 在数据片段合并时会删除掉过期的数据。
当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。
如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。
如果在两次合并的时间间隔中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。
## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes}


@ -5,8 +5,9 @@ sidebar_label: "ClickHouse用户"
# ClickHouse Adopters {#clickhouse-adopters}
!!! warning "Disclaimer"
The following list of companies using ClickHouse and their success stories is assembled from public sources and thus might differ from current reality. We would appreciate it if you shared the story of adopting ClickHouse in your company and [added it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications from other companies is also welcome.
:::note Disclaimer
The following list of companies using ClickHouse and their success stories is assembled from public sources and thus might differ from current reality. We would appreciate it if you shared the story of adopting ClickHouse in your company and [added it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/zh/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications from other companies is also welcome.
:::
| Company | Industry | Use case | Cluster size | (Un)compressed data size<abbr title="of single replica"><sup>\*</sup></abbr> | References |
|-----------------------------------------------------------------|------------------|----------------|---------------------------------------------------|----------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|

View File

@ -1,5 +1,7 @@
---
sidebar_label: 设置
sidebar_position: 51
slug: /zh/operations/settings/
---
# Settings {#settings}

View File

@ -1,6 +1,7 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
sidebar_label: 设置
sidebar_position: 52
slug: /zh/operations/settings/settings
---
# Settings {#settings}

View File

@ -17,18 +17,18 @@ SELECT * FROM system.metrics LIMIT 10
```
``` text
┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries │
│ Merge │ 0 │ Number of executing background merges │
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │
│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │
│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │
│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. │
│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode. │
└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
┌─metric───────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries │
│ Merge │ 0 │ Number of executing background merges │
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │
│ BackgroundMergesAndMutationsPoolTask │ 0 │ Number of active merges and mutations in an associated background pool │
│ BackgroundFetchesPoolTask │ 0 │ Number of active fetches in an associated background pool │
│ BackgroundCommonPoolTask │ 0 │ Number of active tasks in an associated background pool │
│ BackgroundMovePoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool for moves │
└──────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────┘
```
**See also**

View File

@ -1 +0,0 @@
../../../en/sql-reference/data-types/map.md

View File

@ -0,0 +1,107 @@
---
sidebar_position: 65
sidebar_label: Map(key, value)
---
# Map(key, value) {#data_type-map}
`Map(key, value)` stores data as `key:value` pairs.
**Parameters**
- `key` — The key part of the pair. Its type can be [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `value` — The value part of the pair. Its type can be [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [Array](../../sql-reference/data-types/array.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), or [FixedString](../../sql-reference/data-types/fixedstring.md).
Use `a['key']` to get the corresponding value from an `a Map('key', 'value')` column; this lookup has linear complexity.
**Example**
Sample table:
``` sql
CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory;
INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30});
```
Select all values of `key2`:
```sql
SELECT a['key2'] FROM table_map;
```
Result:
```text
┌─arrayElement(a, 'key2')─┐
│ 10 │
│ 20 │
│ 30 │
└─────────────────────────┘
```
If the requested `key` does not exist in a `Map()` column, the result is a zero, an empty string, or an empty array, depending on the type of `value`.
```sql
INSERT INTO table_map VALUES ({'key3':100}), ({});
SELECT a['key3'] FROM table_map;
```
Result:
```text
┌─arrayElement(a, 'key3')─┐
│ 100 │
│ 0 │
└─────────────────────────┘
┌─arrayElement(a, 'key3')─┐
│ 0 │
│ 0 │
│ 0 │
└─────────────────────────┘
```
## Converting Tuple to Map {#map-and-tuple}
You can use the [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function to convert `Tuple()` to `Map()`:
``` sql
SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map;
```
``` text
┌─map───────────────────────────┐
│ {1:'Ready',2:'Steady',3:'Go'} │
└───────────────────────────────┘
```
## Map.keys and Map.values Subcolumns {#map-subcolumns}
To make better use of the `Map` type, in some scenarios you can read only the `keys` or `values` subcolumn instead of reading the whole column.
**Example**
Query:
``` sql
CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
SELECT a.keys FROM t_map;
SELECT a.values FROM t_map;
```
Result:
``` text
┌─a.keys─────────────────┐
│ ['key1','key2','key3'] │
└────────────────────────┘
┌─a.values─┐
│ [1,2,3] │
└──────────┘
```
**See also**
- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
[Original article](https://clickhouse.com/docs/zh/sql-reference/data-types/map/) <!--hide-->

View File

@ -1 +0,0 @@
../../../../en/sql-reference/statements/alter/setting.md

View File

@ -0,0 +1,57 @@
---
sidebar_position: 38
sidebar_label: SETTING
---
# Table Settings Manipulations {#table_settings_manipulations}
This is a set of queries that change table settings. You can modify a setting or reset it to its default value. A single query can change several settings at once. If a setting with the specified name does not exist, the query raises an exception.
**Syntax**
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ...
```
!!! note "Note"
These queries can only be applied to [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) tables.
## Modify Setting {#alter_modify_setting}
Changes table settings.
**Syntax**
```sql
MODIFY SETTING setting_name=value [, ...]
```
**Example**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id;
ALTER TABLE example_table MODIFY SETTING max_part_loading_threads=8, max_parts_in_total=50000;
```
## Reset Setting {#alter_reset_setting}
Resets a table setting to its default value. If the setting is already at its default value, no action is taken.
**Syntax**
```sql
RESET SETTING setting_name [, ...]
```
**Example**
```sql
CREATE TABLE example_table (id UInt32, data String) ENGINE=MergeTree() ORDER BY id
SETTINGS max_part_loading_threads=8;
ALTER TABLE example_table RESET SETTING max_part_loading_threads;
```
**See also**
- [MergeTree settings](../../../operations/settings/merge-tree-settings.md)

View File

@ -16,6 +16,13 @@
#include <Interpreters/Access/InterpreterGrantQuery.h>
#include <Interpreters/Access/InterpreterShowCreateAccessEntityQuery.h>
#include <Interpreters/Access/InterpreterShowGrantsQuery.h>
#include <Parsers/Access/ASTCreateQuotaQuery.h>
#include <Parsers/Access/ASTCreateRoleQuery.h>
#include <Parsers/Access/ASTCreateRowPolicyQuery.h>
#include <Parsers/Access/ASTCreateSettingsProfileQuery.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/ParserAttachAccessEntity.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <boost/range/algorithm/copy.hpp>

View File

@ -1,51 +1,11 @@
#pragma once
#include <Parsers/Access/ASTCreateQuotaQuery.h>
#include <Parsers/Access/ASTCreateRoleQuery.h>
#include <Parsers/Access/ASTCreateRowPolicyQuery.h>
#include <Parsers/Access/ASTCreateSettingsProfileQuery.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/Access/ParserCreateQuotaQuery.h>
#include <Parsers/Access/ParserCreateRoleQuery.h>
#include <Parsers/Access/ParserCreateRowPolicyQuery.h>
#include <Parsers/Access/ParserCreateSettingsProfileQuery.h>
#include <Parsers/Access/ParserCreateUserQuery.h>
#include <Parsers/Access/ParserGrantQuery.h>
#include <base/types.h>
#include <memory>
namespace DB
{
/// Special parser for the 'ATTACH access entity' queries.
class ParserAttachAccessEntity : public IParserBase
{
protected:
const char * getName() const override { return "ATTACH access entity query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
ParserCreateUserQuery create_user_p;
ParserCreateRoleQuery create_role_p;
ParserCreateRowPolicyQuery create_policy_p;
ParserCreateQuotaQuery create_quota_p;
ParserCreateSettingsProfileQuery create_profile_p;
ParserGrantQuery grant_p;
create_user_p.useAttachMode();
create_role_p.useAttachMode();
create_policy_p.useAttachMode();
create_quota_p.useAttachMode();
create_profile_p.useAttachMode();
grant_p.useAttachMode();
return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected)
|| create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected)
|| create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected);
}
};
struct IAccessEntity;
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;

View File

@ -514,7 +514,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
else
{
/// These embedded files added during build with some cmake magic.
/// Look at the end of programs/sever/CMakeLists.txt.
/// Look at the end of programs/server/CMakeLists.txt.
std::string embedded_name;
if (path == "config.xml")
embedded_name = "embedded.xml";

View File

@ -619,6 +619,8 @@
M(648, WRONG_DDL_RENAMING_SETTINGS) \
M(649, INVALID_TRANSACTION) \
M(650, SERIALIZATION_ERROR) \
M(651, CAPN_PROTO_BAD_TYPE) \
M(652, ONLY_NULLS_WHILE_READING_SCHEMA) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -11,6 +11,13 @@ class NetException : public Exception
public:
NetException(const std::string & msg, int code) : Exception(msg, code) {}
// Format message with fmt::format, like the logging functions.
template <typename... Args>
NetException(int code, fmt::format_string<Args...> fmt, Args &&... args)
: Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
{
}
NetException * clone() const override { return new NetException(*this); }
void rethrow() const override { throw *this; }
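The added constructor simply forwards its arguments to `fmt::format` before delegating to `Exception`. Below is a self-contained sketch of the same pattern, assuming only the {fmt} library is available; the class name and error code are illustrative, not ClickHouse's:

``` cpp
#include <fmt/format.h>
#include <stdexcept>
#include <utility>

// Exception type whose constructor formats its message with fmt, mirroring the
// template constructor added to NetException above (names are illustrative).
class FormattedError : public std::runtime_error
{
public:
    template <typename... Args>
    FormattedError(int code_, fmt::format_string<Args...> fmt, Args &&... args)
        : std::runtime_error(fmt::format(fmt, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

int main()
{
    try
    {
        throw FormattedError(210, "Timeout after {} ms while reading from {}", 300, "replica-1:9000");
    }
    catch (const FormattedError & e)
    {
        fmt::print("code={} message={}\n", e.code, e.what());
    }
}
```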

View File

@ -14,7 +14,7 @@ public:
{
}
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
size_t totalRanges() const { return static_cast<size_t>(ceil(static_cast<float>(total_size - from) / range_step)); }
using Range = std::pair<size_t, size_t>;

View File

@ -0,0 +1,22 @@
#include <Common/RangeGenerator.h>
#include <gtest/gtest.h>
using namespace DB;
TEST(RangeGenerator, Common)
{
RangeGenerator g{25, 10};
EXPECT_EQ(g.totalRanges(), 3);
std::vector<RangeGenerator::Range> ranges{{0, 10}, {10, 20}, {20, 25}};
for (size_t i = 0; i < 3; ++i)
{
auto r = g.nextRange();
EXPECT_TRUE(r);
EXPECT_EQ(r, ranges[i]);
}
auto r = g.nextRange();
EXPECT_TRUE(!r);
}
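The switch from `round` to `ceil` in `totalRanges()` matters whenever the last range is a partial one; `round` can drop it. A minimal standalone sketch of the arithmetic (plain C++, deliberately not using the `RangeGenerator` class itself):

``` cpp
#include <cassert>
#include <cmath>
#include <cstddef>

int main()
{
    // 24 bytes split into steps of 10 gives three ranges: {0,10}, {10,20}, {20,24}.
    const std::size_t total_size = 24, from = 0, range_step = 10;
    const float ratio = static_cast<float>(total_size - from) / range_step; // 2.4

    assert(static_cast<std::size_t>(std::round(ratio)) == 2); // old formula: loses the tail range
    assert(static_cast<std::size_t>(std::ceil(ratio)) == 3);  // new formula: counts it
    return 0;
}
```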

View File

@ -309,23 +309,26 @@ void DiskCacheWrapper::removeSharedFile(const String & path, bool keep_s3)
DiskDecorator::removeSharedFile(path, keep_s3);
}
void DiskCacheWrapper::removeSharedRecursive(const String & path, bool keep_s3)
void DiskCacheWrapper::removeSharedRecursive(const String & path, bool keep_all, const NameSet & files_to_keep)
{
if (cache_disk->exists(path))
cache_disk->removeSharedRecursive(path, keep_s3);
DiskDecorator::removeSharedRecursive(path, keep_s3);
cache_disk->removeSharedRecursive(path, keep_all, files_to_keep);
DiskDecorator::removeSharedRecursive(path, keep_all, files_to_keep);
}
void DiskCacheWrapper::removeSharedFiles(const RemoveBatchRequest & files, bool keep_s3)
void DiskCacheWrapper::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all, const NameSet & files_to_keep)
{
for (const auto & file : files)
{
if (cache_disk->exists(file.path))
cache_disk->removeSharedFile(file.path, keep_s3);
{
bool keep_file = keep_all || files_to_keep.contains(fs::path(file.path).filename());
cache_disk->removeSharedFile(file.path, keep_file);
}
}
DiskDecorator::removeSharedFiles(files, keep_s3);
DiskDecorator::removeSharedFiles(files, keep_all, files_to_keep);
}
void DiskCacheWrapper::createHardLink(const String & src_path, const String & dst_path)

View File

@ -47,8 +47,9 @@ public:
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedRecursive(const String & path, bool keep_s3) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_s3) override;
void removeSharedRecursive(const String & path, bool keep_all, const NameSet & files_to_keep) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all, const NameSet & files_to_keep) override;
void createHardLink(const String & src_path, const String & dst_path) override;
ReservationPtr reserve(UInt64 bytes) override;

View File

@ -108,6 +108,11 @@ void DiskDecorator::copy(const String & from_path, const std::shared_ptr<IDisk>
delegate->copy(from_path, to_disk, to_path);
}
void DiskDecorator::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
delegate->copyDirectoryContent(from_dir, to_disk, to_dir);
}
void DiskDecorator::listFiles(const String & path, std::vector<String> & file_names)
{
delegate->listFiles(path, file_names);
@ -151,14 +156,14 @@ void DiskDecorator::removeSharedFile(const String & path, bool keep_s3)
delegate->removeSharedFile(path, keep_s3);
}
void DiskDecorator::removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs)
void DiskDecorator::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
delegate->removeSharedFiles(files, keep_in_remote_fs);
delegate->removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only);
}
void DiskDecorator::removeSharedRecursive(const String & path, bool keep_s3)
void DiskDecorator::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
delegate->removeSharedRecursive(path, keep_s3);
delegate->removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only);
}
void DiskDecorator::setLastModified(const String & path, const Poco::Timestamp & timestamp)

View File

@ -33,6 +33,7 @@ public:
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
@ -52,8 +53,8 @@ public:
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedRecursive(const String & path, bool keep_s3) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
void setReadOnly(const String & path) override;

View File

@ -249,6 +249,36 @@ void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk>
copyThroughBuffers(from_path, to_disk, to_path);
}
void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
/// Check if we can copy the file without deciphering.
if (isSameDiskType(*this, *to_disk))
{
/// Disk type is the same, check if the key is the same too.
if (auto * to_disk_enc = typeid_cast<DiskEncrypted *>(to_disk.get()))
{
auto from_settings = current_settings.get();
auto to_settings = to_disk_enc->current_settings.get();
if (from_settings->keys == to_settings->keys)
{
/// Keys are the same so we can simply copy the encrypted file.
auto wrapped_from_path = wrappedPath(from_dir);
auto to_delegate = to_disk_enc->delegate;
auto wrapped_to_path = to_disk_enc->wrappedPath(to_dir);
delegate->copyDirectoryContent(wrapped_from_path, to_delegate, wrapped_to_path);
return;
}
}
}
if (!to_disk->exists(to_dir))
to_disk->createDirectories(to_dir);
/// Copy the file through buffers with deciphering.
copyThroughBuffers(from_dir, to_disk, to_dir);
}
std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
const String & path,
const ReadSettings & settings,

View File

@ -117,6 +117,8 @@ public:
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
const ReadSettings & settings,
@ -159,10 +161,23 @@ public:
delegate->removeSharedFile(wrapped_path, flag);
}
void removeSharedRecursive(const String & path, bool flag) override
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override
{
auto wrapped_path = wrappedPath(path);
delegate->removeSharedRecursive(wrapped_path, flag);
delegate->removeSharedRecursive(wrapped_path, keep_all_batch_data, file_names_remove_metadata_only);
}
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override
{
for (const auto & file : files)
{
auto wrapped_path = wrappedPath(file.path);
bool keep = keep_all_batch_data || file_names_remove_metadata_only.contains(fs::path(file.path).filename());
if (file.if_exists)
delegate->removeSharedFileIfExists(wrapped_path, keep);
else
delegate->removeSharedFile(wrapped_path, keep);
}
}
void removeSharedFileIfExists(const String & path, bool flag) override

View File

@ -440,6 +440,14 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to
copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation.
}
void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
if (isSameDiskType(*this, *to_disk))
fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
else
copyThroughBuffers(from_dir, to_disk, to_dir); /// Base implementation.
}
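The fast path above delegates to `std::filesystem::copy` with the recursive and overwrite flags, so an existing destination tree is refreshed in place. A minimal standalone sketch of that call:

``` cpp
#include <filesystem>
#include <fstream>
#include <iostream>

namespace fs = std::filesystem;

int main()
{
    fs::create_directories("src_dir/sub");
    std::ofstream("src_dir/sub/a.txt") << "payload";
    fs::create_directories("dst_dir");

    // Copies the *content* of src_dir into dst_dir, overwriting files that already exist there.
    fs::copy("src_dir", "dst_dir", fs::copy_options::recursive | fs::copy_options::overwrite_existing);

    std::cout << std::boolalpha << fs::exists("dst_dir/sub/a.txt") << '\n'; // true
}
```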
SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
{
return std::make_unique<LocalDirectorySyncGuard>(fs::path(disk_path) / path);

View File

@ -68,6 +68,8 @@ public:
void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(

View File

@ -200,6 +200,12 @@ void DiskRestartProxy::copy(const String & from_path, const std::shared_ptr<IDis
DiskDecorator::copy(from_path, to_disk, to_path);
}
void DiskRestartProxy::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
ReadLock lock (mutex);
DiskDecorator::copyDirectoryContent(from_dir, to_disk, to_dir);
}
void DiskRestartProxy::listFiles(const String & path, std::vector<String> & file_names)
{
ReadLock lock (mutex);
@ -251,16 +257,16 @@ void DiskRestartProxy::removeSharedFile(const String & path, bool keep_s3)
DiskDecorator::removeSharedFile(path, keep_s3);
}
void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs)
void DiskRestartProxy::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
ReadLock lock (mutex);
DiskDecorator::removeSharedFiles(files, keep_in_remote_fs);
DiskDecorator::removeSharedFiles(files, keep_all_batch_data, file_names_remove_metadata_only);
}
void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_s3)
void DiskRestartProxy::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
ReadLock lock (mutex);
DiskDecorator::removeSharedRecursive(path, keep_s3);
DiskDecorator::removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only);
}
void DiskRestartProxy::setLastModified(const String & path, const Poco::Timestamp & timestamp)

View File

@ -42,6 +42,7 @@ public:
void moveFile(const String & from_path, const String & to_path) override;
void replaceFile(const String & from_path, const String & to_path) override;
void copy(const String & from_path, const DiskPtr & to_disk, const String & to_path) override;
void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
std::unique_ptr<ReadBufferFromFileBase> readFile(
const String & path,
@ -54,8 +55,8 @@ public:
void removeDirectory(const String & path) override;
void removeRecursive(const String & path) override;
void removeSharedFile(const String & path, bool keep_s3) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs) override;
void removeSharedRecursive(const String & path, bool keep_s3) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;
Poco::Timestamp getLastModified(const String & path) override;
void setReadOnly(const String & path) override;

View File

@ -165,10 +165,10 @@ std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & p
auto remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string());
remote_path = remote_path.string().substr(url.size());
RemoteMetadata meta(path, remote_path);
meta.remote_fs_objects.emplace_back(remote_path, iter->second.size);
std::vector<BlobPathWithSize> blobs_to_read;
blobs_to_read.emplace_back(remote_path, iter->second.size);
auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(url, meta.remote_fs_root_path, meta.remote_fs_objects, getContext(), read_settings);
auto web_impl = std::make_unique<ReadBufferFromWebServerGather>(url, path, blobs_to_read, getContext(), read_settings);
if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool)
{

View File

@ -139,7 +139,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
}
void removeSharedRecursive(const String &, bool) override
void removeSharedRecursive(const String &, bool, const NameSet &) override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
}

View File

@ -20,13 +20,13 @@ bool IDisk::isDirectoryEmpty(const String & path)
return !iterateDirectory(path)->isValid();
}
void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, const String & to_path)
void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path)
{
LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.",
from_disk.getName(), from_disk.getPath(), from_path, to_disk.getName(), to_disk.getPath(), to_path);
getName(), getPath(), from_file_path, to_disk.getName(), to_disk.getPath(), to_file_path);
auto in = from_disk.readFile(from_path);
auto out = to_disk.writeFile(to_path);
auto in = readFile(from_file_path);
auto out = to_disk.writeFile(to_file_path);
copyData(*in, *out);
out->finalize();
}
@ -34,7 +34,7 @@ void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, cons
using ResultsCollector = std::vector<std::future<void>>;
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results)
void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir)
{
if (from_disk.isFile(from_path))
{
@ -42,28 +42,32 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
[&from_disk, from_path, &to_disk, to_path]()
{
setThreadName("DiskCopier");
DB::copyFile(from_disk, from_path, to_disk, fs::path(to_path) / fileName(from_path));
from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path));
});
results.push_back(std::move(result));
}
else
{
fs::path dir_name = fs::path(from_path).parent_path().filename();
fs::path dest(fs::path(to_path) / dir_name);
to_disk.createDirectories(dest);
fs::path dest(to_path);
if (copy_root_dir)
{
fs::path dir_name = fs::path(from_path).parent_path().filename();
dest /= dir_name;
to_disk.createDirectories(dest);
}
for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
asyncCopy(from_disk, it->path(), to_disk, dest, exec, results);
asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true);
}
}
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir)
{
auto & exec = to_disk->getExecutor();
ResultsCollector results;
asyncCopy(*this, from_path, *to_disk, to_path, exec, results);
asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir);
for (auto & result : results)
result.wait();
@ -73,7 +77,16 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<I
void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
{
copyThroughBuffers(from_path, to_disk, to_path);
copyThroughBuffers(from_path, to_disk, to_path, true);
}
void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
{
if (!to_disk->exists(to_dir))
to_disk->createDirectories(to_dir);
copyThroughBuffers(from_dir, to_disk, to_dir, false);
}
void IDisk::truncateFile(const String &, size_t)

View File

@ -160,6 +160,12 @@ public:
/// Recursively copy data contained at `from_path` to `to_path` located at `to_disk`.
virtual void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
/// Recursively copy files from from_dir to to_dir. Creates to_dir if it does not exist.
virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir);
/// Copy file `from_file_path` to `to_file_path` located at `to_disk`.
virtual void copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path);
/// List files at `path` and add their names to `file_names`
virtual void listFiles(const String & path, std::vector<String> & file_names) = 0;
@ -192,17 +198,18 @@ public:
/// Remove file. Throws exception if file doesn't exist or if directory is not empty.
/// Differs from removeFile for S3/HDFS disks
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
virtual void removeSharedFile(const String & path, bool) { removeFile(path); }
virtual void removeSharedFile(const String & path, bool /* keep_shared_data */) { removeFile(path); }
/// Remove file or directory with all children. Use with extra caution. Throws exception if file doesn't exist.
/// Differs from removeRecursive for S3/HDFS disks
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
virtual void removeSharedRecursive(const String & path, bool) { removeRecursive(path); }
/// Second bool param is a flag to remove (false) or keep (true) shared data on S3.
/// Third param determines which files cannot be removed even if second is true.
virtual void removeSharedRecursive(const String & path, bool /* keep_all_shared_data */, const NameSet & /* file_names_remove_metadata_only */) { removeRecursive(path); }
/// Remove file or directory if it exists.
/// Differs from removeFileIfExists for S3/HDFS disks
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); }
virtual void removeSharedFileIfExists(const String & path, bool /* keep_shared_data */) { removeFileIfExists(path); }
virtual String getCacheBasePath() const { return ""; }
@ -237,14 +244,17 @@ public:
/// Batch request to remove multiple files.
/// May be much faster for blob storage.
virtual void removeSharedFiles(const RemoveBatchRequest & files, bool keep_in_remote_fs)
/// Second bool param is a flag to remove (false) or keep (true) shared data on S3.
/// Third param determines which files cannot be removed even if second is true.
virtual void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
for (const auto & file : files)
{
bool keep_file = keep_all_batch_data || file_names_remove_metadata_only.contains(fs::path(file.path).filename());
if (file.if_exists)
removeSharedFileIfExists(file.path, keep_in_remote_fs);
removeSharedFileIfExists(file.path, keep_file);
else
removeSharedFile(file.path, keep_in_remote_fs);
removeSharedFile(file.path, keep_file);
}
}
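The per-file decision above reduces to a filename check against the metadata-only set. A small self-contained sketch of that check; the helper name is made up, and `NameSet` is assumed to behave like an unordered set of strings:

``` cpp
#include <filesystem>
#include <iostream>
#include <string>
#include <unordered_set>

namespace fs = std::filesystem;
using NameSet = std::unordered_set<std::string>;

// A file's remote data is kept either when the whole batch keeps its data or when
// its *filename* (not the full path) is listed in the metadata-only set.
bool keepRemoteData(const std::string & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
    return keep_all_batch_data || file_names_remove_metadata_only.contains(fs::path(path).filename().string());
}

int main()
{
    const NameSet metadata_only = {"checksums.txt"};
    std::cout << std::boolalpha
              << keepRemoteData("store/all_1_1_0/checksums.txt", false, metadata_only) << ' '  // true
              << keepRemoteData("store/all_1_1_0/data.bin", false, metadata_only) << '\n';     // false
}
```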
@ -343,7 +353,7 @@ protected:
/// Base implementation of the function copy().
/// It just opens two files, reads data by portions from the first file, and writes it to the second one.
/// A derived class may override copy() to provide a faster implementation.
void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path);
void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir = true);
private:
std::unique_ptr<Executor> executor;

View File

@ -45,7 +45,6 @@ IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadata(const String
return result;
}
IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater)
{
Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
@ -55,7 +54,6 @@ IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateAndStoreMetadata(const St
return result;
}
IDiskRemote::Metadata IDiskRemote::Metadata::createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater)
{
Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
@ -64,6 +62,17 @@ IDiskRemote::Metadata IDiskRemote::Metadata::createUpdateAndStoreMetadata(const
return result;
}
IDiskRemote::Metadata IDiskRemote::Metadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, IDiskRemote::MetadataUpdater updater)
{
Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
result.load();
if (updater(result))
result.save(sync);
metadata_disk_->removeFile(metadata_file_path_);
return result;
}
IDiskRemote::Metadata IDiskRemote::Metadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite)
{
@ -154,7 +163,8 @@ IDiskRemote::Metadata::Metadata(
const String & remote_fs_root_path_,
DiskPtr metadata_disk_,
const String & metadata_file_path_)
: RemoteMetadata(remote_fs_root_path_, metadata_file_path_)
: remote_fs_root_path(remote_fs_root_path_)
, metadata_file_path(metadata_file_path_)
, metadata_disk(metadata_disk_)
, total_size(0), ref_count(0)
{
@ -230,6 +240,12 @@ IDiskRemote::Metadata IDiskRemote::readUpdateAndStoreMetadata(const String & pat
}
IDiskRemote::Metadata IDiskRemote::readUpdateStoreMetadataAndRemove(const String & path, bool sync, IDiskRemote::MetadataUpdater updater)
{
std::unique_lock lock(metadata_mutex);
return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
}
IDiskRemote::Metadata IDiskRemote::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, IDiskRemote::MetadataUpdater updater)
{
if (mode == WriteMode::Rewrite || !metadata_disk->exists(path))
@ -270,7 +286,7 @@ std::unordered_map<String, String> IDiskRemote::getSerializedMetadata(const std:
return metadatas;
}
void IDiskRemote::removeMetadata(const String & path, std::vector<std::string> & paths_to_remove)
void IDiskRemote::removeMetadata(const String & path, std::vector<String> & paths_to_remove)
{
LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path));
@ -288,6 +304,7 @@ void IDiskRemote::removeMetadata(const String & path, std::vector<std::string> &
{
for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects)
{
paths_to_remove.push_back(remote_fs_root_path + remote_fs_object_path);
if (cache)
@ -307,8 +324,7 @@ void IDiskRemote::removeMetadata(const String & path, std::vector<std::string> &
return true;
};
readUpdateAndStoreMetadata(path, false, metadata_updater);
metadata_disk->removeFile(path);
readUpdateStoreMetadataAndRemove(path, false, metadata_updater);
/// If there is no references - delete content from remote FS.
}
catch (const Exception & e)
@ -327,13 +343,13 @@ void IDiskRemote::removeMetadata(const String & path, std::vector<std::string> &
}
void IDiskRemote::removeMetadataRecursive(const String & path, std::vector<String> & paths_to_remove)
void IDiskRemote::removeMetadataRecursive(const String & path, std::unordered_map<String, std::vector<String>> & paths_to_remove)
{
checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
if (metadata_disk->isFile(path))
{
removeMetadata(path, paths_to_remove);
removeMetadata(path, paths_to_remove[path]);
}
else
{
@ -522,27 +538,43 @@ void IDiskRemote::removeSharedFileIfExists(const String & path, bool delete_meta
}
}
void IDiskRemote::removeSharedFiles(const RemoveBatchRequest & files, bool delete_metadata_only)
void IDiskRemote::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
std::vector<String> paths_to_remove;
std::unordered_map<String, std::vector<String>> paths_to_remove;
for (const auto & file : files)
{
bool skip = file.if_exists && !metadata_disk->exists(file.path);
if (!skip)
removeMetadata(file.path, paths_to_remove);
removeMetadata(file.path, paths_to_remove[file.path]);
}
if (!delete_metadata_only)
removeFromRemoteFS(paths_to_remove);
if (!keep_all_batch_data)
{
std::vector<String> remove_from_remote;
for (auto && [path, remote_paths] : paths_to_remove)
{
if (!file_names_remove_metadata_only.contains(fs::path(path).filename()))
remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end());
}
removeFromRemoteFS(remove_from_remote);
}
}
void IDiskRemote::removeSharedRecursive(const String & path, bool delete_metadata_only)
void IDiskRemote::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only)
{
std::vector<String> paths_to_remove;
std::unordered_map<String, std::vector<String>> paths_to_remove;
removeMetadataRecursive(path, paths_to_remove);
if (!delete_metadata_only)
removeFromRemoteFS(paths_to_remove);
if (!keep_all_batch_data)
{
std::vector<String> remove_from_remote;
for (auto && [local_path, remote_paths] : paths_to_remove)
{
if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename()))
remove_from_remote.insert(remove_from_remote.end(), remote_paths.begin(), remote_paths.end());
}
removeFromRemoteFS(remove_from_remote);
}
}

View File

@ -78,6 +78,8 @@ public:
Metadata readMetadata(const String & path) const;
Metadata readMetadataUnlocked(const String & path, std::shared_lock<std::shared_mutex> &) const;
Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater);
Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater);
Metadata createAndStoreMetadata(const String & path, bool sync);
@ -107,15 +109,16 @@ public:
void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); }
void removeRecursive(const String & path) override { removeSharedRecursive(path, false); }
void removeRecursive(const String & path) override { removeSharedRecursive(path, false, {}); }
void removeSharedFile(const String & path, bool delete_metadata_only) override;
void removeSharedFileIfExists(const String & path, bool delete_metadata_only) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool delete_metadata_only) override;
void removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void removeSharedRecursive(const String & path, bool delete_metadata_only) override;
void removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) override;
void listFiles(const String & path, std::vector<String> & file_names) override;
@ -172,7 +175,7 @@ protected:
private:
void removeMetadata(const String & path, std::vector<String> & paths_to_remove);
void removeMetadataRecursive(const String & path, std::vector<String> & paths_to_remove);
void removeMetadataRecursive(const String & path, std::unordered_map<String, std::vector<String>> & paths_to_remove);
bool tryReserve(UInt64 bytes);
@ -184,10 +187,18 @@ private:
using RemoteDiskPtr = std::shared_ptr<IDiskRemote>;
/// Remote FS (S3, HDFS) metadata file layout:
/// FS objects, their number and total size of all FS objects.
/// Each FS object represents a file path in remote FS and its size.
/// Minimum info, required to be passed to ReadIndirectBufferFromRemoteFS<T>
struct RemoteMetadata
struct IDiskRemote::Metadata
{
using Updater = std::function<bool(IDiskRemote::Metadata & metadata)>;
/// Metadata file version.
static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1;
static constexpr UInt32 VERSION_RELATIVE_PATHS = 2;
static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3;
/// Remote FS objects paths and their sizes.
std::vector<BlobPathWithSize> remote_fs_objects;
@ -197,22 +208,6 @@ struct RemoteMetadata
/// Relative path to metadata file on local FS.
const String metadata_file_path;
RemoteMetadata(const String & remote_fs_root_path_, const String & metadata_file_path_)
: remote_fs_root_path(remote_fs_root_path_), metadata_file_path(metadata_file_path_) {}
};
/// Remote FS (S3, HDFS) metadata file layout:
/// FS objects, their number and total size of all FS objects.
/// Each FS object represents a file path in remote FS and its size.
struct IDiskRemote::Metadata : RemoteMetadata
{
using Updater = std::function<bool(IDiskRemote::Metadata & metadata)>;
/// Metadata file version.
static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1;
static constexpr UInt32 VERSION_RELATIVE_PATHS = 2;
static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3;
DiskPtr metadata_disk;
/// Total size of all remote FS (S3, HDFS) objects.
@ -236,6 +231,7 @@ struct IDiskRemote::Metadata : RemoteMetadata
static Metadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_);
static Metadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
static Metadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
static Metadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync);
static Metadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);

View File

@ -755,7 +755,7 @@ void DiskS3::restore()
bool cleanup_s3 = information.source_bucket != bucket || information.source_path != remote_fs_root_path;
for (const auto & root : data_roots)
if (exists(root))
removeSharedRecursive(root + '/', !cleanup_s3);
removeSharedRecursive(root + '/', !cleanup_s3, {});
restoreFiles(information);
restoreFileOperations(information);

View File

@ -28,7 +28,7 @@ namespace ErrorCodes
extern const int FILE_DOESNT_EXIST;
extern const int UNKNOWN_EXCEPTION;
extern const int INCORRECT_DATA;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
extern const int CAPN_PROTO_BAD_TYPE;
}
capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info)
@ -447,7 +447,7 @@ static DataTypePtr getEnumDataTypeFromEnumSchema(const capnp::EnumSchema & enum_
if (enumerants.size() < 32768)
return getEnumDataTypeFromEnumerants<Int16>(enumerants);
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "ClickHouse supports only 8 and 16-but Enums");
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "ClickHouse supports only 8 and 16-bit Enums");
}
static DataTypePtr getDataTypeFromCapnProtoType(const capnp::Type & capnp_type)
@ -495,17 +495,17 @@ static DataTypePtr getDataTypeFromCapnProtoType(const capnp::Type & capnp_type)
{
auto fields = struct_schema.getUnionFields();
if (fields.size() != 2 || (!fields[0].getType().isVoid() && !fields[1].getType().isVoid()))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unions are not supported");
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "Unions are not supported");
auto value_type = fields[0].getType().isVoid() ? fields[1].getType() : fields[0].getType();
if (value_type.isStruct() || value_type.isList())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Tuples and Lists cannot be inside Nullable");
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "Tuples and Lists cannot be inside Nullable");
auto nested_type = getDataTypeFromCapnProtoType(value_type);
return std::make_shared<DataTypeNullable>(nested_type);
}
if (checkIfStructContainsUnnamedUnion(struct_schema))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "Unnamed union is not supported");
/// Treat Struct as Tuple.
DataTypes nested_types;
@ -518,14 +518,14 @@ static DataTypePtr getDataTypeFromCapnProtoType(const capnp::Type & capnp_type)
return std::make_shared<DataTypeTuple>(std::move(nested_types), std::move(nested_names));
}
default:
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unsupported CapnProtoType: {}", getCapnProtoFullTypeName(capnp_type));
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "Unsupported CapnProtoType: {}", getCapnProtoFullTypeName(capnp_type));
}
}
NamesAndTypesList capnProtoSchemaToCHSchema(const capnp::StructSchema & schema)
{
if (checkIfStructContainsUnnamedUnion(schema))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Unnamed union is not supported");
throw Exception(ErrorCodes::CAPN_PROTO_BAD_TYPE, "Unnamed union is not supported");
NamesAndTypesList names_and_types;
for (auto field : schema.getNonUnionFields())

View File

@ -374,7 +374,7 @@ String FormatFactory::getContentType(
SchemaReaderPtr FormatFactory::getSchemaReader(
const String & name,
ReadBuffer & buf,
ContextPtr context,
ContextPtr & context,
const std::optional<FormatSettings> & _format_settings) const
{
const auto & schema_reader_creator = dict.at(name).schema_reader_creator;
@ -387,7 +387,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader(
ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader(
const String & name,
ContextPtr context,
ContextPtr & context,
const std::optional<FormatSettings> & _format_settings) const
{
const auto & external_schema_reader_creator = dict.at(name).external_schema_reader_creator;

View File

@ -160,12 +160,12 @@ public:
SchemaReaderPtr getSchemaReader(
const String & name,
ReadBuffer & buf,
ContextPtr context,
ContextPtr & context,
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
ExternalSchemaReaderPtr getExternalSchemaReader(
const String & name,
ContextPtr context,
ContextPtr & context,
const std::optional<FormatSettings> & format_settings = std::nullopt) const;
void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine);

View File

@ -15,8 +15,10 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
extern const int EMPTY_DATA_PASSED;
extern const int BAD_ARGUMENTS;
extern const int ONLY_NULLS_WHILE_READING_SCHEMA;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
static std::optional<NamesAndTypesList> getOrderedColumnsList(
@ -41,11 +43,17 @@ static std::optional<NamesAndTypesList> getOrderedColumnsList(
return res;
}
bool isRetryableSchemaInferenceError(int code)
{
return code == ErrorCodes::EMPTY_DATA_PASSED || code == ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA;
}
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferCreator read_buffer_creator,
ContextPtr context,
ReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context,
std::unique_ptr<ReadBuffer> & buf_out)
{
NamesAndTypesList names_and_types;
@ -63,20 +71,43 @@ ColumnsDescription readSchemaFromFormat(
}
else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name))
{
buf_out = read_buffer_creator();
if (buf_out->eof())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, file is empty", format_name);
std::string exception_messages;
SchemaReaderPtr schema_reader;
std::unique_ptr<ReadBuffer> buf;
while ((buf = read_buffer_iterator()))
{
if (buf->eof())
{
auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name);
auto schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf_out, context, format_settings);
try
{
names_and_types = schema_reader->readSchema();
}
catch (const DB::Exception & e)
{
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message());
if (!retry)
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, exception_message);
exception_messages += "\n" + exception_message;
continue;
}
try
{
schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings);
names_and_types = schema_reader->readSchema();
buf_out = std::move(buf);
break;
}
catch (...)
{
auto exception_message = getCurrentExceptionMessage(false);
if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode()))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, exception_message);
exception_messages += "\n" + exception_message;
}
}
if (names_and_types.empty())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from files failed. Errors:{}", exception_messages);
/// If we have "INSERT SELECT" query then try to order
/// columns as they are ordered in table schema for formats
/// without strict column order (like JSON and TSKV).
@ -99,10 +130,10 @@ ColumnsDescription readSchemaFromFormat(
return ColumnsDescription(names_and_types);
}
ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional<FormatSettings> & format_settings, ReadBufferCreator read_buffer_creator, ContextPtr context)
ColumnsDescription readSchemaFromFormat(const String & format_name, const std::optional<FormatSettings> & format_settings, ReadBufferIterator & read_buffer_iterator, bool retry, ContextPtr & context)
{
std::unique_ptr<ReadBuffer> buf_out;
return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out);
return readSchemaFromFormat(format_name, format_settings, read_buffer_iterator, retry, context, buf_out);
}
DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type)

View File

@ -6,27 +6,32 @@
namespace DB
{
using ReadBufferIterator = std::function<std::unique_ptr<ReadBuffer>()>;
/// Try to determine the schema of the data in the specified format.
/// For formats that have an external schema reader, it will
/// use it and won't create a read buffer.
/// For formats that have a schema reader from the data,
/// read buffer will be created by the provided creator and
/// the schema will be extracted from the data.
/// If format doesn't have any schema reader or a schema reader
/// couldn't determine the schema, an exception will be thrown.
using ReadBufferCreator = std::function<std::unique_ptr<ReadBuffer>()>;
/// read buffer will be created by the provided iterator and
/// the schema will be extracted from the data. If the schema reader
/// couldn't determine the schema, we will try the next read buffer
/// from the provided iterator if it makes sense. If the format doesn't
/// have any schema reader or we couldn't determine the schema,
/// an exception will be thrown.
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferCreator read_buffer_creator,
ContextPtr context);
ReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context);
/// If ReadBuffer is created, it will be written to buf_out.
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,
ReadBufferCreator read_buffer_creator,
ContextPtr context,
ReadBufferIterator & read_buffer_iterator,
bool retry,
ContextPtr & context,
std::unique_ptr<ReadBuffer> & buf_out);
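A rough standalone sketch of the iterator-with-retry idea behind the new signature: the callback hands out one buffer per candidate source until it returns null, and inference falls through to the next buffer when the current one yields nothing. Everything below is illustrative plain C++, not the actual ClickHouse API:

``` cpp
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <vector>

// One std::istringstream stands in for one ReadBuffer; nullptr means "no more sources".
using BufferIterator = std::function<std::unique_ptr<std::istringstream>()>;

// Returns the first non-empty line found across all sources, retrying on empty ones.
std::optional<std::string> inferFirstLine(const BufferIterator & next)
{
    while (auto buf = next())
    {
        std::string line;
        if (std::getline(*buf, line) && !line.empty())
            return line;   // "schema" successfully read from this source
        // empty source: fall through and retry with the next buffer
    }
    return std::nullopt;   // all attempts failed
}

int main()
{
    const std::vector<std::string> sources = {"", "a UInt64, b String"};
    std::size_t i = 0;
    BufferIterator next = [&]() -> std::unique_ptr<std::istringstream>
    {
        return i < sources.size() ? std::make_unique<std::istringstream>(sources[i++]) : nullptr;
    };
    std::cout << inferFirstLine(next).value_or("<failed>") << '\n'; // prints: a UInt64, b String
}
```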
/// Make type Nullable recursively:

View File

@ -12,6 +12,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTColumnsTransformers.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
@ -81,6 +82,7 @@ public:
/// By default should_add_column_predicate returns true for any column name
void addTableColumns(
const String & table_name,
ASTs & columns,
ShouldAddColumnPredicate should_add_column_predicate = [](const String &) { return true; })
{
auto it = table_columns.find(table_name);
@ -105,7 +107,7 @@ public:
else
identifier = std::make_shared<ASTIdentifier>(std::vector<String>{it->first, column.name});
new_select_expression_list->children.emplace_back(std::move(identifier));
columns.emplace_back(std::move(identifier));
}
}
}
@ -129,14 +131,18 @@ private:
for (const auto & child : node.children)
{
if (child->as<ASTAsterisk>())
ASTs columns;
if (const auto * asterisk = child->as<ASTAsterisk>())
{
has_asterisks = true;
for (auto & table_name : data.tables_order)
data.addTableColumns(table_name);
data.addTableColumns(table_name, columns);
for (const auto & transformer : asterisk->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (child->as<ASTQualifiedAsterisk>())
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
has_asterisks = true;
@ -144,17 +150,44 @@ private:
throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR);
auto & identifier = child->children[0]->as<ASTTableIdentifier &>();
data.addTableColumns(identifier.name());
data.addTableColumns(identifier.name(), columns);
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{
IASTColumnsTransformer::transform(*it, columns);
}
}
else if (auto * columns_matcher = child->as<ASTColumnsMatcher>())
else if (const auto * columns_list_matcher = child->as<ASTColumnsListMatcher>())
{
has_asterisks = true;
for (const auto & ident : columns_list_matcher->column_list->children)
columns.emplace_back(ident->clone());
for (const auto & transformer : columns_list_matcher->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (const auto * columns_regexp_matcher = child->as<ASTColumnsRegexpMatcher>())
{
has_asterisks = true;
for (auto & table_name : data.tables_order)
data.addTableColumns(table_name, [&](const String & column_name) { return columns_matcher->isColumnMatching(column_name); });
data.addTableColumns(
table_name,
columns,
[&](const String & column_name) { return columns_regexp_matcher->isColumnMatching(column_name); });
for (const auto & transformer : columns_regexp_matcher->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else
data.new_select_expression_list->children.push_back(child);
data.new_select_expression_list->children.insert(
data.new_select_expression_list->children.end(),
std::make_move_iterator(columns.begin()),
std::make_move_iterator(columns.end()));
}
if (!has_asterisks)

View File

@ -96,8 +96,8 @@ void PredicateRewriteVisitorData::visitOtherInternalSelect(ASTSelectQuery & sele
size_t alias_index = 0;
for (auto & ref_select : temp_select_query->refSelect()->children)
{
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsMatcher>() &&
!ref_select->as<ASTIdentifier>())
if (!ref_select->as<ASTAsterisk>() && !ref_select->as<ASTQualifiedAsterisk>() && !ref_select->as<ASTColumnsListMatcher>()
&& !ref_select->as<ASTColumnsRegexpMatcher>() && !ref_select->as<ASTIdentifier>())
{
if (const auto & alias = ref_select->tryGetAlias(); alias.empty())
ref_select->setAlias("--predicate_optimizer_" + toString(alias_index++));

View File

@ -196,7 +196,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
bool has_asterisk = false;
for (const auto & child : node.children)
{
if (child->as<ASTAsterisk>() || child->as<ASTColumnsMatcher>())
if (child->as<ASTAsterisk>() || child->as<ASTColumnsListMatcher>() || child->as<ASTColumnsRegexpMatcher>())
{
if (tables_with_columns.empty())
throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR);
@ -229,47 +229,40 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
for (const auto & column : *cols)
{
if (first_table || !data.join_using_columns.contains(column.name))
{
addIdentifier(columns, table.table, column.name);
}
}
}
first_table = false;
}
for (const auto & transformer : asterisk->children)
{
IASTColumnsTransformer::transform(transformer, columns);
}
}
else if (const auto * asterisk_pattern = child->as<ASTColumnsMatcher>())
{
if (asterisk_pattern->column_list)
{
for (const auto & ident : asterisk_pattern->column_list->children)
columns.emplace_back(ident->clone());
}
else
{
bool first_table = true;
for (const auto & table : tables_with_columns)
{
for (const auto & column : table.columns)
{
if (asterisk_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.contains(column.name)))
{
addIdentifier(columns, table.table, column.name);
}
}
first_table = false;
}
}
// ColumnsMatcher's transformers start to appear at child 1
for (auto it = asterisk_pattern->children.begin() + 1; it != asterisk_pattern->children.end(); ++it)
for (const auto & transformer : asterisk->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (auto * asterisk_column_list = child->as<ASTColumnsListMatcher>())
{
for (const auto & ident : asterisk_column_list->column_list->children)
columns.emplace_back(ident->clone());
for (const auto & transformer : asterisk_column_list->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (const auto * asterisk_regexp_pattern = child->as<ASTColumnsRegexpMatcher>())
{
bool first_table = true;
for (const auto & table : tables_with_columns)
{
IASTColumnsTransformer::transform(*it, columns);
for (const auto & column : table.columns)
{
if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
{
addIdentifier(columns, table.table, column.name);
}
}
first_table = false;
}
for (const auto & transformer : asterisk_regexp_pattern->children)
IASTColumnsTransformer::transform(transformer, columns);
}
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
@ -280,12 +273,11 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
if (ident_db_and_name.satisfies(table.table, true))
{
for (const auto & column : table.columns)
{
addIdentifier(columns, table.table, column.name);
}
break;
}
}
// QualifiedAsterisk's transformers start to appear at child 1
for (auto it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
{

View File

@ -21,6 +21,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -138,10 +139,18 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
continue;
}
}
else if (!function_factory.get(function->name, context)->isInjective({}))
else
{
++i;
continue;
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
if (!function_builder)
function_builder = function_factory.get(function->name, context);
if (!function_builder->isInjective({}))
{
++i;
continue;
}
}
/// copy shared pointer to args in order to ensure lifetime
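
For GROUP BY key elimination the wrapped function must be injective; this hunk makes the check consult user-defined executable functions before falling back to the built-in FunctionFactory. The lookup order, restated as a standalone sketch reusing the names from the hunk above (variable names are illustrative):

/// Prefer a user-defined executable function with this name, fall back to a built-in one,
/// and only drop the wrapper from the GROUP BY key if the resolved function is injective.
FunctionOverloadResolverPtr resolver = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
if (!resolver)
    resolver = FunctionFactory::instance().get(function->name, context);

bool removable_from_group_by = resolver->isInjective({});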

View File

@ -193,6 +193,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal32> *>(&type)) return convertDecimalType(src, *ptype);
if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal64> *>(&type)) return convertDecimalType(src, *ptype);
if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal128> *>(&type)) return convertDecimalType(src, *ptype);
if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal256> *>(&type)) return convertDecimalType(src, *ptype);
if (which_type.isEnum() && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64))
{

View File

@ -17,6 +17,8 @@ void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const { ostr.write('*');
void ASTAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << "*";
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';

View File

@ -1,64 +1,117 @@
#include "ASTColumnsMatcher.h"
#include <Parsers/ASTColumnsMatcher.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <Common/quoteString.h>
#include <re2/re2.h>
#include <Common/SipHash.h>
#include <IO/Operators.h>
#include <Common/quoteString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPILE_REGEXP;
}
ASTPtr ASTColumnsMatcher::clone() const
ASTPtr ASTColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsMatcher>(*this);
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
clone->cloneChildren();
return clone;
}
void ASTColumnsMatcher::appendColumnName(WriteBuffer & ostr) const { writeString(original_pattern, ostr); }
void ASTColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
{
writeCString("COLUMNS(", ostr);
writeQuotedString(original_pattern, ostr);
writeChar(')', ostr);
}
void ASTColumnsMatcher::updateTreeHashImpl(SipHash & hash_state) const
void ASTColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(original_pattern.size());
hash_state.update(original_pattern);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
if (column_list)
{
frame.expression_list_prepend_whitespace = false;
column_list->formatImpl(settings, state, frame);
}
else
settings.ostr << quoteString(original_pattern);
settings.ostr << quoteString(original_pattern);
settings.ostr << ")";
for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';
(*it)->formatImpl(settings, state, frame);
child->formatImpl(settings, state, frame);
}
}
void ASTColumnsMatcher::setPattern(String pattern)
void ASTColumnsRegexpMatcher::setPattern(String pattern)
{
original_pattern = std::move(pattern);
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
if (!column_matcher->ok())
throw DB::Exception("COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
throw DB::Exception(
"COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(),
DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
}
bool ASTColumnsMatcher::isColumnMatching(const String & column_name) const
bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
{
return RE2::PartialMatch(column_name, *column_matcher);
}
ASTPtr ASTColumnsListMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
clone->column_list = column_list->clone();
clone->cloneChildren();
return clone;
}
void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
{
column_list->updateTreeHash(hash_state);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
{
writeCString("COLUMNS(", ostr);
for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it)
{
if (it != column_list->children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
}
writeChar(')', ostr);
}
void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
{
if (it != column_list->children.begin())
{
settings.ostr << ", ";
}
(*it)->formatImpl(settings, state, frame);
}
settings.ostr << ")";
/// Format column transformers
for (const auto & child : children)
{
settings.ostr << ' ';
child->formatImpl(settings, state, frame);
}
}
}
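
A short usage sketch for the regexp half of the split (the writeQuotedString call above means the pattern is emitted single-quoted; the exact escaping in the comment is illustrative):

#include <Parsers/ASTColumnsMatcher.h>
#include <IO/WriteBufferFromString.h>

String exampleRegexpMatcher()
{
    auto matcher = std::make_shared<ASTColumnsRegexpMatcher>();
    matcher->setPattern("^c\\d+$");                    /// throws CANNOT_COMPILE_REGEXP for an invalid pattern
    bool matches = matcher->isColumnMatching("c10");   /// true: partial RE2 match against the column name
    (void)matches;

    WriteBufferFromOwnString out;
    matcher->appendColumnName(out);                    /// writes roughly: COLUMNS('^c\d+$')
    return out.str();
}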

View File

@ -2,10 +2,9 @@
#include <Parsers/IAST.h>
namespace re2
{
class RE2;
class RE2;
}
@ -14,21 +13,13 @@ namespace DB
class WriteBuffer;
namespace ErrorCodes
{
}
struct AsteriskSemantic;
struct AsteriskSemanticImpl;
/** SELECT COLUMNS('regexp') is expanded to multiple columns like * (asterisk).
* Optional transformers can be attached to further manipulate these expanded columns.
*/
class ASTColumnsMatcher : public IAST
class ASTColumnsRegexpMatcher : public IAST
{
public:
String getID(char) const override { return "ColumnsMatcher"; }
String getID(char) const override { return "ColumnsRegexpMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
@ -36,17 +27,26 @@ public:
bool isColumnMatching(const String & column_name) const;
void updateTreeHashImpl(SipHash & hash_state) const override;
ASTPtr column_list;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
private:
std::shared_ptr<re2::RE2> column_matcher;
String original_pattern;
std::shared_ptr<AsteriskSemanticImpl> semantic; /// pimpl
};
friend struct AsteriskSemantic;
/// Same as the above but use a list of column names to do matching.
/// Same as the above, but uses a list of column names for matching.
class ASTColumnsListMatcher : public IAST
{
public:
String getID(char) const override { return "ColumnsListMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
ASTPtr column_list;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};
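
And the list half, assuming the explicit identifiers are attached through column_list the way the parser change further below does (construction details here are illustrative, not from this diff):

#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>

ASTPtr exampleListMatcher()
{
    auto list_matcher = std::make_shared<ASTColumnsListMatcher>();
    list_matcher->column_list = std::make_shared<ASTExpressionList>();
    list_matcher->column_list->children.push_back(std::make_shared<ASTIdentifier>("a"));
    list_matcher->column_list->children.push_back(std::make_shared<ASTIdentifier>("b"));
    return list_matcher->clone();   /// clone() deep-copies column_list, as the .cpp hunk above shows
}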

View File

@ -105,6 +105,49 @@ void ASTColumnsApplyTransformer::transform(ASTs & nodes) const
}
}
void ASTColumnsApplyTransformer::appendColumnName(WriteBuffer & ostr) const
{
writeCString("APPLY ", ostr);
if (!column_name_prefix.empty())
writeChar('(', ostr);
if (lambda)
lambda->appendColumnName(ostr);
else
{
writeString(func_name, ostr);
if (parameters)
parameters->appendColumnName(ostr);
}
if (!column_name_prefix.empty())
{
writeCString(", '", ostr);
writeString(column_name_prefix, ostr);
writeCString("')", ostr);
}
}
void ASTColumnsApplyTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(func_name.size());
hash_state.update(func_name);
if (parameters)
parameters->updateTreeHashImpl(hash_state);
if (lambda)
lambda->updateTreeHashImpl(hash_state);
hash_state.update(lambda_arg.size());
hash_state.update(lambda_arg);
hash_state.update(column_name_prefix.size());
hash_state.update(column_name_prefix);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "EXCEPT" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
@ -128,6 +171,38 @@ void ASTColumnsExceptTransformer::formatImpl(const FormatSettings & settings, Fo
settings.ostr << ")";
}
void ASTColumnsExceptTransformer::appendColumnName(WriteBuffer & ostr) const
{
writeCString("EXCEPT ", ostr);
if (is_strict)
writeCString("STRICT ", ostr);
if (children.size() > 1)
writeChar('(', ostr);
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
}
if (!original_pattern.empty())
writeQuotedString(original_pattern, ostr);
if (children.size() > 1)
writeChar(')', ostr);
}
void ASTColumnsExceptTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(is_strict);
hash_state.update(original_pattern.size());
hash_state.update(original_pattern);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsExceptTransformer::transform(ASTs & nodes) const
{
std::set<String> expected_columns;
@ -201,6 +276,21 @@ void ASTColumnsReplaceTransformer::Replacement::formatImpl(
settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(name);
}
void ASTColumnsReplaceTransformer::Replacement::appendColumnName(WriteBuffer & ostr) const
{
expr->appendColumnName(ostr);
writeCString(" AS ", ostr);
writeProbablyBackQuotedString(name, ostr);
}
void ASTColumnsReplaceTransformer::Replacement::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(name.size());
hash_state.update(name);
expr->updateTreeHashImpl(hash_state);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << "REPLACE" << (is_strict ? " STRICT " : " ") << (settings.hilite ? hilite_none : "");
@ -211,9 +301,8 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
{
settings.ostr << ", ";
}
(*it)->formatImpl(settings, state, frame);
}
@ -221,6 +310,32 @@ void ASTColumnsReplaceTransformer::formatImpl(const FormatSettings & settings, F
settings.ostr << ")";
}
void ASTColumnsReplaceTransformer::appendColumnName(WriteBuffer & ostr) const
{
writeCString("REPLACE ", ostr);
if (is_strict)
writeCString("STRICT ", ostr);
if (children.size() > 1)
writeChar('(', ostr);
for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it)
{
if (it != children.begin())
writeCString(", ", ostr);
(*it)->appendColumnName(ostr);
}
if (children.size() > 1)
writeChar(')', ostr);
}
void ASTColumnsReplaceTransformer::updateTreeHashImpl(SipHash & hash_state) const
{
hash_state.update(is_strict);
IAST::updateTreeHashImpl(hash_state);
}
void ASTColumnsReplaceTransformer::replaceChildren(ASTPtr & node, const ASTPtr & replacement, const String & name)
{
for (auto & child : node->children)
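
The appendColumnName and updateTreeHashImpl overrides added above give every column transformer a stable textual name and a stable tree hash, so expressions such as COLUMNS(...) EXCEPT (...) can be named and compared consistently. A hedged sketch of obtaining both, for any IASTColumnsTransformer node (the helper itself is not part of this diff):

String transformerColumnName(const IAST & transformer)
{
    WriteBufferFromOwnString out;
    transformer.appendColumnName(out);   /// e.g. "APPLY sum" or "EXCEPT STRICT (a, b)"

    SipHash hash;
    transformer.updateTreeHash(hash);    /// two syntactically equal transformers now hash equally
    return out.str();
}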

View File

@ -30,6 +30,8 @@ public:
return res;
}
void transform(ASTs & nodes) const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
// Case 1 APPLY (quantile(0.9))
String func_name;
@ -59,6 +61,8 @@ public:
void transform(ASTs & nodes) const override;
void setPattern(String pattern);
bool isColumnMatching(const String & column_name) const;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
@ -76,12 +80,13 @@ public:
ASTPtr clone() const override
{
auto replacement = std::make_shared<Replacement>(*this);
replacement->children.clear();
replacement->expr = expr->clone();
replacement->children.push_back(replacement->expr);
return replacement;
}
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
String name;
ASTPtr expr;
@ -98,6 +103,8 @@ public:
return clone;
}
void transform(ASTs & nodes) const override;
void appendColumnName(WriteBuffer & ostr) const override;
void updateTreeHashImpl(SipHash & hash_state) const override;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;

View File

@ -17,6 +17,8 @@ void ASTQualifiedAsterisk::formatImpl(const FormatSettings & settings, FormatSta
const auto & qualifier = children.at(0);
qualifier->formatImpl(settings, state, frame);
settings.ostr << ".*";
/// Format column transformers
for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
{
settings.ostr << ' ';

View File

@ -1934,16 +1934,18 @@ bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return false;
++pos;
auto res = std::make_shared<ASTColumnsMatcher>();
ASTPtr res;
if (column_list)
{
res->column_list = column_list;
res->children.push_back(res->column_list);
auto list_matcher = std::make_shared<ASTColumnsListMatcher>();
list_matcher->column_list = column_list;
res = list_matcher;
}
else
{
res->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
res->children.push_back(regex_node);
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
regexp_matcher->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
res = regexp_matcher;
}
ParserColumnsTransformers transformers_p(allowed_transformers);
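
With this change the parsed node type itself records which COLUMNS form was used. A sketch of the mapping, driving ParserColumnsMatcher directly through one of the parseQuery overloads (assumed here; max_query_size/max_parser_depth values are illustrative):

bool parsesAsRegexpMatcher()
{
    ParserColumnsMatcher parser;
    /// COLUMNS(a, b)     -> ASTColumnsListMatcher   (identifiers stored in column_list)
    /// COLUMNS('^c\d+$') -> ASTColumnsRegexpMatcher (pattern compiled via setPattern)
    ASTPtr ast = parseQuery(parser, "COLUMNS('^c\\d+$')", /*max_query_size=*/ 0, /*max_parser_depth=*/ 0);
    return ast->as<ASTColumnsRegexpMatcher>() != nullptr;   /// true for the literal pattern above
}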

View File

@ -0,0 +1,33 @@
#include <Parsers/ParserAttachAccessEntity.h>
#include <Parsers/Access/ParserCreateQuotaQuery.h>
#include <Parsers/Access/ParserCreateRoleQuery.h>
#include <Parsers/Access/ParserCreateRowPolicyQuery.h>
#include <Parsers/Access/ParserCreateSettingsProfileQuery.h>
#include <Parsers/Access/ParserCreateUserQuery.h>
#include <Parsers/Access/ParserGrantQuery.h>
namespace DB
{
bool ParserAttachAccessEntity::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserCreateUserQuery create_user_p;
ParserCreateRoleQuery create_role_p;
ParserCreateRowPolicyQuery create_policy_p;
ParserCreateQuotaQuery create_quota_p;
ParserCreateSettingsProfileQuery create_profile_p;
ParserGrantQuery grant_p;
create_user_p.useAttachMode();
create_role_p.useAttachMode();
create_policy_p.useAttachMode();
create_quota_p.useAttachMode();
create_profile_p.useAttachMode();
grant_p.useAttachMode();
return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected)
|| create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected)
|| create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected);
}
}
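
The new standalone parser simply tries each access-entity parser in attach mode. A hedged usage sketch for re-parsing an entity definition stored in metadata (the query string and parseQuery overload are assumptions for illustration):

ASTPtr parseStoredAccessEntity()
{
    String stored_definition = "ATTACH USER test_user";   /// illustrative metadata text
    ParserAttachAccessEntity entity_parser;
    return parseQuery(entity_parser, stored_definition, /*max_query_size=*/ 0, /*max_parser_depth=*/ 0);
}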

View File

@ -0,0 +1,18 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Special parser for the 'ATTACH access entity' queries.
class ParserAttachAccessEntity : public IParserBase
{
protected:
const char * getName() const override { return "ATTACH access entity query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -3,13 +3,15 @@
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/Access/ParserCreateUserQuery.h>
#include <Parsers/ParserAlterQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ParserOptimizeQuery.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/ParserAttachAccessEntity.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include "Access/AccessEntityIO.h"
#include <string_view>
#include <regex>
#include <gtest/gtest.h>

View File

@ -34,12 +34,12 @@ ExecutingGraph::Edge & ExecutingGraph::addEdge(Edges & edges, Edge edge, const I
{
auto it = processors_map.find(to);
if (it == processors_map.end())
{
String msg = "Processor " + to->getName() + " was found as " + (edge.backward ? "input" : "output")
+ " for processor " + from->getName() + ", but not found in list of processors.";
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
}
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Processor {} was found as {} for processor {}, but not found in list of processors",
to->getName(),
edge.backward ? "input" : "output",
from->getName());
edge.to = it->second;
auto & added_edge = edges.emplace_back(std::move(edge));
@ -128,8 +128,7 @@ bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
{
auto * processor = processors[nodes.size()].get();
if (processors_map.contains(processor))
throw Exception("Processor " + processor->getName() + " was already added to pipeline.",
ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Processor {} was already added to pipeline", processor->getName());
processors_map[processor] = nodes.size();
nodes.emplace_back(std::make_unique<Node>(processor, nodes.size()));

View File

@ -28,6 +28,23 @@ void ExecutorTasks::rethrowFirstThreadException()
executor_context->rethrowExceptionIfHas();
}
void ExecutorTasks::tryWakeUpAnyOtherThreadWithTasks(ExecutionThreadContext & self, std::unique_lock<std::mutex> & lock)
{
if (!task_queue.empty() && !threads_queue.empty() && !finished)
{
size_t next_thread = self.thread_number + 1 == num_threads ? 0 : (self.thread_number + 1);
auto thread_to_wake = task_queue.getAnyThreadWithTasks(next_thread);
if (threads_queue.has(thread_to_wake))
threads_queue.pop(thread_to_wake);
else
thread_to_wake = threads_queue.popAny();
lock.unlock();
executor_contexts[thread_to_wake]->wakeUp();
}
}
void ExecutorTasks::tryGetTask(ExecutionThreadContext & context)
{
{
@ -43,20 +60,7 @@ void ExecutorTasks::tryGetTask(ExecutionThreadContext & context)
if (context.hasTask())
{
if (!task_queue.empty() && !threads_queue.empty())
{
size_t next_thread = context.thread_number + 1 == num_threads ? 0 : (context.thread_number + 1);
auto thread_to_wake = task_queue.getAnyThreadWithTasks(next_thread);
if (threads_queue.has(thread_to_wake))
threads_queue.pop(thread_to_wake);
else
thread_to_wake = threads_queue.popAny();
lock.unlock();
executor_contexts[thread_to_wake]->wakeUp();
}
tryWakeUpAnyOtherThreadWithTasks(context, lock);
return;
}
@ -120,20 +124,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea
queue.pop();
}
if (!threads_queue.empty() && !task_queue.empty() && !finished)
{
size_t next_thread = context.thread_number + 1 == num_threads ? 0 : (context.thread_number + 1);
auto thread_to_wake = task_queue.getAnyThreadWithTasks(next_thread);
if (threads_queue.has(thread_to_wake))
threads_queue.pop(thread_to_wake);
else
thread_to_wake = threads_queue.popAny();
lock.unlock();
executor_contexts[thread_to_wake]->wakeUp();
}
tryWakeUpAnyOtherThreadWithTasks(context, lock);
}
}
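
The wake-up logic that was previously duplicated in tryGetTask and pushTasks now lives in a single helper. Its thread selection is a plain round-robin that starts at the caller's neighbour; in sketch form, reusing the names from the helper above (equivalent, not the literal diff code):

/// Start the search one past the current thread and wrap around, so wake-ups are
/// spread across worker threads instead of always hitting thread 0.
size_t next_thread = (self.thread_number + 1) % num_threads;
size_t thread_to_wake = task_queue.getAnyThreadWithTasks(next_thread);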

View File

@ -50,6 +50,7 @@ public:
void rethrowFirstThreadException();
void tryWakeUpAnyOtherThreadWithTasks(ExecutionThreadContext & self, std::unique_lock<std::mutex> & lock);
void tryGetTask(ExecutionThreadContext & context);
void pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context);

View File

@ -185,10 +185,10 @@ void PipelineExecutor::executeSingleThread(size_t thread_num)
auto & context = tasks.getThreadContext(thread_num);
LOG_TRACE(log,
"Thread finished. Total time: {} sec. Execution time: {} sec. Processing time: {} sec. Wait time: {} sec.",
(context.total_time_ns / 1e9),
(context.execution_time_ns / 1e9),
(context.processing_time_ns / 1e9),
(context.wait_time_ns / 1e9));
context.total_time_ns / 1e9,
context.execution_time_ns / 1e9,
context.processing_time_ns / 1e9,
context.wait_time_ns / 1e9);
#endif
}

View File

@ -77,7 +77,7 @@ size_t StreamingFormatExecutor::execute()
case IProcessor::Status::NeedData:
case IProcessor::Status::Async:
case IProcessor::Status::ExpandPipeline:
throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Source processor returned status {}", IProcessor::statusToName(status));
}
}
}

View File

@ -25,7 +25,7 @@ public:
size_t getAnyThreadWithTasks(size_t from_thread = 0)
{
if (num_tasks == 0)
throw Exception("TaskQueue is empty.", ErrorCodes::LOGICAL_ERROR);
throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < queues.size(); ++i)
{
@ -37,7 +37,7 @@ public:
from_thread = 0;
}
throw Exception("TaskQueue is empty.", ErrorCodes::LOGICAL_ERROR);
throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR);
}
Task * pop(size_t thread_num)

View File

@ -36,7 +36,7 @@ struct ThreadsQueue
void push(size_t thread)
{
if (unlikely(has(thread)))
throw Exception("Can't push thread because it is already in threads queue.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Can't push thread because it is already in threads queue", ErrorCodes::LOGICAL_ERROR);
swapThreads(thread, stack[stack_size]);
++stack_size;
@ -45,7 +45,7 @@ struct ThreadsQueue
void pop(size_t thread)
{
if (unlikely(!has(thread)))
throw Exception("Can't pop thread because it is not in threads queue.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Can't pop thread because it is not in threads queue", ErrorCodes::LOGICAL_ERROR);
--stack_size;
swapThreads(thread, stack[stack_size]);
@ -54,7 +54,7 @@ struct ThreadsQueue
size_t popAny()
{
if (unlikely(stack_size == 0))
throw Exception("Can't pop from empty queue.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Can't pop from empty queue", ErrorCodes::LOGICAL_ERROR);
--stack_size;
return stack[stack_size];

View File

@ -8,7 +8,10 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
extern const int ONLY_NULLS_WHILE_READING_SCHEMA;
extern const int TYPE_MISMATCH;
extern const int INCORRECT_DATA;
extern const int EMPTY_DATA_PASSED;
}
static void chooseResultType(
@ -36,7 +39,7 @@ static void chooseResultType(
type = default_type;
else
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}",
type->getName(),
column_name,
@ -51,7 +54,7 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c
{
if (!default_type)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
"Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum "
"number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference",
max_rows_to_read);
@ -100,7 +103,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
break;
if (new_data_types.size() != data_types.size())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Rows have different amount of values");
throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
for (size_t i = 0; i != data_types.size(); ++i)
{
@ -114,7 +117,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
/// Check that we read at least one column.
if (data_types.empty())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot read rows from the data");
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data");
/// If column names weren't set, use default names 'c1', 'c2', ...
if (column_names.empty())
@ -126,7 +129,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
/// If column names were set, check that the number of names match the number of types.
else if (column_names.size() != data_types.size())
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
ErrorCodes::INCORRECT_DATA,
"The number of column names {} differs with the number of types {}", column_names.size(), data_types.size());
NamesAndTypesList result;
@ -192,7 +195,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
/// Check that we read at least one column.
if (names_to_types.empty())
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot read rows from the data");
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data");
NamesAndTypesList result;
for (auto & name : names_order)

View File

@ -11,7 +11,6 @@ namespace DB
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
@ -319,7 +318,7 @@ NamesAndTypesList FormatWithNamesAndTypesSchemaReader::readSchema()
std::vector<String> data_type_names = format_reader->readTypes();
if (data_type_names.size() != names.size())
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
ErrorCodes::INCORRECT_DATA,
"The number of column names {} differs with the number of types {}", names.size(), data_type_names.size());
NamesAndTypesList result;

View File

@ -67,8 +67,7 @@ void IInflatingTransform::work()
if (can_generate)
{
if (generated)
throw Exception("IInflatingTransform cannot consume chunk because it already was generated",
ErrorCodes::LOGICAL_ERROR);
throw Exception("IInflatingTransform cannot consume chunk because it already was generated", ErrorCodes::LOGICAL_ERROR);
current_chunk = generate();
generated = true;
@ -77,8 +76,7 @@ void IInflatingTransform::work()
else
{
if (!has_input)
throw Exception("IInflatingTransform cannot consume chunk because it wasn't read",
ErrorCodes::LOGICAL_ERROR);
throw Exception("IInflatingTransform cannot consume chunk because it wasn't read", ErrorCodes::LOGICAL_ERROR);
consume(std::move(current_chunk));
has_input = false;

View File

@ -178,7 +178,7 @@ public:
*/
virtual Status prepare()
{
throw Exception("Method 'prepare' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'prepare' is not implemented for {} processor", getName());
}
using PortNumbers = std::vector<UInt64>;
@ -193,7 +193,7 @@ public:
*/
virtual void work()
{
throw Exception("Method 'work' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'work' is not implemented for {} processor", getName());
}
/** Executor must call this method when 'prepare' returned Async.
@ -212,7 +212,7 @@ public:
*/
virtual int schedule()
{
throw Exception("Method 'schedule' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'schedule' is not implemented for {} processor", getName());
}
/** You must call this method if 'prepare' returned ExpandPipeline.
@ -226,7 +226,7 @@ public:
*/
virtual Processors expandPipeline()
{
throw Exception("Method 'expandPipeline' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'expandPipeline' is not implemented for {} processor", getName());
}
/// In case if query was cancelled executor will wait till all processors finish their jobs.
@ -258,7 +258,7 @@ public:
++number;
}
throw Exception("Can't find input port for " + getName() + " processor", ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find input port for {} processor", getName());
}
UInt64 getOutputPortNumber(const OutputPort * output_port) const
@ -272,7 +272,7 @@ public:
++number;
}
throw Exception("Can't find output port for " + getName() + " processor", ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't find output port for {} processor", getName());
}
const auto & getInputs() const { return inputs; }

View File

@ -46,7 +46,7 @@ public:
virtual void transform(Chunk &)
{
throw Exception("Method transform is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method transform is not implemented for {}", getName());
}
Status prepare() override;

View File

@ -19,7 +19,7 @@ LimitTransform::LimitTransform(
, with_ties(with_ties_), description(std::move(description_))
{
if (num_streams != 1 && with_ties)
throw Exception("Cannot use LimitTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot use LimitTransform with multiple ports and ties", ErrorCodes::LOGICAL_ERROR);
ports_data.resize(num_streams);
@ -86,8 +86,7 @@ IProcessor::Status LimitTransform::prepare(
return;
default:
throw Exception(
"Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status),
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::LOGICAL_ERROR, "Unexpected status for LimitTransform::preparePair : {}", IProcessor::statusToName(status));
}
};
@ -126,7 +125,7 @@ IProcessor::Status LimitTransform::prepare(
LimitTransform::Status LimitTransform::prepare()
{
if (ports_data.size() != 1)
throw Exception("prepare without arguments is not supported for multi-port LimitTransform.",
throw Exception("prepare without arguments is not supported for multi-port LimitTransform",
ErrorCodes::LOGICAL_ERROR);
return prepare({0}, {0});

View File

@ -153,7 +153,7 @@ struct RowRef
return true;
}
bool hasEqualSortColumnsWith(const RowRef & other)
bool hasEqualSortColumnsWith(const RowRef & other) const
{
return checkEquals(num_columns, sort_columns, row_num, other.sort_columns, other.row_num);
}
@ -197,7 +197,7 @@ struct RowRefWithOwnedChunk
sort_columns = &owned_chunk->sort_columns;
}
bool hasEqualSortColumnsWith(const RowRefWithOwnedChunk & other)
bool hasEqualSortColumnsWith(const RowRefWithOwnedChunk & other) const
{
return RowRef::checkEquals(sort_columns->size(), sort_columns->data(), row_num,
other.sort_columns->data(), other.row_num);

View File

@ -32,9 +32,7 @@ OffsetTransform::OffsetTransform(
}
IProcessor::Status OffsetTransform::prepare(
const PortNumbers & updated_input_ports,
const PortNumbers & updated_output_ports)
IProcessor::Status OffsetTransform::prepare(const PortNumbers & updated_input_ports, const PortNumbers & updated_output_ports)
{
bool has_full_port = false;
@ -63,9 +61,7 @@ IProcessor::Status OffsetTransform::prepare(
return;
default:
throw Exception(
"Unexpected status for OffsetTransform::preparePair : " + IProcessor::statusToName(status),
ErrorCodes::LOGICAL_ERROR);
ErrorCodes::LOGICAL_ERROR, "Unexpected status for OffsetTransform::preparePair : {}", IProcessor::statusToName(status));
}
};
@ -88,7 +84,7 @@ IProcessor::Status OffsetTransform::prepare(
OffsetTransform::Status OffsetTransform::prepare()
{
if (ports_data.size() != 1)
throw Exception("prepare without arguments is not supported for multi-port OffsetTransform.",
throw Exception("prepare without arguments is not supported for multi-port OffsetTransform",
ErrorCodes::LOGICAL_ERROR);
return prepare({0}, {0});

View File

@ -16,7 +16,7 @@ void connect(OutputPort & output, InputPort & input)
auto out_name = output.getProcessor().getName();
auto in_name = input.getProcessor().getName();
assertCompatibleHeader(output.getHeader(), input.getHeader(), " function connect between " + out_name + " and " + in_name);
assertCompatibleHeader(output.getHeader(), input.getHeader(), fmt::format(" function connect between {} and {}", out_name, in_name));
input.output_port = &output;
output.input_port = &input;

View File

@ -89,7 +89,7 @@ protected:
DataPtr() : data(new Data())
{
if (unlikely((getUInt(data) & FLAGS_MASK) != 0))
throw Exception("Not alignment memory for Port.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR);
}
/// Pointer can store flags in case of exception in swap.
~DataPtr() { delete getPtr(getUInt(data) & PTR_MASK); }
@ -133,7 +133,7 @@ protected:
State() : data(new Data())
{
if (unlikely((getUInt(data) & FLAGS_MASK) != 0))
throw Exception("Not alignment memory for Port.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR);
}
~State()
@ -160,7 +160,7 @@ protected:
/// throw Exception("Cannot push block to port which is not needed.", ErrorCodes::LOGICAL_ERROR);
if (unlikely(flags & HAS_DATA))
throw Exception("Cannot push block to port which already has data.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot push block to port which already has data", ErrorCodes::LOGICAL_ERROR);
}
void ALWAYS_INLINE pull(DataPtr & data_, std::uintptr_t & flags, bool set_not_needed = false)
@ -174,10 +174,10 @@ protected:
/// It's ok to check because this flag can be changed only by pulling thread.
if (unlikely((flags & IS_NEEDED) == 0) && !set_not_needed)
throw Exception("Cannot pull block from port which is not needed.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot pull block from port which is not needed", ErrorCodes::LOGICAL_ERROR);
if (unlikely((flags & HAS_DATA) == 0))
throw Exception("Cannot pull block from port which has no data.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot pull block from port which has no data", ErrorCodes::LOGICAL_ERROR);
}
std::uintptr_t ALWAYS_INLINE setFlags(std::uintptr_t flags, std::uintptr_t mask)
@ -289,13 +289,15 @@ public:
{
auto & chunk = data->chunk;
String msg = "Invalid number of columns in chunk pulled from OutputPort. Expected "
+ std::to_string(header.columns()) + ", found " + std::to_string(chunk.getNumColumns()) + '\n';
msg += "Header: " + header.dumpStructure() + '\n';
msg += "Chunk: " + chunk.dumpStructure() + '\n';
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Invalid number of columns in chunk pulled from OutputPort. Expected {}, found {}\n"
"Header: {}\n"
"Chunk: {}\n",
header.columns(),
chunk.getNumColumns(),
header.dumpStructure(),
chunk.dumpStructure());
}
return std::move(*data);
@ -403,14 +405,15 @@ public:
{
if (unlikely(!data_.exception && data_.chunk.getNumColumns() != header.columns()))
{
String msg = "Invalid number of columns in chunk pushed to OutputPort. Expected "
+ std::to_string(header.columns())
+ ", found " + std::to_string(data_.chunk.getNumColumns()) + '\n';
msg += "Header: " + header.dumpStructure() + '\n';
msg += "Chunk: " + data_.chunk.dumpStructure() + '\n';
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Invalid number of columns in chunk pushed to OutputPort. Expected {}, found {}\n"
"Header: {}\n"
"Chunk: {}\n",
header.columns(),
data_.chunk.getNumColumns(),
header.dumpStructure(),
data_.chunk.dumpStructure());
}
updateVersion();

View File

@ -64,22 +64,25 @@ void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector<std::unique_ptr<Qu
const auto & inputs = step->getInputStreams();
size_t num_inputs = step->getInputStreams().size();
if (num_inputs != plans.size())
{
throw Exception("Cannot unite QueryPlans using " + step->getName() +
" because step has different number of inputs. "
"Has " + std::to_string(plans.size()) + " plans "
"and " + std::to_string(num_inputs) + " inputs", ErrorCodes::LOGICAL_ERROR);
}
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot unite QueryPlans using {} because step has different number of inputs. Has {} plans and {} inputs",
step->getName(),
plans.size(),
num_inputs);
for (size_t i = 0; i < num_inputs; ++i)
{
const auto & step_header = inputs[i].header;
const auto & plan_header = plans[i]->getCurrentDataStream().header;
if (!blocksHaveEqualStructure(step_header, plan_header))
throw Exception("Cannot unite QueryPlans using " + step->getName() + " because "
"it has incompatible header with plan " + root->step->getName() + " "
"plan header: " + plan_header.dumpStructure() +
"step header: " + step_header.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot unite QueryPlans using {} because it has incompatible header with plan {} plan header: {} step header: {}",
step->getName(),
root->step->getName(),
plan_header.dumpStructure(),
step_header.dumpStructure());
}
for (auto & plan : plans)
@ -108,8 +111,10 @@ void QueryPlan::addStep(QueryPlanStepPtr step)
if (num_input_streams == 0)
{
if (isInitialized())
throw Exception("Cannot add step " + step->getName() + " to QueryPlan because "
"step has no inputs, but QueryPlan is already initialized", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add step {} to QueryPlan because step has no inputs, but QueryPlan is already initialized",
step->getName());
nodes.emplace_back(Node{.step = std::move(step)});
root = &nodes.back();
@ -119,25 +124,33 @@ void QueryPlan::addStep(QueryPlanStepPtr step)
if (num_input_streams == 1)
{
if (!isInitialized())
throw Exception("Cannot add step " + step->getName() + " to QueryPlan because "
"step has input, but QueryPlan is not initialized", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add step {} to QueryPlan because step has input, but QueryPlan is not initialized",
step->getName());
const auto & root_header = root->step->getOutputStream().header;
const auto & step_header = step->getInputStreams().front().header;
if (!blocksHaveEqualStructure(root_header, step_header))
throw Exception("Cannot add step " + step->getName() + " to QueryPlan because "
"it has incompatible header with root step " + root->step->getName() + " "
"root header: " + root_header.dumpStructure() +
"step header: " + step_header.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add step {} to QueryPlan because it has incompatible header with root step {} root header: {} step header: {}",
step->getName(),
root->step->getName(),
root_header.dumpStructure(),
step_header.dumpStructure());
nodes.emplace_back(Node{.step = std::move(step), .children = {root}});
root = &nodes.back();
return;
}
throw Exception("Cannot add step " + step->getName() + " to QueryPlan because it has " +
std::to_string(num_input_streams) + " inputs but " + std::to_string(isInitialized() ? 1 : 0) +
" input expected", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add step {} to QueryPlan because it has {} inputs but {} input expected",
step->getName(),
num_input_streams,
isInitialized() ? 1 : 0);
}
QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline(

View File

@ -343,12 +343,12 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in
inputs_with_data.pop();
if (input_with_data.waiting_output == -1)
throw Exception("No associated output for input with data.", ErrorCodes::LOGICAL_ERROR);
throw Exception("No associated output for input with data", ErrorCodes::LOGICAL_ERROR);
auto & waiting_output = output_ports[input_with_data.waiting_output];
if (waiting_output.status == OutputStatus::NotActive)
throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Invalid status NotActive for associated output", ErrorCodes::LOGICAL_ERROR);
if (waiting_output.status != OutputStatus::Finished)
{

View File

@ -337,8 +337,11 @@ void MySQLSource::initPositionMappingFromQueryResultStructure()
if (!settings->fetch_by_name)
{
if (description.sample_block.columns() != connection->result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "
+ toString(description.sample_block.columns()) + " expected", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
throw Exception(
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH,
"mysqlxx::UseQueryResult contains {} columns while {} expected",
connection->result.getNumFields(),
description.sample_block.columns());
for (const auto idx : collections::range(0, connection->result.getNumFields()))
position_mapping[idx] = idx;
@ -362,18 +365,10 @@ void MySQLSource::initPositionMappingFromQueryResultStructure()
}
if (!missing_names.empty())
{
WriteBufferFromOwnString exception_message;
for (auto iter = missing_names.begin(); iter != missing_names.end(); ++iter)
{
if (iter != missing_names.begin())
exception_message << ", ";
exception_message << *iter;
}
throw Exception("mysqlxx::UseQueryResult must be contain the" + exception_message.str() + " columns.",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
}
throw Exception(
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH,
"mysqlxx::UseQueryResult must contain columns: {}",
fmt::join(missing_names, ", "));
}
}
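
The manual WriteBuffer loop that assembled the comma-separated column list is replaced by fmt::join, which the fmt-based Exception constructor formats directly. A small standalone sketch of the same idiom (header location varies with the bundled fmt version):

#include <fmt/format.h>   /// fmt::join may also need <fmt/ranges.h>, depending on the fmt version
#include <string>
#include <vector>

std::string joinNames(const std::vector<std::string> & missing_names)
{
    /// Renders "id, name" for {"id", "name"} — no manual separator bookkeeping needed.
    return fmt::format("{}", fmt::join(missing_names, ", "));
}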

View File

@ -18,10 +18,7 @@ class SQLiteSource : public SourceWithProgress
using SQLitePtr = std::shared_ptr<sqlite3>;
public:
SQLiteSource(SQLitePtr sqlite_db_,
const String & query_str_,
const Block & sample_block,
UInt64 max_block_size_);
SQLiteSource(SQLitePtr sqlite_db_, const String & query_str_, const Block & sample_block, UInt64 max_block_size_);
String getName() const override { return "SQLite"; }

View File

@ -125,7 +125,7 @@ public:
ssize_t res = ::read(fd, internal_buffer.begin(), internal_buffer.size());
if (-1 == res && errno != EINTR)
throwFromErrno("Cannot read from pipe ", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
throwFromErrno("Cannot read from pipe", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR);
if (res == 0)
break;
@ -187,7 +187,7 @@ public:
ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
throwFromErrno("Cannot write into pipe ", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
throwFromErrno("Cannot write into pipe", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
if (res > 0)
bytes_written += res;

View File

@ -12,23 +12,27 @@ namespace ErrorCodes
static void checkSingleInput(const IProcessor & transform)
{
if (transform.getInputs().size() != 1)
throw Exception("Transform for chain should have single input, "
"but " + transform.getName() + " has " +
toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transform for chain should have single input, but {} has {} inputs",
transform.getName(),
transform.getInputs().size());
if (transform.getInputs().front().isConnected())
throw Exception("Transform for chain has connected input.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Transform for chain has connected input", ErrorCodes::LOGICAL_ERROR);
}
static void checkSingleOutput(const IProcessor & transform)
{
if (transform.getOutputs().size() != 1)
throw Exception("Transform for chain should have single output, "
"but " + transform.getName() + " has " +
toString(transform.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transform for chain should have single output, but {} has {} outputs",
transform.getName(),
transform.getOutputs().size());
if (transform.getOutputs().front().isConnected())
throw Exception("Transform for chain has connected input.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Transform for chain has connected output", ErrorCodes::LOGICAL_ERROR);
}
static void checkTransform(const IProcessor & transform)
@ -40,7 +44,7 @@ static void checkTransform(const IProcessor & transform)
static void checkInitialized(const std::list<ProcessorPtr> & processors)
{
if (processors.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Drain is not initialized");
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chain is not initialized");
}
Chain::Chain(ProcessorPtr processor)
@ -61,15 +65,17 @@ Chain::Chain(std::list<ProcessorPtr> processors_) : processors(std::move(process
{
for (const auto & input : processor->getInputs())
if (&input != &getInputPort() && !input.isConnected())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot initialize chain because there is a not connected input for {}",
processor->getName());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot initialize chain because there is a disconnected input for {}",
processor->getName());
for (const auto & output : processor->getOutputs())
if (&output != &getOutputPort() && !output.isConnected())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot initialize chain because there is a not connected output for {}",
processor->getName());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot initialize chain because there is a disconnected output for {}",
processor->getName());
}
}

View File

@ -3,6 +3,7 @@
#include <Core/BackgroundSchedulePool.h>
#include <Interpreters/Context.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include "Core/Protocol.h"
#include <base/logger_useful.h>
@ -17,7 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_PACKET_FROM_SERVER;
extern const int UNEXPECTED_PACKET_FROM_SERVER;
}
std::unique_ptr<ConnectionCollector> ConnectionCollector::connection_collector;
@ -33,7 +34,7 @@ ConnectionCollector & ConnectionCollector::init(ContextMutablePtr global_context
{
if (connection_collector)
{
throw Exception("Connection collector is initialized twice. This is a bug.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Connection collector is initialized twice. This is a bug", ErrorCodes::LOGICAL_ERROR);
}
connection_collector.reset(new ConnectionCollector(global_context_, max_threads));
@ -90,13 +91,13 @@ void ConnectionCollector::drainConnections(IConnections & connections, bool thro
break;
default:
/// Connection should be closed in case of unknown packet,
/// Connection should be closed in case of unexpected packet,
/// since this means that the connection is in some bad state.
is_drained = false;
throw Exception(
ErrorCodes::UNKNOWN_PACKET_FROM_SERVER,
"Unknown packet {} from one of the following replicas: {}",
toString(packet.type),
throw NetException(
ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER,
"Unexpected packet {} from one of the following replicas: {}. (expected EndOfStream, Log, ProfileEvents or Exception)",
Protocol::Server::toString(packet.type),
connections.dumpAddresses());
}
}

View File

@ -61,15 +61,19 @@ void ExecutionSpeedLimits::throttle(
{
auto rows_per_second = read_rows / elapsed_seconds;
if (min_execution_rps && rows_per_second < min_execution_rps)
throw Exception("Query is executing too slow: " + toString(read_rows / elapsed_seconds)
+ " rows/sec., minimum: " + toString(min_execution_rps),
ErrorCodes::TOO_SLOW);
throw Exception(
ErrorCodes::TOO_SLOW,
"Query is executing too slow: {} rows/sec., minimum: {}",
read_rows / elapsed_seconds,
min_execution_rps);
auto bytes_per_second = read_bytes / elapsed_seconds;
if (min_execution_bps && bytes_per_second < min_execution_bps)
throw Exception("Query is executing too slow: " + toString(read_bytes / elapsed_seconds)
+ " bytes/sec., minimum: " + toString(min_execution_bps),
ErrorCodes::TOO_SLOW);
throw Exception(
ErrorCodes::TOO_SLOW,
"Query is executing too slow: {} bytes/sec., minimum: {}",
read_bytes / elapsed_seconds,
min_execution_bps);
/// If the predicted execution time is longer than `max_execution_time`.
if (max_execution_time != 0 && total_rows_to_read && read_rows)
@ -77,10 +81,12 @@ void ExecutionSpeedLimits::throttle(
double estimated_execution_time_seconds = elapsed_seconds * (static_cast<double>(total_rows_to_read) / read_rows);
if (estimated_execution_time_seconds > max_execution_time.totalSeconds())
throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)"
+ " is too long. Maximum: " + toString(max_execution_time.totalSeconds())
+ ". Estimated rows to process: " + toString(total_rows_to_read),
ErrorCodes::TOO_SLOW);
throw Exception(
ErrorCodes::TOO_SLOW,
"Estimated query execution time ({} seconds) is too long. Maximum: {}. Estimated rows to process: {}",
estimated_execution_time_seconds,
max_execution_time.totalSeconds(),
total_rows_to_read);
}
if (max_execution_rps && rows_per_second >= max_execution_rps)
@ -92,12 +98,13 @@ void ExecutionSpeedLimits::throttle(
}
}
static bool handleOverflowMode(OverflowMode mode, const String & message, int code)
template <typename... Args>
static bool handleOverflowMode(OverflowMode mode, int code, fmt::format_string<Args...> fmt, Args &&... args)
{
switch (mode)
{
case OverflowMode::THROW:
throw Exception(message, code);
throw Exception(code, std::move(fmt), std::forward<Args>(args)...);
case OverflowMode::BREAK:
return false;
default:
@ -112,10 +119,12 @@ bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowM
auto elapsed_ns = stopwatch.elapsed();
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
ErrorCodes::TIMEOUT_EXCEEDED);
return handleOverflowMode(
overflow_mode,
ErrorCodes::TIMEOUT_EXCEEDED,
"Timeout exceeded: elapsed {} seconds, maximum: {}",
static_cast<double>(elapsed_ns) / 1000000000ULL,
max_execution_time.totalMicroseconds() / 1000000.0);
}
return true;
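
handleOverflowMode is now a variadic template over a fmt::format_string, so call sites pass the raw format string plus arguments and the message is only rendered on the THROW path. A hedged example of a call in that style (the error code and variables are placeholders, not from this file):

return handleOverflowMode(
    overflow_mode,
    ErrorCodes::TOO_MANY_ROWS,                         /// placeholder error code for this sketch
    "Limit for rows exceeded: {} rows, maximum: {}",
    rows_read, max_rows);                              /// placeholder variables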

View File

@ -23,16 +23,18 @@ namespace ErrorCodes
static void checkSource(const IProcessor & source)
{
if (!source.getInputs().empty())
throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " +
toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for pipe shouldn't have any input, but {} has {} inputs",
source.getName(),
source.getInputs().size());
if (source.getOutputs().empty())
throw Exception("Source for pipe should have single output, but it doesn't have any",
ErrorCodes::LOGICAL_ERROR);
if (source.getOutputs().size() > 1)
throw Exception("Source for pipe should have single output, but " + source.getName() + " has " +
toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
if (source.getOutputs().size() != 1)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for pipe should have single output, but {} has {} outputs",
source.getName(),
source.getOutputs().size());
}
static OutputPort * uniteExtremes(const OutputPortRawPtrs & ports, const Block & header, Processors & processors)
@ -112,8 +114,11 @@ PipelineResourcesHolder Pipe::detachResources()
Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes)
{
if (!source->getInputs().empty())
throw Exception("Source for pipe shouldn't have any input, but " + source->getName() + " has " +
toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for pipe shouldn't have any input, but {} has {} inputs",
source->getName(),
source->getInputs().size());
if (!output)
throw Exception("Cannot create Pipe from source because specified output port is nullptr",
@ -141,8 +146,7 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output
auto it = std::find_if(outputs.begin(), outputs.end(), [port](const OutputPort & p) { return &p == port; });
if (it == outputs.end())
throw Exception("Cannot create Pipe because specified " + name + " port does not belong to source",
ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because specified {} port does not belong to source", name);
};
check_port_from_source(output, "output");
@ -150,9 +154,11 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output
check_port_from_source(extremes, "extremes");
if (num_specified_ports != outputs.size())
throw Exception("Cannot create Pipe from source because it has " + std::to_string(outputs.size()) +
" output ports, but " + std::to_string(num_specified_ports) + " were specified",
ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot create Pipe from source because it has {} output ports, but {} were specified",
outputs.size(),
num_specified_ports);
}
totals_port = totals;
@ -188,14 +194,16 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_))
for (const auto & port : processor->getInputs())
{
if (!port.isConnected())
throw Exception("Cannot create Pipe because processor " + processor->getName() +
" has not connected input port", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because processor {} has disconnected input port", processor->getName());
const auto * connected_processor = &port.getOutputPort().getProcessor();
if (!set.contains(connected_processor))
throw Exception("Cannot create Pipe because processor " + processor->getName() +
" has input port which is connected with unknown processor " +
connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot create Pipe because processor {} has input port which is connected with unknown processor {}",
processor->getName(),
connected_processor->getName());
}
for (auto & port : processor->getOutputs())
@ -208,14 +216,16 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_))
const auto * connected_processor = &port.getInputPort().getProcessor();
if (!set.contains(connected_processor))
throw Exception("Cannot create Pipe because processor " + processor->getName() +
" has output port which is connected with unknown processor " +
connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot create Pipe because processor {} has output port which is connected with unknown processor {}",
processor->getName(),
connected_processor->getName());
}
}
if (output_ports.empty())
throw Exception("Cannot create Pipe because processors don't have any not-connected output ports",
throw Exception("Cannot create Pipe because processors don't have any disconnected output ports",
ErrorCodes::LOGICAL_ERROR);
header = output_ports.front()->getHeader();
@ -365,10 +375,10 @@ void Pipe::addSource(ProcessorPtr source)
void Pipe::addTotalsSource(ProcessorPtr source)
{
if (output_ports.empty())
throw Exception("Cannot add totals source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot add totals source to empty Pipe", ErrorCodes::LOGICAL_ERROR);
if (totals_port)
throw Exception("Totals source was already added to Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Totals source was already added to Pipe", ErrorCodes::LOGICAL_ERROR);
checkSource(*source);
const auto & source_header = output_ports.front()->getHeader();
@ -385,10 +395,10 @@ void Pipe::addTotalsSource(ProcessorPtr source)
void Pipe::addExtremesSource(ProcessorPtr source)
{
if (output_ports.empty())
throw Exception("Cannot add extremes source to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot add extremes source to empty Pipe", ErrorCodes::LOGICAL_ERROR);
if (extremes_port)
throw Exception("Extremes source was already added to Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Extremes source was already added to Pipe", ErrorCodes::LOGICAL_ERROR);
checkSource(*source);
const auto & source_header = output_ports.front()->getHeader();
@ -435,20 +445,23 @@ void Pipe::addTransform(ProcessorPtr transform)
void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes)
{
if (output_ports.empty())
throw Exception("Cannot add transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR);
auto & inputs = transform->getInputs();
if (inputs.size() != output_ports.size())
throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
"Processor has " + std::to_string(inputs.size()) + " input ports, "
"but " + std::to_string(output_ports.size()) + " expected", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add transform {} to Pipe because it has {} input ports, but {} expected",
transform->getName(),
inputs.size(),
output_ports.size());
if (totals && totals_port)
throw Exception("Cannot add transform with totals to Pipe because it already has totals.",
throw Exception("Cannot add transform with totals to Pipe because it already has totals",
ErrorCodes::LOGICAL_ERROR);
if (extremes && extremes_port)
throw Exception("Cannot add transform with extremes to Pipe because it already has extremes.",
throw Exception("Cannot add transform with extremes to Pipe because it already has extremes",
ErrorCodes::LOGICAL_ERROR);
if (totals)
@ -515,21 +528,24 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort
void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
{
if (output_ports.empty())
throw Exception("Cannot add transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR);
auto & inputs = transform->getInputs();
size_t expected_inputs = output_ports.size() + (totals ? 1 : 0) + (extremes ? 1 : 0);
if (inputs.size() != expected_inputs)
throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
"Processor has " + std::to_string(inputs.size()) + " input ports, "
"but " + std::to_string(expected_inputs) + " expected", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add transform {} to Pipe because it has {} input ports, but {} expected",
transform->getName(),
inputs.size(),
expected_inputs);
if (totals && !totals_port)
throw Exception("Cannot add transform consuming totals to Pipe because Pipe does not have totals.",
throw Exception("Cannot add transform consuming totals to Pipe because Pipe does not have totals",
ErrorCodes::LOGICAL_ERROR);
if (extremes && !extremes_port)
throw Exception("Cannot add transform consuming extremes to Pipe because it already has extremes.",
throw Exception("Cannot add transform consuming extremes to Pipe because it already has extremes",
ErrorCodes::LOGICAL_ERROR);
if (totals)
@ -561,17 +577,20 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort *
}
if (totals && !found_totals)
throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
"specified totals port does not belong to it", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add transform {} to Pipes because specified totals port does not belong to it",
transform->getName());
if (extremes && !found_extremes)
throw Exception("Cannot add transform " + transform->getName() + " to Pipes because "
"specified extremes port does not belong to it", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add transform {} to Pipes because specified extremes port does not belong to it",
transform->getName());
auto & outputs = transform->getOutputs();
if (outputs.empty())
throw Exception("Cannot add transform " + transform->getName() + " to Pipes because it has no outputs",
ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs", transform->getName());
output_ports.clear();
output_ports.reserve(outputs.size());
@ -614,14 +633,18 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter)
if (transform)
{
if (transform->getInputs().size() != 1)
throw Exception("Processor for query pipeline transform should have single input, "
"but " + transform->getName() + " has " +
toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Processor for query pipeline transform should have single input, but {} has {} inputs",
transform->getName(),
transform->getInputs().size());
if (transform->getOutputs().size() != 1)
throw Exception("Processor for query pipeline transform should have single output, "
"but " + transform->getName() + " has " +
toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Processor for query pipeline transform should have single output, but {} has {} outputs",
transform->getName(),
transform->getOutputs().size());
}
const auto & out_header = transform ? transform->getOutputs().front().getHeader()
@ -661,10 +684,11 @@ void Pipe::addSimpleTransform(const ProcessorGetter & getter)
void Pipe::addChains(std::vector<Chain> chains)
{
if (output_ports.size() != chains.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot add chains to Pipe because "
"number of output ports ({}) is not equal to the number of chains ({})",
output_ports.size(), chains.size());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot add chains to Pipe because number of output ports ({}) is not equal to the number of chains ({})",
output_ports.size(),
chains.size());
dropTotals();
dropExtremes();
@ -702,7 +726,7 @@ void Pipe::addChains(std::vector<Chain> chains)
void Pipe::resize(size_t num_streams, bool force, bool strict)
{
if (output_ports.empty())
throw Exception("Cannot resize an empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot resize an empty Pipe", ErrorCodes::LOGICAL_ERROR);
if (!force && num_streams == numOutputPorts())
return;
@ -720,7 +744,7 @@ void Pipe::resize(size_t num_streams, bool force, bool strict)
void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
{
if (output_ports.empty())
throw Exception("Cannot set sink to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot set sink to empty Pipe", ErrorCodes::LOGICAL_ERROR);
auto add_transform = [&](OutputPort *& stream, Pipe::StreamType stream_type)
{
@ -732,14 +756,18 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
if (transform)
{
if (transform->getInputs().size() != 1)
throw Exception("Sink for query pipeline transform should have single input, "
"but " + transform->getName() + " has " +
toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Sink for query pipeline transform should have single input, but {} has {} inputs",
transform->getName(),
transform->getInputs().size());
if (!transform->getOutputs().empty())
throw Exception("Sink for query pipeline transform should have no outputs, "
"but " + transform->getName() + " has " +
toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Sink for query pipeline transform should have no outputs, but {} has {} outputs",
transform->getName(),
transform->getOutputs().size());
}
if (!transform)
@ -762,7 +790,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
void Pipe::transform(const Transformer & transformer)
{
if (output_ports.empty())
throw Exception("Cannot transform empty Pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot transform empty Pipe", ErrorCodes::LOGICAL_ERROR);
auto new_processors = transformer(output_ports);
@ -774,8 +802,10 @@ void Pipe::transform(const Transformer & transformer)
for (const auto & port : output_ports)
{
if (!port->isConnected())
throw Exception("Transformation of Pipe is not valid because output port (" +
port->getHeader().dumpStructure() + ") is not connected", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transformation of Pipe is not valid because output port ({})",
port->getHeader().dumpStructure());
set.emplace(&port->getProcessor());
}
@ -787,14 +817,18 @@ void Pipe::transform(const Transformer & transformer)
for (const auto & port : processor->getInputs())
{
if (!port.isConnected())
throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
" has not connected input port", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transformation of Pipe is not valid because processor {} has not connected input port",
processor->getName());
const auto * connected_processor = &port.getOutputPort().getProcessor();
if (!set.contains(connected_processor))
throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
" has input port which is connected with unknown processor " +
connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transformation of Pipe is not valid because processor {} has input port which is connected with unknown processor {}",
processor->getName(),
connected_processor->getName());
}
for (auto & port : processor->getOutputs())
@ -807,15 +841,17 @@ void Pipe::transform(const Transformer & transformer)
const auto * connected_processor = &port.getInputPort().getProcessor();
if (!set.contains(connected_processor))
throw Exception("Transformation of Pipe is not valid because processor " + processor->getName() +
" has output port which is connected with unknown processor " +
connected_processor->getName(), ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Transformation of Pipe is not valid because processor {} has output port which is connected with unknown processor {}",
processor->getName(),
connected_processor->getName());
}
}
if (output_ports.empty())
throw Exception("Transformation of Pipe is not valid because processors don't have any "
"not-connected output ports", ErrorCodes::LOGICAL_ERROR);
throw Exception(
"Transformation of Pipe is not valid because processors don't have any disconnected output ports", ErrorCodes::LOGICAL_ERROR);
header = output_ports.front()->getHeader();
for (size_t i = 1; i < output_ports.size(); ++i)

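The rewritten throw statements in this file rely on the variadic Exception constructor that takes the error code first and an fmt-style format string with {} placeholders, instead of manual string concatenation. Below is a minimal, self-contained sketch of that call shape; ExceptionSketch and the placeholder error code are hypothetical and ClickHouse's real DB::Exception (stack traces, registered error codes) differs.

#include <fmt/core.h>
#include <stdexcept>
#include <string>
#include <utility>

// Sketch only: shows the two call shapes seen in the hunks above.
class ExceptionSketch : public std::runtime_error
{
public:
    // Old style: message first, error code last.
    ExceptionSketch(const std::string & message, int code_)
        : std::runtime_error(message), code(code_) {}

    // New style: error code first, then an fmt format string and its arguments.
    template <typename... Args>
    ExceptionSketch(int code_, fmt::format_string<Args...> format, Args &&... args)
        : std::runtime_error(fmt::format(format, std::forward<Args>(args)...)), code(code_) {}

    int code;
};

// Usage mirroring the rewrite above (LOGICAL_ERROR is a placeholder constant here):
//   throw ExceptionSketch("Cannot resize an empty Pipe", LOGICAL_ERROR);              // old
//   throw ExceptionSketch(LOGICAL_ERROR, "Processor {} has {} inputs", name, n);      // new

The new style avoids building the message eagerly with operator+ and keeps the error code in a fixed, first position.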
View File

@ -18,7 +18,7 @@ struct PipelineResourcesHolder
PipelineResourcesHolder();
PipelineResourcesHolder(PipelineResourcesHolder &&) noexcept;
~PipelineResourcesHolder();
/// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs.
/// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs.
PipelineResourcesHolder& operator=(PipelineResourcesHolder &&) noexcept;
/// Some processors may implicitly use Context or temporary Storage created by Interpreter.

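The corrected comment above describes move assignment that appends rather than replaces. A minimal illustration of that semantics, assuming (hypothetically) that the holder keeps a vector of shared resources; the real PipelineResourcesHolder holds query plans, storages, contexts, and similar objects.

#include <iterator>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical resource type standing in for plans, storages, contexts, etc.
struct Resource { int id = 0; };

struct ResourcesHolderSketch
{
    std::vector<std::shared_ptr<Resource>> resources;

    ResourcesHolderSketch() = default;
    ResourcesHolderSketch(ResourcesHolderSketch &&) noexcept = default;

    /// Append-style move assignment: data already stored in *this is kept,
    /// and everything from rhs is moved in after it (nothing is destroyed).
    ResourcesHolderSketch & operator=(ResourcesHolderSketch && rhs) noexcept
    {
        resources.insert(
            resources.end(),
            std::make_move_iterator(rhs.resources.begin()),
            std::make_move_iterator(rhs.resources.end()));
        rhs.resources.clear();
        return *this;
    }
};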
View File

@ -33,7 +33,7 @@ static void checkInput(const InputPort & input, const ProcessorPtr & processor)
if (!input.isConnected())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot create QueryPipeline because {} has not connected input",
"Cannot create QueryPipeline because {} has disconnected input",
processor->getName());
}
@ -42,7 +42,7 @@ static void checkOutput(const OutputPort & output, const ProcessorPtr & processo
if (!output.isConnected())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot create QueryPipeline because {} has not connected output",
"Cannot create QueryPipeline because {} has disconnected output",
processor->getName());
}

View File

@ -37,7 +37,7 @@ void QueryPipelineBuilder::addQueryPlan(std::unique_ptr<QueryPlan> plan)
void QueryPipelineBuilder::checkInitialized()
{
if (!initialized())
throw Exception("QueryPipeline wasn't initialized.", ErrorCodes::LOGICAL_ERROR);
throw Exception("QueryPipeline is uninitialized", ErrorCodes::LOGICAL_ERROR);
}
void QueryPipelineBuilder::checkInitializedAndNotCompleted()
@ -45,35 +45,44 @@ void QueryPipelineBuilder::checkInitializedAndNotCompleted()
checkInitialized();
if (pipe.isCompleted())
throw Exception("QueryPipeline was already completed.", ErrorCodes::LOGICAL_ERROR);
throw Exception("QueryPipeline is already completed", ErrorCodes::LOGICAL_ERROR);
}
static void checkSource(const ProcessorPtr & source, bool can_have_totals)
{
if (!source->getInputs().empty())
throw Exception("Source for query pipeline shouldn't have any input, but " + source->getName() + " has " +
toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline shouldn't have any input, but {} has {} inputs",
source->getName(),
source->getInputs().size());
if (source->getOutputs().empty())
throw Exception("Source for query pipeline should have single output, but it doesn't have any",
ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Source for query pipeline should have single output, but {} doesn't have any", source->getName());
if (!can_have_totals && source->getOutputs().size() != 1)
throw Exception("Source for query pipeline should have single output, but " + source->getName() + " has " +
toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline should have single output, but {} has {} outputs",
source->getName(),
source->getOutputs().size());
if (source->getOutputs().size() > 2)
throw Exception("Source for query pipeline should have 1 or 2 outputs, but " + source->getName() + " has " +
toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline should have 1 or 2 output, but {} has {} outputs",
source->getName(),
source->getOutputs().size());
}
void QueryPipelineBuilder::init(Pipe pipe_)
{
if (initialized())
throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR);
if (pipe_.empty())
throw Exception("Can't initialize pipeline with empty pipe.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Can't initialize pipeline with empty pipe", ErrorCodes::LOGICAL_ERROR);
pipe = std::move(pipe_);
}
@ -81,10 +90,10 @@ void QueryPipelineBuilder::init(Pipe pipe_)
void QueryPipelineBuilder::init(QueryPipeline pipeline)
{
if (initialized())
throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR);
if (pipeline.pushing())
throw Exception("Can't initialize pushing pipeline.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Can't initialize pushing pipeline", ErrorCodes::LOGICAL_ERROR);
pipe.holder = std::move(pipeline.resources);
pipe.processors = std::move(pipeline.processors);
@ -191,11 +200,10 @@ void QueryPipelineBuilder::addTotalsHavingTransform(ProcessorPtr transform)
checkInitializedAndNotCompleted();
if (!typeid_cast<const TotalsHavingTransform *>(transform.get()))
throw Exception("TotalsHavingTransform expected for QueryPipeline::addTotalsHavingTransform.",
ErrorCodes::LOGICAL_ERROR);
throw Exception("TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform", ErrorCodes::LOGICAL_ERROR);
if (pipe.getTotalsPort())
throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR);
resize(1);
@ -208,7 +216,7 @@ void QueryPipelineBuilder::addDefaultTotals()
checkInitializedAndNotCompleted();
if (pipe.getTotalsPort())
throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR);
const auto & current_header = getHeader();
Columns columns;
@ -461,7 +469,7 @@ void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem)
PipelineExecutorPtr QueryPipelineBuilder::execute()
{
if (!isCompleted())
throw Exception("Cannot execute pipeline because it is not completed.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Cannot execute pipeline because it is not completed", ErrorCodes::LOGICAL_ERROR);
return std::make_shared<PipelineExecutor>(pipe.processors, process_list_element);
}

View File

@ -78,8 +78,10 @@ RemoteInserter::RemoteInserter(
/// client's already got this information for remote table. Ignore.
}
else
throw NetException("Unexpected packet from server (expected Data or Exception, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
throw NetException(
ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER,
"Unexpected packet from server (expected Data or Exception, got {})",
Protocol::Server::toString(packet.type));
}
}
@ -131,8 +133,10 @@ void RemoteInserter::onFinish()
// Do nothing
}
else
throw NetException("Unexpected packet from server (expected EndOfStream or Exception, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
throw NetException(
ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER,
"Unexpected packet from server (expected EndOfStream or Exception, got {})",
Protocol::Server::toString(packet.type));
}
finished = true;

View File

@ -356,7 +356,7 @@ std::variant<Block, int> RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs
else
return read(*read_context);
}
throw Exception("Found duplicate uuids while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);
throw Exception("Found duplicate uuids while processing query", ErrorCodes::DUPLICATED_PART_UUIDS);
}
std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
@ -432,8 +432,10 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
default:
got_unknown_packet_from_replica = true;
throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}",
toString(packet.type),
throw Exception(
ErrorCodes::UNKNOWN_PACKET_FROM_SERVER,
"Unknown packet {} from one of the following replicas: {}",
packet.type,
connections->dumpAddresses());
}

Some files were not shown because too many files have changed in this diff.